This Notebook performs Enrichment Analyses of all the Phenotypic Classes using the WormBase Enrichment Suite.

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc
import tissue_enrichment_analysis as tea

rc('text', usetex=True)
rc('text.latex', preamble=r'\usepackage{cmbright}')
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})

%matplotlib inline

# This enables SVG graphics inline. 
%config InlineBackend.figure_formats = {'png', 'retina'}

# JB's favorite Seaborn settings for notebooks
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style("dark")

mpl.rcParams['xtick.labelsize'] = 16 
mpl.rcParams['ytick.labelsize'] = 16 
mpl.rcParams['legend.fontsize'] = 14

q = 0.1
tidy  = pd.read_csv('../output/SI1_dpy_22_results.csv')

tissue = tea.fetch_dictionary('tissue')
phenotype = tea.fetch_dictionary('phenotype')
go = tea.fetch_dictionary('go')
dicts = {'tissue': tissue, 'phenotype': phenotype, 'go': go}

# filter dictionaries and keep only transcripts that were detected
# at any level
for key, d in dicts.items():
    d = d[d.wbid.isin(tidy.ens_gene.unique())]
    dicts[key] = d

# perform all enrichment analysis and store them in a hash
analysis = {}
for phenoclass, group in tidy.groupby('phenotypic class'):
    frames = {}
    for k, d in dicts.items():
        df = tea.enrichment_analysis(group.ens_gene.unique(), d, show=False)
        frames[k] = df
    analysis[phenoclass] = frames

# pretty print the results:
for phenoclass, f in analysis.items():
    for k, d in f.items():
        # print only sig results (q < 10^-3)
        d['logQ'] = -d['Q value'].apply(np.log10)
        sig = (d['Q value'] < 10**-3)
        if d[sig].shape[0] == 0:
            continue

        # trim names for easier printing
        if k.lower() == 'tissue':
            d['minTerm'] = d.Term.str[:-13]
        if k.lower() == 'phenotype':
            d['minTerm'] = d.Term.str[:-20]
        if k.lower() == 'go':
            d['minTerm'] = d.Term.str[:-10]

        # subset dataframe to sig terms and make sure
        # there's >2 observations per term
        tmp = d[sig & (d.Observed > 2)]

        if tmp.shape[0] == 0:
            continue

        print(phenoclass, k)        
        print(tmp[['minTerm', 'logQ', 'Observed']].round(0))
        print('\n\n')

bx93 associated tissue
      minTerm  logQ  Observed
77  intestine   5.0       134


bx93 associated go
                            minTerm  logQ  Observed
59           immune system process    6.0        17
58  organic acid metabolic process    4.0        15
3      response to biotic stimulus    4.0        10


sy622 associated tissue
                 minTerm  logQ  Observed
33  cephalic sheath cell   5.0        31


sy622 associated go
                                 minTerm  logQ  Observed
71       organic acid metabolic process    6.0        37
72                immune system process    6.0        33
70  protein heterodimerization activity    4.0        12


sy622 specific tissue
               minTerm  logQ  Observed
246          intestine   8.0       649
64     muscular system   4.0       465
186  epithelial system   3.0       406


sy622 specific phenotype
                  minTerm  logQ  Observed
27  avoids bacterial lawn   3.0        62


sy622 specific go
                                     minTerm  logQ  Observed
105                    extracellular region   15.0        90
76        structural constituent of cuticle   14.0        44
110                         collagen trimer   14.0        42
2    nucleoside phosphate metabolic process    9.0        51
73           organic acid metabolic process    8.0        61
93      purine nucleotide metabolic process    7.0        38
69                            mitochondrion    7.0        79
88                      extracellular space    7.0        40
44       ribose phosphate metabolic process    7.0        38
97                 organelle inner membrane    5.0        34
21                                cytoplasm    4.0       322
117                       contractile fiber    4.0        27
60                       peptidase activity    3.0        54


transhet specific tissue
                 minTerm  logQ  Observed
22                  male  64.0       432
185  reproductive system  22.0       925
104               oocyte   3.0        28
8     amphid sheath cell   3.0        71


transhet specific go
                                               minTerm  logQ  Observed
17                           regulation of cell shape    7.0        32
113                      nucleoside phosphate binding    6.0       187
83                          purine nucleotide binding    6.0       169
95                             ribonucleotide binding    6.0       170
19                       peptidyl-serine modification    5.0        35
37                                  dephosphorylation    4.0        49
18   embryo development ending in birth or egg hatc...   4.0        54
86                       protein modification process    4.0       172
111                      phosphorus metabolic process    4.0       163
45                                    ATPase activity    4.0        48
30       hydrolase activity acting on acid anhydrides    3.0        87