This Notebook performs Enrichment Analyses of all the Phenotypic Classes using the WormBase Enrichment Suite.
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rc
import tissue_enrichment_analysis as tea
rc('text', usetex=True)
rc('text.latex', preamble=r'\usepackage{cmbright}')
rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})
%matplotlib inline
# This enables SVG graphics inline.
%config InlineBackend.figure_formats = {'png', 'retina'}
# JB's favorite Seaborn settings for notebooks
rc = {'lines.linewidth': 2,
'axes.labelsize': 18,
'axes.titlesize': 18,
'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style("dark")
mpl.rcParams['xtick.labelsize'] = 16
mpl.rcParams['ytick.labelsize'] = 16
mpl.rcParams['legend.fontsize'] = 14
q = 0.1
tidy = pd.read_csv('../output/SI1_dpy_22_results.csv')
tissue = tea.fetch_dictionary('tissue')
phenotype = tea.fetch_dictionary('phenotype')
go = tea.fetch_dictionary('go')
dicts = {'tissue': tissue, 'phenotype': phenotype, 'go': go}
# filter dictionaries and keep only transcripts that were detected
# at any level
for key, d in dicts.items():
d = d[d.wbid.isin(tidy.ens_gene.unique())]
dicts[key] = d
# perform all enrichment analysis and store them in a hash
analysis = {}
for phenoclass, group in tidy.groupby('phenotypic class'):
frames = {}
for k, d in dicts.items():
df = tea.enrichment_analysis(group.ens_gene.unique(), d, show=False)
frames[k] = df
analysis[phenoclass] = frames
# pretty print the results:
for phenoclass, f in analysis.items():
for k, d in f.items():
# print only sig results (q < 10^-3)
d['logQ'] = -d['Q value'].apply(np.log10)
sig = (d['Q value'] < 10**-3)
if d[sig].shape[0] == 0:
continue
# trim names for easier printing
if k.lower() == 'tissue':
d['minTerm'] = d.Term.str[:-13]
if k.lower() == 'phenotype':
d['minTerm'] = d.Term.str[:-20]
if k.lower() == 'go':
d['minTerm'] = d.Term.str[:-10]
# subset dataframe to sig terms and make sure
# there's >2 observations per term
tmp = d[sig & (d.Observed > 2)]
if tmp.shape[0] == 0:
continue
print(phenoclass, k)
print(tmp[['minTerm', 'logQ', 'Observed']].round(0))
print('\n\n')