作者,Evil Genius
最近确实头疼,晚上睡不着,白天睡不醒。
还有一些之前的同事失业了,偶尔交流一下
日子反正一直在过,经历一下低谷未必是坏事。
这一篇我们来分享一下关于空间的一些分析。
我们来看看聚类
如果我们聚类的目的是划分区域,那么空间至少有6种聚类方式。
- 形态学划分
- 分子聚类
- 细胞聚类
- CNV聚类
- QuPath划分
- SME聚类,即考虑空间邻域的聚类方式。
我们这一篇就分享一下CNV聚类的相关内容,拿到如下的结果
废话就不多说了,直接来示例代码
大家直接先要分析inferCNV,这个分享了很多次了,不再重复了。
from IPython.display import display, HTML, Image
display(HTML("<style>.container { width:95% !important; }</style>"))
%load_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings("ignore")
import os
import copy
import numpy as np
import pandas as pd
import scanpy as sc
# import scFates as scf
import scanpy.external as sce
from anndata import AnnData
# import palantir
import matplotlib
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
## fix palantir breaking down some plots
import seaborn
seaborn.reset_orig()
%matplotlib inline
sc.set_figure_params()
# scf.set_figure_pubready()
import sys
sys.path.append("../../lib")
from stpalette import palette1
from plots import plotSpatialAll
from utils import loadCNVfromInferCNV
from utils import loadAdImage
sc.settings.verbosity = 3
sc.settings.logfile = sys.stdout
model = 'WM4007'
dataPath = '../../data/'
preprocessedCasperCNVDataPath = 'c:/Projects/A_ST/output_casper_%s/' % model
preprocessedInferCNVDataPath = 'c:/Projects/A_ST/inferCNV_results_%s/' % model
ids = sorted(np.loadtxt(dataPath + 'ids_%s_ST.txt' % model, dtype=str))
sids = [id[7:12] for id in ids]
palette1.update({sid: cm.terrain(0.01 + i/len(sids)) for i, sid in enumerate(sids)})
df_infercnv_cnv, df_infercnv_meta = loadCNVfromInferCNV(dataPath + 'For_inferCNV_%s_meta.data.tsv.gz' % model,
[preprocessedInferCNVDataPath + 'infercnv.references.txt',
preprocessedInferCNVDataPath + 'infercnv.observations.txt'])
print(model)
se = pd.Series(index=pd.MultiIndex.from_frame(df_infercnv_meta).droplevel(0), data=(df_infercnv_cnv!=0).mean(axis=0).values)
se.groupby(level=[0,1]).mean().unstack().fillna(0).style.background_gradient(axis=None).applymap(lambda x: 'background-color: transparent; color: transparent' if x==0 else '')
print(model)
se.groupby(level=[0]).mean().to_frame().T.style.background_gradient(axis=None)
se.groupby(level=[0]).mean().to_frame().style.background_gradient(axis=None)
ad_all = sc.read(dataPath + 'ad_all_human_clustered_st_%s.h5ad' % model)
print(ad_all.shape)
ad_all.obs['CNV burden per clone'] = se.groupby(level=[0]).mean().reindex(ad_all.obs['cluster']).values
ad_all.obs['CNV burden'] = (df_infercnv_cnv!=0).astype(int).mean(axis=0).reindex(ad_all.obs.index).values
preprocessedStDataPath = 'c:/Projects/A_ST/from HPCC 11 28 2022/results_NF1-nod-t2t-k35/%s/' % model
ads = {id: ad_all[ad_all.obs['sample']==id, :] for id in ids}
images = {id: sc.read(preprocessedStDataPath + '%s/st_adata_processed.h5ad' % id).uns['spatial'] for id in ids}
for sample in ids[:]:
ads[sample] = ad_all[ad_all.obs['sample']==sample, :].copy()
ads[sample].uns['spatial'] = images[sample]
plotSpatialAll(ads, ids=[id for id in ids if '_S2_' in id], identity='CNV burden', palette=palette1, f=0.75, fy=1.025, nx=6, panelSize=4, wspace = 0.25, hspace = 0.2)
plotSpatialAll(ads, ids=[id for id in ids if '_S2_' in id], identity='CNV burden per clone', palette=palette1, f=0.75, fy=1.025, nx=6, panelSize=4, wspace = 0.25, hspace = 0.2)
ad_cnv = sc.read(dataPath + 'ad_all_human_clustered_cnv_%s.h5ad' % model)
ad_cnv.obs['CNV burden'] = (ad_cnv.to_df().T!=0).astype(int).mean(axis=0).values
ad_cnv.obs['CNV burden amplifications'] = (ad_cnv.to_df().T>0).astype(int).mean(axis=0).values
ad_cnv.obs['CNV burden deletions'] = (ad_cnv.to_df().T<0).astype(int).mean(axis=0).values
ad_cnv.obs[['original_barcode', 'id', 'CNV burden', 'CNV burden amplifications', 'CNV burden deletions']].to_csv(dataPath + 'CNV_burden_%s.csv' % model)
ad_cnv.obs['CNV burden'].hist(bins=100, alpha=0.5)
(ad_cnv.obs['CNV burden amplifications']*2).hist(bins=100, alpha=0.5)
(ad_cnv.obs['CNV burden deletions']*2).hist(bins=100, alpha=0.5)
palette1.update({id: cm.terrain(0.01 + i/len(ids)) for i, id in enumerate(ids)})
ad_all = sc.read(dataPath + 'ad_all_human_clustered_st_%s.h5ad' % model)
df_infercnv_cnv, df_infercnv_meta = loadCNVfromInferCNV(dataPath + 'For_inferCNV_%s_meta.data.tsv.gz' % model,
[preprocessedInferCNVDataPath + 'infercnv.references.txt',
preprocessedInferCNVDataPath + 'infercnv.observations.txt'])
ad_cnv = sc.AnnData(df_infercnv_cnv.T)
ad_cnv.obs = df_infercnv_meta.loc[ad_cnv.obs.index]
ad_cnv.uns['cluster_colors'] = ad_all.uns['%s_colors' % rna_cluster_identity]
ad_cnv.uns['sample_colors'] = ad_all.uns['sample_colors']
ad_cnv.uns['time_colors'] = ad_all.uns['T_colors'] #[2:]
ad_cnv.obs['original_barcode'] = ad_all.obs['original_barcode'].loc[ad_cnv.obs.index]
ad_cnv.obs['id'] = ad_cnv.obs['sample'].replace(dict(zip(sids, ids)))
sc.pp.pca(ad_cnv, n_comps=100, zero_center=True, use_highly_variable=False)
pca_projections = pd.DataFrame(ad_cnv.obsm['X_pca'], index=ad_cnv.obs_names)
sc.pp.neighbors(ad_cnv, n_neighbors=100, use_rep='X_pca', metric='correlation')
sc.tl.umap(ad_cnv)
plt.rcParams["figure.figsize"] = (7, 7)
sc.pl.embedding(ad_cnv, color=['time', 'cluster'], basis='X_umap', title='time', cmap='nipy_spectral', size=50)
# generate neighbor draph in multiscale diffusion space
dm_res = palantir.utils.run_diffusion_maps(pca_projections)
ms_data = palantir.utils.determine_multiscale_space(dm_res, n_eigs=10)
ad_cnv.obsm['X_palantir']=ms_data.values
sc.pp.neighbors(ad_cnv, n_neighbors=100, use_rep='X_palantir', metric='correlation')
sc.tl.umap(ad_cnv)
plt.rcParams["figure.figsize"] = (4, 4)
sc.pl.embedding(ad_cnv, color=['time', 'cluster'], basis="X_umap", title='time', cmap='nipy_spectral', size=25)
res = 0.25
plt.rcParams["figure.figsize"] = (4, 4)
sc.tl.leiden(ad_cnv, key_added='cnv_clusters', resolution=res)
sc.pl.embedding(ad_cnv, color=['cnv_clusters'], basis="X_umap", title='time', palette=palette1)
Load images and create ADs
images = {id: [(ad_temp := sc.read(preprocessedStDataPath + '%s/st_adata_processed.h5ad' % id)).uns['spatial'], ad_temp.obs.index, ad_temp.obsm['spatial']] for id in ids}
ads = dict()
for sample in ids:
ads[sample] = ad_cnv[ad_cnv.obs['id']==sample, :].copy()
ads[sample].uns['spatial'] = images[sample][0]
ads[sample].obsm['spatial'] = pd.DataFrame(index=images[sample][1], data=images[sample][2]).reindex(ads[sample].obs['original_barcode']).values
plotSpatialAll(ads, identity='cnv_clusters', palette=palette1, f=0.75)
生活很好,有你更好
网友评论