美文网首页
空间转录组数据分析之CNV burden和CNV聚类(pytho

空间转录组数据分析之CNV burden和CNV聚类(pytho

作者: 单细胞空间交响乐 | 来源:发表于2024-05-14 17:08 被阅读0次

    作者,Evil Genius

    最近确实头疼,晚上睡不着,白天睡不醒。

    还有一些之前的同事失业了,偶尔交流一下

    日子反正一直在过,经历一下低谷未必是坏事。

    这一篇我们来分享一下关于空间的一些分析。

    我们来看看聚类

    如果我们聚类的目的是划分区域,那么空间至少有6种聚类方式。

    • 形态学划分
    • 分子聚类
    • 细胞聚类
    • CNV聚类
    • QuPath划分
    • SME聚类,即考虑空间邻域的聚类方式。
    我们这一篇就分享一下CNV聚类的相关内容,拿到如下的结果
    废话就不多说了,直接来示例代码
    大家直接先要分析inferCNV,这个分享了很多次了,不再重复了。
    
    
    from IPython.display import display, HTML, Image
    display(HTML("<style>.container { width:95% !important; }</style>"))
    
    %load_ext autoreload
    %autoreload 2
    
    import warnings
    warnings.filterwarnings("ignore")
    
    import os
    import copy
    
    import numpy as np
    import pandas as pd
    import scanpy as sc
    # import scFates as scf
    import scanpy.external as sce
    from anndata import AnnData
    # import palantir
    
    import matplotlib
    from matplotlib import cm
    import matplotlib.pyplot as plt
    import matplotlib.patheffects as path_effects
    
    ## fix palantir breaking down some plots
    import seaborn
    seaborn.reset_orig()
    %matplotlib inline
    sc.set_figure_params()
    # scf.set_figure_pubready()
    
    import sys
    sys.path.append("../../lib")
    from stpalette import palette1
    from plots import plotSpatialAll
    from utils import loadCNVfromInferCNV
    from utils import loadAdImage
    
    sc.settings.verbosity = 3
    sc.settings.logfile = sys.stdout
    
    model = 'WM4007'
    
    dataPath = '../../data/'
    preprocessedCasperCNVDataPath = 'c:/Projects/A_ST/output_casper_%s/' % model
    preprocessedInferCNVDataPath = 'c:/Projects/A_ST/inferCNV_results_%s/' % model
    
    ids = sorted(np.loadtxt(dataPath + 'ids_%s_ST.txt' % model, dtype=str))
    sids = [id[7:12] for id in ids]
    
    palette1.update({sid: cm.terrain(0.01 + i/len(sids)) for i, sid in enumerate(sids)})
    
    df_infercnv_cnv, df_infercnv_meta = loadCNVfromInferCNV(dataPath + 'For_inferCNV_%s_meta.data.tsv.gz' % model, 
                                                            [preprocessedInferCNVDataPath + 'infercnv.references.txt',
                                                             preprocessedInferCNVDataPath + 'infercnv.observations.txt'])
    
    print(model)
    se = pd.Series(index=pd.MultiIndex.from_frame(df_infercnv_meta).droplevel(0), data=(df_infercnv_cnv!=0).mean(axis=0).values)
    se.groupby(level=[0,1]).mean().unstack().fillna(0).style.background_gradient(axis=None).applymap(lambda x: 'background-color: transparent; color: transparent' if x==0 else '')
    
    print(model)
    se.groupby(level=[0]).mean().to_frame().T.style.background_gradient(axis=None)
    
    se.groupby(level=[0]).mean().to_frame().style.background_gradient(axis=None)
    
    ad_all = sc.read(dataPath + 'ad_all_human_clustered_st_%s.h5ad' % model)
    print(ad_all.shape)
    
    ad_all.obs['CNV burden per clone'] = se.groupby(level=[0]).mean().reindex(ad_all.obs['cluster']).values
    
    ad_all.obs['CNV burden'] = (df_infercnv_cnv!=0).astype(int).mean(axis=0).reindex(ad_all.obs.index).values
    
    preprocessedStDataPath = 'c:/Projects/A_ST/from HPCC 11 28 2022/results_NF1-nod-t2t-k35/%s/' % model
    ads = {id: ad_all[ad_all.obs['sample']==id, :] for id in ids}
    images = {id: sc.read(preprocessedStDataPath + '%s/st_adata_processed.h5ad' % id).uns['spatial'] for id in ids}
    for sample in ids[:]:
        ads[sample] = ad_all[ad_all.obs['sample']==sample, :].copy()    
        ads[sample].uns['spatial'] = images[sample]
    
    plotSpatialAll(ads, ids=[id for id in ids if '_S2_' in id], identity='CNV burden', palette=palette1, f=0.75, fy=1.025, nx=6, panelSize=4, wspace = 0.25, hspace = 0.2)
    plotSpatialAll(ads, ids=[id for id in ids if '_S2_' in id], identity='CNV burden per clone', palette=palette1, f=0.75, fy=1.025, nx=6, panelSize=4, wspace = 0.25, hspace = 0.2)
    
    ad_cnv = sc.read(dataPath + 'ad_all_human_clustered_cnv_%s.h5ad' % model)
    ad_cnv.obs['CNV burden'] = (ad_cnv.to_df().T!=0).astype(int).mean(axis=0).values
    ad_cnv.obs['CNV burden amplifications'] = (ad_cnv.to_df().T>0).astype(int).mean(axis=0).values
    ad_cnv.obs['CNV burden deletions'] = (ad_cnv.to_df().T<0).astype(int).mean(axis=0).values
    ad_cnv.obs[['original_barcode', 'id', 'CNV burden', 'CNV burden amplifications', 'CNV burden deletions']].to_csv(dataPath + 'CNV_burden_%s.csv' % model)
    ad_cnv.obs['CNV burden'].hist(bins=100, alpha=0.5)
    (ad_cnv.obs['CNV burden amplifications']*2).hist(bins=100, alpha=0.5)
    (ad_cnv.obs['CNV burden deletions']*2).hist(bins=100, alpha=0.5)
    
    
    palette1.update({id: cm.terrain(0.01 + i/len(ids)) for i, id in enumerate(ids)})
    
    ad_all = sc.read(dataPath + 'ad_all_human_clustered_st_%s.h5ad' % model)
    
    
    df_infercnv_cnv, df_infercnv_meta = loadCNVfromInferCNV(dataPath + 'For_inferCNV_%s_meta.data.tsv.gz' % model, 
                                                            [preprocessedInferCNVDataPath + 'infercnv.references.txt',
                                                             preprocessedInferCNVDataPath + 'infercnv.observations.txt'])
    
    
    ad_cnv = sc.AnnData(df_infercnv_cnv.T)
    ad_cnv.obs = df_infercnv_meta.loc[ad_cnv.obs.index]
    
    ad_cnv.uns['cluster_colors'] = ad_all.uns['%s_colors' % rna_cluster_identity]
    ad_cnv.uns['sample_colors'] = ad_all.uns['sample_colors']
    ad_cnv.uns['time_colors'] = ad_all.uns['T_colors'] #[2:]
    ad_cnv.obs['original_barcode'] = ad_all.obs['original_barcode'].loc[ad_cnv.obs.index]
    ad_cnv.obs['id'] = ad_cnv.obs['sample'].replace(dict(zip(sids, ids)))
    
    sc.pp.pca(ad_cnv, n_comps=100, zero_center=True, use_highly_variable=False)
    pca_projections = pd.DataFrame(ad_cnv.obsm['X_pca'], index=ad_cnv.obs_names)
    
    sc.pp.neighbors(ad_cnv, n_neighbors=100, use_rep='X_pca', metric='correlation')
    sc.tl.umap(ad_cnv)
    plt.rcParams["figure.figsize"] = (7, 7)
    sc.pl.embedding(ad_cnv, color=['time', 'cluster'], basis='X_umap', title='time', cmap='nipy_spectral', size=50)
    
    # generate neighbor draph in multiscale diffusion space
    dm_res = palantir.utils.run_diffusion_maps(pca_projections)
    ms_data = palantir.utils.determine_multiscale_space(dm_res, n_eigs=10)
    ad_cnv.obsm['X_palantir']=ms_data.values
    sc.pp.neighbors(ad_cnv, n_neighbors=100, use_rep='X_palantir', metric='correlation')
    
    sc.tl.umap(ad_cnv)
    plt.rcParams["figure.figsize"] = (4, 4)
    sc.pl.embedding(ad_cnv, color=['time', 'cluster'], basis="X_umap", title='time', cmap='nipy_spectral', size=25)
    
    res = 0.25
    plt.rcParams["figure.figsize"] = (4, 4)
    sc.tl.leiden(ad_cnv, key_added='cnv_clusters', resolution=res)
    sc.pl.embedding(ad_cnv, color=['cnv_clusters'], basis="X_umap", title='time', palette=palette1)
    
    Load images and create ADs
    images = {id: [(ad_temp := sc.read(preprocessedStDataPath + '%s/st_adata_processed.h5ad' % id)).uns['spatial'], ad_temp.obs.index, ad_temp.obsm['spatial']] for id in ids}
    ads = dict()
    for sample in ids:
        ads[sample] = ad_cnv[ad_cnv.obs['id']==sample, :].copy()    
        ads[sample].uns['spatial'] = images[sample][0]
        ads[sample].obsm['spatial'] = pd.DataFrame(index=images[sample][1], data=images[sample][2]).reindex(ads[sample].obs['original_barcode']).values
    
    plotSpatialAll(ads, identity='cnv_clusters', palette=palette1, f=0.75)
    

    生活很好,有你更好

    相关文章

      网友评论

          本文标题:空间转录组数据分析之CNV burden和CNV聚类(pytho

          本文链接:https://www.haomeiwen.com/subject/sxbmfjtx.html