美文网首页
单细胞空间数据分析之velocity

单细胞空间数据分析之velocity

作者: 单细胞空间交响乐 | 来源:发表于2024-05-16 11:15 被阅读0次

    作者,Evil Genius

    今天我们要完成另外一项工程,单细胞和空间的velocity,拿到如下的结果

    单细胞 空间

    在生物的世界里,velocyto衡量恶性细胞的进化方向非常常见。

    这也是轨迹分析和CNV分析相结合的分析点。

    调试了半天,大晚上的,夜景远没有上海漂亮~~~

    现在真的是两极分化,要么从源头上写代码分析数据,要么就用别人现成的内容,可调整的空间大大缩小。

    from IPython.display import display, HTML, Image
    display(HTML("<style>.container { width:95% !important; }</style>"))
    
    %load_ext autoreload
    %autoreload 2
    
    import warnings
    warnings.simplefilter(action='ignore', category=FutureWarning)
    import matplotlib.cbook
    warnings.simplefilter(action='ignore', category=matplotlib.cbook.MatplotlibDeprecationWarning)
    
    import os
    import numpy as np
    import pandas as pd
    import scanpy as sc
    from scipy.sparse import csr_matrix
    import scvelo as scv
    import matplotlib.pyplot as plt
    
    import sys
    sys.path.append("../../lib")
    
    from stpalette import palette_WM4007_rna, palette_WM4237_rna
    from pathways import genesDicts
    from plots import plotSpatialAll
    from plots import plotPhiHeatmap, plotFiForAd
    from utils import computFullGridVelocity, getGridPhi, getGridVelocityForSourceSink, getFiOfEmb, loadAddLayersVelocytoData
    
    需要加载的stpalette、pathways、plots、utils放在了百度网盘,接受服务,我也学一学别人接一下分析项目。
    model = 'WM4007'
    
    palette_rna = palette_WM4007_rna if model=='WM4007' else palette_WM4237_rna
    
    initialLibrarySizes = {'WM4007_T0_S1_ST': 30967.0,
                        'WM4007_T0_S2_ST': 31210.0,
                        'WM4007_T1_S1_ST': 27910.0,
                        'WM4007_T1_S2_ST': 28700.0,
                        'WM4007_T2_S1_ST': 20318.0,
                        'WM4007_T2_S2_ST': 14094.0,
                        'WM4007_T3_S1_ST': 25289.0,
                        'WM4007_T3_S2_ST': 24304.0,
                        'WM4007_T4_S1_ST': 26625.0,
                        'WM4007_T4_S2_ST': 30744.0,
                        'WM4007_TC_S1_ST': 26802.0,
                        'WM4007_TC_S2_ST': 37152.0}
    
    dataPath = '../../data/'
    preprocessedStDataPath = 'd:/from HPCC 11 28 2022/results_NF1-nod-t2t-k35/%s/' % model
    preprocessedVelDataPath = 'd:/from HPCC 11 28 2022/results_NF3-nod-t2t-k35/%s/' % model
    
    ids = sorted(np.loadtxt(dataPath + 'ids_%s_ST.txt' % model, dtype=str))
    print(ids, '\n')
    
    ids = pd.Series(ids)
    cond1 = ids.str.contains('E_S')
    cond2 = ids.str.contains('0_S')
    cond3 = ids.str.contains('C_S')
    #ids = ids[cond1].values.tolist() + ids[cond2].values.tolist() + ids[cond3].values.tolist() + ids[~cond1 & ~cond2 & ~cond3].values.tolist()
    ids = ids[cond2].values.tolist() + ids[cond3].values.tolist() + ids[~cond1 & ~cond2 & ~cond3].values.tolist()
    # ids = ids.values.tolist()
    print(ids)
    

    加载数据

    ad_all = sc.read(dataPath + 'ad_all_human_clustered_st_%s.h5ad' % model)
    ad_all = ad_all[ad_all.obs['human_ratio'] > 0.25]
    ad_all.shape
    
    sc.pl.umap(ad_all, color='cluster', palette=palette_rna)
    
    ads = {id: ad_all[ad_all.obs['sample']==id, :] for id in ids}
    
    images = {id: sc.read(preprocessedStDataPath + '%s/st_adata_processed.h5ad' % id).uns['spatial'] for id in ids}
    
    for sample in ids[:]:
        ads[sample] = ad_all[ad_all.obs['sample']==sample, :].copy()    
        ads[sample].uns['spatial'] = images[sample]
    
    plotSpatialAll(ads, identity='cluster', palette=palette_rna, f=0.65)
    
    loadAddLayersVelocytoData(ids=ids[:], ads=ads, dataPath=preprocessedVelDataPath, identity='cluster', initialLibrarySizes=initialLibrarySizes)
    
    def getLayersAll():   
        df_var = pd.concat([ads[id].var for id in ids])
        ad_all_temp = sc.concat({id:ads[id] for id in ids[:]}, label='sample', join='outer')
        ad_all_temp.var = df_var.loc[~df_var.index.duplicated()].reindex(ad_all_temp.var.index)
        ad_all_temp = ad_all_temp[:, ad_all_temp.var['is_human_gene']]
        return ad_all_temp.layers['spliced'], ad_all_temp.layers['unspliced'], ad_all_temp.obs.index, ad_all_temp.var.index
    
    layer_spliced, layer_unspliced, spots, genes = getLayersAll()
    
    ad_all_temp = ad_all[spots, genes].copy()
    ad_all_temp.layers['spliced'] = layer_spliced
    ad_all_temp.layers['unspliced'] = layer_unspliced
    del layer_spliced, layer_unspliced
    
    Calculate velocity for the entire model
    scv.tl.velocity_graph(ad_all_temp, basis='umap')
    
    plt.rcParams["figure.figsize"] = (10,10)
    identity = 'cluster'
    scv.pl.velocity_embedding_stream(ad_all_temp, basis='X_umap', arrow_size=1.2, color=identity, palette=palette_rna, title='', size=75, alpha=0.75, legend_loc='none')
    
    plt.rcParams["figure.figsize"] = (5,5)
    plotFiForAd(ad_all_temp, size=20, identity='cluster', palette=palette_rna)
    
    plt.rcParams["figure.figsize"] = (7,7)
    scv.pl.velocity(ad_all_temp, ['CREB3', 'DCT', 'RAF1', 'TYRP1'], ncols=2)
    
    plt.rcParams["figure.figsize"] = (4,4)
    scv.pl.scatter(ad_all_temp, 'CDKN2A', color=['cluster', 'velocity'], size=5, palette=palette_rna, wspace=0.35, add_outline='4')
    
    # This step needs plenty of memory
    scv.tl.rank_velocity_genes(ad_all_temp, groupby='cluster', min_corr=0.3)
    
    df = scv.DataFrame(ad_all_temp.uns['rank_velocity_genes']['names'])
    
    kwargs = dict()
    scv.pl.scatter(ad_all_temp, df['4'][:5], ylabel='4', frameon=False, size=5, linewidth=1.5, add_outline='4')
    
    scv.tl.velocity_confidence(ad_all_temp)
    
    keys = 'velocity_length', 'velocity_confidence'
    scv.pl.scatter(ad_all_temp, c=keys, cmap='coolwarm', perc=[5, 95], title=['Differentiation rate', 'Coherence'])
    
    df = ad_all_temp.obs.groupby('cluster')[keys].mean().T
    df.style.background_gradient(cmap='coolwarm', axis=1)
    
    scv.tl.paga(ad_all_temp, groups='cluster')
    
    scv.pl.paga(ad_all_temp, basis='umap', size=50, alpha=0.1, min_edge_width=2, node_size_scale=1.5)
    
    Calculate velocity on UMAP per timepoint
    ad_all_temp_T = ad_all_temp[ad_all_temp.obs['T'].isin(['TC']), :]
    sc.pp.neighbors(ad_all_temp_T, use_rep='X_pca_harmony')
    sc.tl.umap(ad_all_temp_T)
    scv.tl.velocity(ad_all_temp_T, mode='stochastic')
    scv.tl.velocity_graph(ad_all_temp_T, n_jobs=4, basis='umap')
    plt.rcParams["figure.figsize"] = (5, 5)
    scv.pl.velocity_embedding_stream(ad_all_temp_T, basis='X_umap', arrow_size=1.2, color=identity, palette=palette_rna, title='', size=75, alpha=0.75, legend_loc='none')
    plotFiForAd(ad_all_temp_T)
    

    
    
    f = 1.0
    ncols = 4
    nrows = 3
    fig, axs = plt.subplots(nrows, ncols, figsize=(f*4.5*ncols, f*4.5*nrows))
    for isample, sample in enumerate(ids):
        try:
            ax = axs[int((isample - isample%ncols)/nrows), isample%ncols]
            identity = 'cluster'
            ad = ads[sample]
    
            scv.pl.velocity_embedding_stream(ad, basis='X_spatial', arrow_size=0.6, color=identity, palette=palette_rna, 
                                             title=sample, size=75, alpha=0.75, show=False, ax=ax, legend_loc='none') # 250 for 12x12
    
            (xmin, xmax), (ymin, ymax) = ax.get_xlim(), ax.get_ylim()
            sf = ads[sample].uns['spatial']['library_id']['scalefactors']['tissue_lowres_scalef']
            image = ad.uns['spatial']['library_id']['images']['lowres'].copy()
            ax.imshow(image, interpolation='none', extent=[0, image.shape[1]/sf, -20/sf-image.shape[1]/sf, 0], zorder=-1)
    
            ax.legend(frameon=False, loc='upper left', fontsize=10, markerscale=1.5)
        except Exception as exception:
            print(exception)
    
    plt.subplots_adjust(wspace=0.15, hspace=0.2)
    # fig.savefig('spatial rna velocity.png', dpi=450)
    
    for id in ids:
        ad = ads[id]
    
        try:
            X_grid_full, V_grid_full, X_emb = computFullGridVelocity(ad, emb='spatial')
            gridPhi = getGridPhi(X_grid_full, V_grid_full, R=2)
            plotPhiHeatmap(gridPhi, X_grid_full, emb='spatial');
    
            if True:
                fig, axs = plt.subplots(1, 2, figsize=(10, 5))
    
                params = dict(density=1, arrow_size=100, arrow_length=2000, color=identity, palette=palette_rna, size=45, alpha=0.75) 
    
                ax = axs[0]
                X_source, V_source = getGridVelocityForSourceSink(X_grid_full, V_grid_full, gridPhi, isSource=True)
                scv.pl.velocity_embedding_grid(ad, basis='X_spatial', X_grid=X_source, V_grid=V_source, legend_loc='none', title='Source', ax=axs[0], show=False, **params)
    
                (xmin, xmax), (ymin, ymax) = ax.get_xlim(), ax.get_ylim()
                sf = ads[sample].uns['spatial']['library_id']['scalefactors']['tissue_lowres_scalef']
                image = ad.uns['spatial']['library_id']['images']['lowres'].copy()
                ax.imshow(image, interpolation='none', extent=[0, image.shape[1]/sf, -20/sf-image.shape[1]/sf, 0], zorder=-1)
                ax.legend(frameon=False, loc='upper left', fontsize=10, markerscale=1.5)
    
                ax = axs[1]
                X_sink, V_sink = getGridVelocityForSourceSink(X_grid_full, V_grid_full, gridPhi, isSource=False)
                scv.pl.velocity_embedding_grid(ad, basis='X_spatial', X_grid=X_sink, V_grid=V_sink, legend_loc='none', title='Sink', ax=axs[1], show=False, **params)
    
                (xmin, xmax), (ymin, ymax) = ax.get_xlim(), ax.get_ylim()
                sf = ads[sample].uns['spatial']['library_id']['scalefactors']['tissue_lowres_scalef']
                image = ad.uns['spatial']['library_id']['images']['lowres'].copy()
                ax.imshow(image, interpolation='none', extent=[0, image.shape[1]/sf, -20/sf-image.shape[1]/sf, 0], zorder=-1)
                ax.legend(frameon=False, loc='upper left', fontsize=10, markerscale=1.5)
    
            Fi_emb = getFiOfEmb(X_grid_full, gridPhi, X_emb)
            ad.obs['Fi_spatial'] = Fi_emb
            sc.pl.spatial(ad, color='Fi_spatial', cmap='bwr', vmin=-1, vmax=1)
            sc.pl.violin(ad, keys='Fi_spatial', groupby='cluster')
        except Exception as exception:
            print(exception) 
    

    还有下面这样的

    生活很好,有你更好

    相关文章

      网友评论

          本文标题:单细胞空间数据分析之velocity

          本文链接:https://www.haomeiwen.com/subject/sjibfjtx.html