之前我们演示cpdb分析的时候,侧重的是在R中用seurat对象准备input data。有小伙伴提到“我单细胞一开始就是用scanpy分析的,怎么进行CPDB分析呢?”,其实之前的cellphonedb V5教程里面我们已经提到了。而且我们前面的帖子也分享了h5ad转seurat的方式(玩转单细胞(16):Scanpy单细胞h5ad数据转化为Seurat对象),以及seurat转h5ad的多种方式,所以两者之间不存在隔阂,哪种格式搞不定,互相转化一下就好了。
cpdb分析的counts_file_path参数可以是表达矩阵文件,也可以是单细胞分析结果h5ad对象,但需要注意矩阵必须是normalize后的。我们演示使用的数据还是scanpy官网教程中PBMC的数据,数据没有任何生物学意义,仅仅作为cpdb流程演示。
import pandas as pd
import scanpy as sc
import numpy as np
from scipy import sparse

# Load the annotated PBMC3k object and switch to the matrix stored in `.raw`.
# NOTE(review): this assumes `.raw` holds un-normalized counts. If `.raw` was
# saved after normalize/log1p, the normalization step below should be skipped
# -- confirm against how pbmc3k.h5ad was produced.
adata = sc.read_h5ad('./pbmc3k.h5ad')
adata = adata.raw.to_adata()
adata

# Drop the 'Megakaryocytes' cluster. `.copy()` turns the sliced view into an
# independent AnnData so the in-place edits below do not act on a view.
adata_cpdb = adata[~adata.obs['leiden'].isin(['Megakaryocytes'])].copy()

# Normalize: densify X only when it is actually sparse (a dense ndarray has no
# `.toarray()`), then scale every cell to 1e4 total counts.
if sparse.issparse(adata_cpdb.X):
    adata_cpdb.X = adata_cpdb.X.toarray()
# `normalize_total` is the documented replacement for the deprecated
# `normalize_per_cell`; unlike the old call it does not drop zero-count cells.
sc.pp.normalize_total(adata_cpdb, target_sum=1e4)
adata_cpdb.layers["norm"] = adata_cpdb.X

# Genes x cells table of normalized expression (transposed from cells x genes).
counts = pd.DataFrame(
    data=adata_cpdb.layers["norm"],
    index=adata_cpdb.obs.index.tolist(),
    columns=adata_cpdb.var.index.tolist(),
).T
counts.index.name = "Gene"
# Written to the path the text-input CellphoneDB run reads as counts_file_path.
counts.to_csv('./count_cpdb.txt', sep='\t')

# Save meta: one row per cell barcode with its 'leiden' label as "Cell type".
meta = pd.DataFrame(
    data=adata_cpdb.obs['leiden'].tolist(),
    index=adata_cpdb.obs.index.tolist(),
    columns=["Cell type"],
)
meta.index.name = "Cell"
# Written to the path both CellphoneDB runs read as meta_file_path.
meta.to_csv('./meta_cpdb.txt', sep='\t')
meta

# Normalized object for the h5ad-input CellphoneDB run.
adata_cpdb.write_h5ad('./adata_cpdb.h5ad')
CPDB analysis:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import os
import sys
from scipy import sparse
# Download database from source
from IPython.display import HTML, display
from cellphonedb.utils import db_releases_utils
from cellphonedb.utils import db_utils

# Render the HTML table of available database releases in the notebook.
display(HTML(db_releases_utils.get_remote_database_versions_html()['db_releases_html_table']))

# Release to fetch and the local directory it is downloaded into.
cpdb_version = 'v5.0.0'
cpdb_target_dir = os.path.join('./', cpdb_version)
db_utils.download_database(cpdb_target_dir, cpdb_version)

sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
sc.settings.set_figure_params(dpi=100)  # low dpi (dots per inch) yields small inline figures

# Inputs and output folder for the text-file run. The database path is derived
# from cpdb_target_dir so that changing cpdb_version cannot leave it pointing
# at a different release's folder.
cpdb_file_path = os.path.join(cpdb_target_dir, 'cellphonedb.zip')
meta_file_path = './meta_cpdb.txt'
counts_file_path = './count_cpdb.txt'
out_path = './test1'
from cellphonedb.src.core.methods import cpdb_statistical_analysis_method

# Run 1: statistical analysis driven by the paths assigned just above
# (counts_file_path / meta_file_path / cpdb_file_path / out_path).
# Parameter notes follow the CellphoneDB method documentation.
cpdb_results1 = cpdb_statistical_analysis_method.call(
    # --- inputs ---
    cpdb_file_path=cpdb_file_path,
    meta_file_path=meta_file_path,
    counts_file_path=counts_file_path,
    counts_data='hgnc_symbol',  # gene identifier type used in the counts
    # Optional inputs, not used in this run:
    #   active_tfs_file_path=active_tf_path
    #   microenvs_file_path=microenvs_file_path
    # --- analysis settings ---
    iterations=1000,
    threshold=0.1,
    pvalue=0.05,
    score_interactions=True,
    # --- execution ---
    threads=5,
    debug_seed=42,
    debug=False,
    # --- output ---
    result_precision=3,
    separator='|',  # joins the two cell types of a pair, e.g. cellA|cellB
    output_path=out_path,
)
CPDB分析:input h5ad & meta!
# Run 2: same analysis, but the counts come from the h5ad object instead of
# the text matrix; the meta table and database are the same files as before.
cpdb_file_path = './v5.0.0/cellphonedb.zip'
meta_file_path = './meta_cpdb.txt'
counts_file_path = './adata_cpdb.h5ad'
out_path = './test2'

from cellphonedb.src.core.methods import cpdb_statistical_analysis_method

# Parameter notes follow the CellphoneDB method documentation.
cpdb_results2 = cpdb_statistical_analysis_method.call(
    # --- inputs ---
    cpdb_file_path=cpdb_file_path,
    meta_file_path=meta_file_path,
    counts_file_path=counts_file_path,
    counts_data='hgnc_symbol',  # gene identifier type used in the counts
    # Optional inputs, not used in this run:
    #   active_tfs_file_path=active_tf_path
    #   microenvs_file_path=microenvs_file_path
    # --- analysis settings ---
    iterations=1000,
    threshold=0.1,
    pvalue=0.05,
    score_interactions=True,
    # --- execution ---
    threads=5,
    debug_seed=42,
    debug=False,
    # --- output ---
    result_precision=3,
    separator='|',  # joins the two cell types of a pair, e.g. cellA|cellB
    output_path=out_path,
)
整合结果:两种方式一样的结果!这种结果可以导出在R或者py中进行各种可视化!
# Build a summary from the run-1 results and preview its first rows.
# NOTE(review): `summary_cpdb_results` is neither defined nor imported in the
# visible part of this file -- presumably a user-written helper; it must be
# defined before this line or it raises NameError.
cpdb_pbmc = summary_cpdb_results(cpdb_results1)
cpdb_pbmc.head()

利用cpdb里面的函数,提取结果进行一些可视化!
from cellphonedb.utils import search_utils

# Query the run-1 results for interactions between CD4 T cells and the two
# monocyte populations, keeping only interactions that reach the score cutoff.
search_results = search_utils.search_analysis_results(
    # Cell-type filters: every entry of list 1 is paired with every entry of
    # list 2 (each may also be 'All').
    query_cell_types_1=['CD4 T'],
    query_cell_types_2=['CD14 Monocytes', 'FCGR3A Monocytes'],
    # Optional filters, not used here:
    #   query_genes=['TGFBR1']             # restrict by participating genes (list)
    #   query_interactions=['CSF1_CSF1R']  # restrict by interaction name (list)
    # Result tables produced by CellphoneDB.
    significant_means=cpdb_results1['significant_means'],
    deconvoluted=cpdb_results1['deconvoluted'],
    interaction_scores=cpdb_results1['interaction_scores'],
    # Minimum interaction score an interaction must reach to be reported.
    query_minimum_score=50,
    # Separator (default: |) used to split cell pairs (cellA|cellB).
    separator='|',
    # Per the upstream comment this reshapes the output table and removes
    # non-significant interactions.
    # NOTE(review): upstream wording says "wide table" although the flag is
    # named long_format -- confirm the resulting layout.
    long_format=True,
)
search_results.head()

网友评论