参考生信技能树教程:https://mp.weixin.qq.com/s/i6_x1yeMbXawfKm36ewnKQ
本文对原链接中混合使用 shell 和 Python 脚本的方法进行了改进,避免了不必要的错误,运行也更高效。
注意:使用前请修改路径。读取路径为 cellranger 对各个样本的输出文件路径,每个样本目录下需包含 matrix.mtx.gz 和 barcodes.tsv.gz。
import scrublet as scr
import scipy.io
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sys
import os, sys
# Batch doublet detection: run Scrublet on every sample directory found under
# the result folder and write one table of doublet scores/calls per sample.

# Folder that holds one sub-directory per sample; each sub-directory is
# expected to contain matrix.mtx.gz and barcodes.tsv.gz.
path = "/biodata_01_4T/scRNA-seq_raw_data/Esophagus/PRJNA777911/result/"
file_to_search = path
# Folder that receives the per-sample result tables.
out_dir = '/biodata_01_4T/scRNA-seq_raw_data/Esophagus/PRJNA777911/scrublet_result/'

os.chdir(path)
# Create the output folder up front so to_csv does not fail on a fresh run.
os.makedirs(out_dir, exist_ok=True)

# Collect the sample sub-directories. The full path is tested so the check
# does not depend on the current working directory.
dirlist = [
    filename
    for filename in os.listdir(file_to_search)
    if os.path.isdir(os.path.join(file_to_search, filename))
]
print(dirlist)

for i in dirlist:
    input_dir = path + i
    matrix_file = input_dir + '/matrix.mtx.gz'
    barcodes_file = input_dir + '/barcodes.tsv.gz'

    # A stray sub-directory without the expected inputs should not abort the
    # whole batch; report it and move on to the next sample.
    if not (os.path.isfile(matrix_file) and os.path.isfile(barcodes_file)):
        print('Skipping ' + i + ': matrix.mtx.gz or barcodes.tsv.gz not found')
        continue

    # The matrix is transposed before use; the scores are later attached to
    # the barcode table row by row, so rows must correspond to barcodes.
    # NOTE(review): presumably the file on disk is genes x cells - confirm.
    counts_matrix = scipy.io.mmread(matrix_file).T.tocsc()
    out_df = pd.read_csv(barcodes_file, header=None, index_col=None, names=['barcode'])

    scrub = scr.Scrublet(counts_matrix, expected_doublet_rate=0.06)
    doublet_scores, predicted_doublets = scrub.scrub_doublets(
        min_counts=2,
        min_cells=3,
        min_gene_variability_pctl=85,
        n_prin_comps=30,
    )

    # Fraction of cells called as doublets.
    # NOTE(review): this attribute may be missing when Scrublet cannot choose
    # a score threshold automatically - confirm against the Scrublet source.
    # getattr keeps the batch running and prints None in that case.
    print(getattr(scrub, 'detected_doublet_rate_', None))

    out_df['doublet_scores'] = doublet_scores
    out_df['predicted_doublets'] = predicted_doublets
    # File name is the sample directory name followed directly by 'doublet.txt'.
    out_df.to_csv(out_dir + i + 'doublet.txt', index=False, header=True)
    print(out_df["predicted_doublets"].value_counts())
经比对,本方法的结果与 DoubletFinder R 包的结果有较高的一致性,且运行速度大幅提升。
网友评论