scTyper

作者: LET149 | 来源:发表于2023-11-06 09:15 被阅读0次

    https://github.com/omicsCore/scTyper

    1. 安装

    # install BiocManager if necessary
    if (!requireNamespace("BiocManager", quietly = TRUE))
        install.packages("BiocManager")
    
    # install devtools if necessary
    BiocManager::install("devtools")
    library(devtools)
    
    # install the scTyper package
    devtools::install_github("omicsCore/scTyper")
    

    2. 使用

    以下是一个脚本示例

    require(infercnv)
    require(ComplexHeatmap)
    require(scTyper)
    require(Seurat)
    require(knitr)
    require(kableExtra)
    
    '#设置工作路径
    setwd("/Users/let149/Desktop/Dmel/SRR9705086/Output")
    
    #导入经Seurat处理后的数据
    load(file = "/Users/let149/Desktop/Dmel/SRR9705086/RData/SRR9705086_raw_top_half.Rdata")
    
    #为数据的meta.data添加后续处理需要的列
    SRR9705086_raw_top_half@meta.data[["sample_id"]] <- "SRR9705086"  #新添加scTyper需要使用的数特征
    SRR9705086_raw_top_half@meta.data[["sample.name"]] <- "SRR9705086"
    SRR9705086_raw_top_half@meta.data[["tissue.type"]] <- "SRR9705086_testis"
    SRR9705086_raw_top_half@meta.data[["cell.type.ref"]] <- "SRR9705086_testis"
    
    #自建marker
    #每个细胞类型的marker是一个数据框,此数据框只有一列,其中每行为一个marker基因
    #所有细胞类型的marker构成一个列表
    
    Early_spermatids <- read.csv(file="/Users/let149/Desktop/Dmel/SRR9705086/Marker/Marker_e-value_top9/Marker_e-value_top9_of_Early_spermatids.tsv", col.names = F)
    Early_spermatids <- Early_spermatids[,1]
    
    Late_spermatocytes <- read.csv(file="/Users/let149/Desktop/Dmel/SRR9705086/Marker/Marker_e-value_top9/Marker_e-value_top9_of_Late_spermatocytes.tsv", col.names = F)
    Late_spermatocytes <- Late_spermatocytes[,1]
    
    Early_spermatocytes <- read.csv(file="/Users/let149/Desktop/Dmel/SRR9705086/Marker/Marker_e-value_top9/Marker_e-value_top9_of_Early_spermatocytes.tsv", col.names = F)
    Early_spermatocytes <- Early_spermatocytes[,1]
    
    Hub_cells <- read.csv(file="/Users/let149/Desktop/Dmel/SRR9705086/Marker/Marker_e-value_top9/Marker_e-value_top9_of_Hub_cells.tsv", col.names = F)
    Hub_cells <- Hub_cells[,1]
    
    Cyst_cells <- read.csv(file="/Users/let149/Desktop/Dmel/SRR9705086/Marker/Marker_e-value_top9/Marker_e-value_top9_of_Cyst.tsv", col.names = F)
    Cyst_cells <- Cyst_cells[,1]
    
    Late_spermatogonia <- read.csv(file="/Users/let149/Desktop/Dmel/SRR9705086/Marker/Marker_e-value_top9/Marker_e-value_top9_of_Late_spermatogonia.tsv", col.names = F)
    Late_spermatogonia <- Late_spermatogonia[,1]
    
    Early_spermatogonia <- read.csv(file="/Users/let149/Desktop/Dmel/SRR9705086/Marker/Marker_e-value_top9/Marker_e-value_top9_of_GSC_Early_spermatogonia.tsv", col.names = F)
    Early_spermatogonia <- Early_spermatogonia[,1]
    
    Epithelial_cells <- read.csv(file="/Users/let149/Desktop/Dmel/SRR9705086/Marker/Marker_e-value_top9/Marker_e-value_top9_of_Epithelial_cells.tsv", col.names = F)
    Epithelial_cells <- Epithelial_cells[,1]
    
    Mature_spermatids <- read.csv(file="/Users/let149/Desktop/Dmel/SRR9705086/Marker/Marker_e-value_top9/Marker_e-value_top9_of_Mature_spermatids.tsv", col.names = F)
    Mature_spermatids <- Mature_spermatids[,1]
    
    marker_top9_list <- list(Early_spermatids=Early_spermatids, Early_spermatocytes=Early_spermatocytes, Hub_cells=Hub_cells, Cyst_cells=Cyst_cells, Late_spermatogonia=Late_spermatogonia, Early_spermatogonia=Early_spermatogonia, Epithelial_cells=Epithelial_cells, Mature_spermatids=Mature_spermatids, Late_spermatocytes=Late_spermatocytes)
    
    #载入pheno.fn文件的路径  #此文件要遵循固定的格式,见下图
    dir.pheno.fn <- "/Users/let149/Desktop/Dmel/SRR9705086/Marker/pheno.fn.csv"
    
    #对导入后的数据进行重新聚类,当以下使用scTyper进行cluster注释时,默认被注释的cluster是当前活跃的cluster
    SRR9705086_raw_top_half <- FindNeighbors(SRR9705086_raw_top_half, dims = 1:13)
    SRR9705086_raw_top_half <- FindClusters(SRR9705086_raw_top_half, resolution = 9)
    
    #运行scTyper
    SRR9705086.NTP.cluster=scTyper(seurat.object=SRR9705086_raw_top_half,  #对象名
                                            marker=marker_top9_list,  #marker列表名
                                            wd = getwd(),
                                            output.name = "SRR9705086.raw.top.half_NTP_cluster_cluster57_based.on.top9.marker.output",  #输出结果目录名
                                            pheno.fn = dir.pheno.fn ,  #此文件在上面给出了形式
                                            qc = FALSE,
                                            run.cellranger=FALSE,
                                            norm.seurat=FALSE,
                                            cell.typing.method="NTP",  #可采用"NTP", "Average", "ES"三种方法之一
                                            level="cluster",  #可选择对"cluster"或者"cell"进行聚类
                                            run.inferCNV=FALSE,
                                            proj.name = "SRR9705086.scTyper",
                                            gene.ref.gtf=NULL,
                                            feature.to.test = "tissue.type",
                                            report.mode=TRUE,
                                            mc.cores = 6)
    

    pheno.fn文件 :


    图片.png

    注意 : 文件名,文件格式,文件内容

    3. scTyper 源码

    #' @title scTyper
    #' @description The main scTyper function
    #' @param seurat.object Seurat object, if users have pre-processed seurat object, user have to insert seurat object as input
    #' @param marker Cell markers to use in cell typing, character or List (#identifier or #StudyName or User defined gene list)
    #' @param wd Working directory
    #' @param output.name Output directory name
    #' @param pheno.fn Phenotype file path
    #' @param qc  Whether to execute FASTQC (default=FALSE)
    #' @param run.cellranger whether to excute cellranger count (default=FALSE)
    #' @param norm.seurat whether to normalize seurat object (default=FALSE)
    #' @param cell.typing.method cell typing method, c("NTP", "ES", "Average"), (default = "NTP")
    #' @param level Indicate the cell assignment level (cell or cluster)
    #' @param run.inferCNV Indicate whether ‘malignant cell typing by inferCNV process run
    #' @param proj.name Project name
    #' @param fastqc.path FastQC program path
    #' @param fastq.dir FastQC output directory
    #' @param fq1.idx Index of the FASTQ file (Read 1)
    #' @param fq2.idx Index of the FASTQ file (Read 2)
    #' @param cellranger.path Cell Ranger program path
    #' @param cellranger.ref.dir Directory of Cell Ranger reference file
    #' @param percent.min.cells Cutoff to filter features containing minimum percent of cells
    #' @param min.features Cutoff to filter cells containing minimum number of features
    #' @param percent.mt Cutoff for filtering cells that have >n percent mitochondrial counts
    #' @param vars.to.regress Variables to regress out
    #' @param dims A vector of the dimensions to use in construction of the SNN graph.
    #' @param resolution Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities.
    #' @param slot Data type of Seurat object, c("scale.data", "count.data", "data")
    #' @param assay Assay of Seurat object
    #' @param NTP.g.filter.method Method to filter genes in NTP
    #' @param NTP.gene.filter.cutoff Cutoff to filter genes of in NTP
    #' @param NTP.distance NTP distance method, a character, either c("correlation" or "cosine").
    #' @param NTP.norm.method NTP normalization method, either c("none", "row.std")
    #' @param gene.ref.gtf Path of GTF file including genomic location for genes
    #' @param feature.to.test Column header name of the meta data in Seurat object (select the cell groups for T.test) either "tissue.type" or "cell.type"
    #' @param cells.test_excluded A value indicates the cells to be excluded in T.test
    #' @param cells.test_reference A value indicates the cells to use as be excluded in T.test
    #' @param fc.cutoff Cutoff of fold change
    #' @param cutoff.gene.cluster A cutoff P-value for filtering out the gene clusters (calculated from GO analysis)
    #' @param malignant.cell.type Cell type to assign  malignant cell
    #' @param report.mode Generate report file
    #' @param mc.cores The number of cores to use. Must be at least one(default=1), and parallelization requires at least two cores.
    #' @return Seurat object
    #' @export
    scTyper<- function(seurat.object=NULL,
                       marker="Puram.2017.HNSCC.TME",
                       
                       wd=getwd(),
                       output.name = "test.result",
                       pheno.fn,
                       qc = FALSE,
                       run.cellranger=FALSE,
                       norm.seurat=FALSE,
                       cell.typing.method=c("NTP", "ES", "Average"),
                       level=c("cell","cluster"),
                       run.inferCNV=TRUE,
                       proj.name = "scTyper",
                       
                       fastqc.path=NULL,
                       fastq.dir=NULL,
                       fq1.idx="_R1_001.fastq",
                       fq2.idx="_R2_001.fastq",
                       
                       cellranger.path=NULL,
                       cellranger.ref.dir=NULL,
                       
                       percent.min.cells=0.1,
                       min.features=200,
                       percent.mt=10,
                       vars.to.regress=c("nCount_RNA", "percent.mt"),
                       dims=1:10,
                       resolution=2,
                       
                       slot=c("scale.data", "count.data", "data"),
                       assay="RNA",
                       NTP.g.filter.method=c("sd", "mad", "none"),
                       NTP.gene.filter.cutoff=0.3,
                       NTP.distance=c("cosine","correlation"),
                       NTP.norm.method=c("none", "row.std"),
                       
                       gene.ref.gtf=NULL,
                       feature.to.test = c("tissue.type", "cell.type"),
                       cells.test_excluded="Epithelial",
                       cells.test_reference = "immune",
                       fc.cutoff=0.05,
                       cutoff.gene.cluster=0.05,
                       malignant.cell.type="Epithelial",
                       report.mode=TRUE,
                       mc.cores = 1){
        
        start_time <- Sys.time()
        cell.typing.method=match.arg(cell.typing.method)
        level=match.arg(level)
        slot=match.arg(slot)
        NTP.gene.filter.method=match.arg(NTP.g.filter.method)
        NTP.distance=match.arg(NTP.distance)
        NTP.norm.method=match.arg(NTP.norm.method)
        feature.to.test=match.arg(feature.to.test)
        
        library(fastqcr)
        library(Seurat)
        library(infercnv)
        library(gProfileR)
        library(rmarkdown)
        library(parallel)
        library(perm)
        library(Biobase)
        library(reshape2)
        library(limma)
        library(GenomicRanges)
        library(ggplot2)
        library(grid)
        library(gridExtra)
        library(grDevices)
        library(ComplexHeatmap)
        library(circlize)
        library(png)
        library(knitr)
        library(kableExtra)
        library(pander)
        library(colorspace)
        
        # default outdirs
        qc.dir <- file.path(wd, output.name, "00_qc")
        count.dir <- file.path(wd, output.name, "01_count")
        ntp.dir <- file.path(wd, output.name, "02_NTP")
        inferCNV.dir  <- file.path(wd, output.name, "03_inferCNV")
        rda.dir <- file.path(wd, output.name, "data")
        
        
        
        #######################
        ### start scTyper
        dir.create(file.path(wd, output.name), showWarnings = F)
        dir.create(file.path(rda.dir), showWarnings = F)
        
        pheno.df = read.csv(pheno.fn, header = T, sep = ",")
        sample.name = as.character(pheno.df$Sample_ID)
        
        if(qc==TRUE){
            fastqc.outs = fastqc(fastqc.path, fastq.dir=fastq.dir, sample.name, fq1.idx=fq1.idx, fq2.idx=fq2.idx, output.dir=qc.dir, run.cmd=TRUE, mc.cores=mc.cores)
        }
        
        if(run.cellranger==TRUE){
            count.out.dirs = CellrangerCount(cellranger.path, fastq.dir=fastq.dir, output.dir=count.dir, cellranger.ref.dir=cellranger.ref.dir, sample.name=sample.name, run.cmd=TRUE, mc.cores=mc.cores)
            setwd(wd)
            metrics_summary = make.stat_summary(count.dir = count.dir, sample.name = sample.name, pheno.df=pheno.df, output.dir = count.dir)
        }
        
        if(norm.seurat==TRUE){
            seurat = run.seurat.process(count.dir = count.dir, rda.dir = rda.dir, project = proj.name, sample.name = sample.name, metrics_summary,
                                        percent.min.cells=percent.min.cells, min.features=min.features, percent.mt=percent.mt, vars.to.regress=vars.to.regress, dims=dims,
                                        resolution=resolution)
        }
        
        if(class(seurat.object)=="Seurat") seurat=seurat.object
        
        ##cell typing
        seurat=cell.typing.seurat(seurat=seurat, marker=marker, cell.typing.method=cell.typing.method, level=level, wd=wd, slot=slot, assay=assay, ntp.dir=ntp.dir, rda.dir=rda.dir, NTP.g.filter.method=NTP.g.filter.method, NTP.gene.filter.cutoff=NTP.gene.filter.cutoff, NTP.distance=NTP.distance, NTP.norm.method=NTP.norm.method, mc.cores=mc.cores)
        
        if(run.inferCNV==TRUE){
            seurat@meta.data$tissue.type = pheno.df[match(seurat@meta.data$sample.name, pheno.df[,"Sample_ID"]),"TissueType"]
            fdata = make.seurat.fdata(seurat, rda.dir, gene.ref.gtf = gene.ref.gtf)
            
            seurat = run.inferCNV(seurat = seurat, assay=assay, fdata, pheno_info = pheno.df, output.dir = inferCNV.dir, rda.dir = rda.dir, feature.to.test=feature.to.test, cells.test_reference=cells.test_reference, cells.test_excluded=cells.test_excluded, fc.cutoff=fc.cutoff, cutoff.gene.cluster=cutoff.gene.cluster, mc.cores=mc.cores)
            
            seurat = malignant.cellTyper(seurat = seurat, rda.dir = rda.dir, cells.test_reference=cells.test_reference, malignant.cell.type=malignant.cell.type)
            
        }
        end_time <- Sys.time()
        runtime = format(end_time-start_time, format="%H")
        #report
        envList=as.list(environment())
        if(report.mode) {report(envList, qc.dir, output.dir=file.path(wd, output.name))}
        
        return(seurat)
    }
    

    相关文章

      网友评论

          本文标题:scTyper

          本文链接:https://www.haomeiwen.com/subject/nnhahltx.html