美文网首页
2021-07-19 singleCellNet

2021-07-19 singleCellNet

作者: 汪小静 | 来源:发表于2021-07-19 21:54 被阅读0次

    相关链接

    https://github.com/pcahan1/singleCellNet

    https://pcahan1.github.io/singleCellNet/

    0. 准备数据

    #install
    install.packages("devtools")
    devtools::install_github("pcahan1/singleCellNet")
    library(singleCellNet)
    
    #download the data
    #training metadata, https://s3.amazonaws.com/cnobjects/singleCellNet/examples/sampTab_TM_053018.rda
    #training expression matrix , https://s3.amazonaws.com/cnobjects/singleCellNet/examples/expTM_Raw_053018.rda
    #query metadata, https://s3.amazonaws.com/cnobjects/singleCellNet/examples/stDat_beads_mar22.rda
    #query expression matrix, https://s3.amazonaws.com/cnobjects/singleCellNet/examples/6k_beadpurfied_raw.rda
    #human-mouse orthologs.https://s3.amazonaws.com/cnobjects/singleCellNet/examples/human_mouse_genes_Jul_24_2018.rda
    
    #loading training data MOUSE
    stTM <- utils_loadObject(fname = "sampTab_TM_053018.rda") #metadata
    expTMraw <- utils_loadObject(fname = "expTM_Raw_053018.rda") #expression matrix
    
    #loading query data HUMAN
    stQuery <- utils_loadObject(fname = "stDat_beads_mar22.rda") #metadata
    expQuery <- utils_loadObject(fname = "6k_beadpurfied_raw.rda") #expression matrix
    
    #Ortholog conversion for cross species analysis
    download.file("https://s3.amazonaws.com/cnobjects/singleCellNet/examples/human_mouse_genes_Jul_24_2018.rda", "human_mouse_genes_Jul_24_2018.rda")
    oTab <- utils_loadObject(fname = "human_mouse_genes_Jul_24_2018.rda")
    aa = csRenameOrth(expQuery = expQuery, expTrain = expTMraw, orthTable = oTab)
    #> query genes in ortholog table =  15532 
    #> training genes in ortholog table and query data =  14550
    expQueryOrth <- aa[['expQuery']]
    expTrainOrth <- aa[['expTrain']]
    
    #若为同一物种
    commonGenes<-intersect(rownames(expTrain), rownames(expQuery))
    expTrain <- expTrain[commonGenes, ]
    expQuery <- expQuery[commonGenes, ]
    
    #seurat 对象的导入
    seuratfile <- extractSeurat(sc, exp_slot_name = "counts")
    
    sampTab = seuratfile$sampTab
    expDat = seuratfile$expDat
    
    

    1. Training the data 建立分类器

    We use the same number of cells per cell type, i.e. balanced data, to train Top-Pair Random Forest classifier. The remaining of the data or the held-out data will be used as validation data to evaluate the performance of the TP-RF classifier. Empirically we have found 50 cells to be a minimal cell number to create a classifier with good performance, however it may vary depend on the quality of your reference data, so it is really important to assess the performance of your classifier before proceeding to query your sample of interest.

    stList<-splitCommon(sampTab = stTM, ncells = 50, dLevel = "newAnn")#以newAnn分类细胞
    alveolar macrophage : 62 
    B cell : 3134 
    bladder urothelial cell : 759 
    bladder_mesenchymal : 859 
    cardiac muscle cell : 60 
    cardiac_fibroblast : 222 
    chondrocyte-like : 165 
    endocardial cell : 52 
    endothelial cell : 1890 
    erythroblast : 152 
    erythrocyte : 74 
    granulocyte : 520 
    hematopoietic precursor cell : 117 
    hepatocyte : 882 
    keratinocyte : 1203 
    kidney capillary endothelial cell : 117 
    kidney proximal straight tubule epithelial cell : 618 
    kidney_duct_epithelial : 355 
    late pro-B cell : 141 
    limb_mesenchymal : 540 
    luminal epithelial cell of mammary gland : 137 
    lung_mammary_stromal : 2072 
    macrophage : 1340 
    mammary_basal_cell : 115 
    monocyte : 370 
    natural killer cell : 600 
    neuroendocrine cell : 282 
    skeletal muscle satellite cell : 190 
    T cell : 1823 
    tongue_basal_cell : 1726 
    trachea_epithelial : 434 
    trachea_mesenchymal : 3925 
    stTrain<-stList[[1]]
    expTrain <- expTrainOrth[,stTrain$cell]
    
    stTest <- stList[[2]]
    expTest <- expTrainOrth[,stTest$cell]
    

    This training step includes:

      1. normalize and log-transform & scale the training data,
      1. find top gene pairs and transform the training data into a binary matrix,
      1. train the Top-pair Random Forest classifier
    #- If you increase nTopGenes and nTopGenePairs, you may get a even better classifier performance on query data!
    class_info<-scn_train(stTrain = stTrain, expTrain = expTrain, nTopGenes = 10, nRand = 70, nTrees = 1000, nTopGenePairs = 25, dLevel = "newAnn", colName_samp = "cell")
    #a list containing normalized expression data, classification gene list, cnPRoc
    

    2. 评价分类器

    #apply heldout data
    system.time(classRes_val_all <- scn_predict(class_info[['cnProc']], expTest, nrand = 50))
    #> Loaded in the cnProc
    #> All Done
    #>    user  system elapsed 
    #>   0.208   0.012   0.220
    
    #assessment
    tm_heldoutassessment <- assess_comm(ct_scores = classRes_val_all, stTrain = stTrain, stQuery = stTest, dLevelSID = "cell", classTrain = "newAnn", classQuery = "newAnn")
    plot_PRs(tm_heldoutassessment)
    

    3. 进行分类

    crPBMC <- scn_predict(class_info[['cnProc']], expQueryOrth, nrand = 50)
    #需要对crPBMC过滤才能完成下一步
    test = crPBMC[,colnames(crPBMC) %in% colnames(expQueryOrth)]
    stQuery <- assign_cate(classRes = test, sampTab = stQuery, cThresh = 0.5) #选择classification score higher than 0.5
    

    4. 可视化

    #create labels for classification heatmap
    sgrp<-as.vector(stQuery$description)
    names(sgrp)<-rownames(stQuery)
    grpRand<-rep("rand", 50)
    names(grpRand)<-paste("rand_", 1:50, sep='')
    sgrp<-append(sgrp, grpRand)
    
    sc_hmClass(crPBMC, sgrp, max=5000, isBig=TRUE, cCol=F, font=8)
    
    sc_violinClass(sampTab = stQuery,classRes = crPBMC, cellIDCol = "sample_name", dLevel = "description", ncol = 9)
    
    sc_violinClass(sampTab = stQuery, classRes = crPBMC, cellIDCol = "sample_name", dLevel = "description", ncol = 9, sub_cluster = "B cell")
    
    # attribution plot
    plot_attr(sampTab = stQuery, classRes = crPBMC, nrand=50, sid="sample_name", dLevel="description")
    
    #Visualize top pair gene expression
    gpTab <- compareGenePairs(query_exp = expQueryOrth, training_exp = expTrainOrth, training_st = stTrain, classCol = "newAnn", sampleCol = "cell", RF_classifier = class_info$cnProc$classifier, numPairs = 20, trainingOnly = FALSE)
    #> [1] "All Good"
    
    sgrp<-as.vector(stQuery$prefix)
    names(sgrp)<-rownames(stQuery)
    train <- findAvgLabel(gpTab, stTrain = stTrain, dLevel = "newAnn")
    sgrp <- append(sgrp, train)
    
    hm_gpa_sel(gpTab, genes = class_info$cnProc$xpairs, grps = sgrp, maxPerGrp = 5)
    
    

    5. 插入seurat 对象中进行可视化

    sc@meta.data$category = stQuery$category
    DimPlot(sc,group.by = "category",label = T)
    

    相关文章

      网友评论

          本文标题:2021-07-19 singleCellNet

          本文链接:https://www.haomeiwen.com/subject/gafkmltx.html