美文网首页单细胞测序
小鼠全脑单细胞数据细胞类型鉴定

小鼠全脑单细胞数据细胞类型鉴定

作者: Aji | 来源:发表于2021-06-07 08:55 被阅读0次

    最近在处理小鼠全脑的单细胞数据,归纳总结下细胞类型鉴定的过程吧。我用的算法是Seurat label transfer function, 抽取了Tubula Muris部分已知数据,用这个算法进行了鉴定,准确率可以达到98%。后来我做细胞类型鉴定用的都是这个算法加已知marker双重验证的方法。

    1.首先下载两个小鼠全脑的数据

    a.Tabula Muris

    下载的是Tabula Muris中脑的数据
    facs_Brain_Non-Myeloid_seurat_tiss.Robj
    facs_Brain_Myeloid_seurat_tiss.Robj
    其中Myeloid主要是Microglia 以及Macrophage细胞类型
    Non-Myeloid主要是Neuron, Astrocyte, Oligodendrocyte, Endothelial等

    b.Mouse Cell Atlas

    找这套数据的脑的数据找了我半天
    后来发现可以在他们文章里找到数据的下载链接
    数据存储在figshare
    但是我的数据 我是从Tabula muris github网址上的data路径中下载下来的数据
    从Tabula muris github网址中可以找到Seurat处理好的MCA的数据可以直接导入到R里面分析,很方便
    这个压缩包里包含两个文件
    MCA_CellAssignments.csv
    MCA_All-batch-removed-assignments.csv

    MCA_All-batch-removed-assignments.csv数据内容
    我可纳闷了为啥不添加细胞类型信息 有毒
    但是这个下载下来的数据是没有细胞类型的,需要自己到这个网址再下载一下,我下载的是MCA_CellAssignment.csv
    MCA_CellAssignments.csv
    ,然后用R代码处理一下就可以得到对应的细胞类型。
    代码如下
    MCA_exp <- readRDS("MCA_merged_mat.rds")
    MCA_celltypeinfo <- read.csv("MCA_CellAssignments.csv")
    MCA_cellname <- read.csv("MCA_All-batch-removed-assignments.csv")
    index <- match(MCA_cellname$Cell.name, MCA_celltypeinfo$Cell.name)
    selected_cellname <- MCA_celltypeinfo[na.omit(index), ]
    dim(selected_cellname)
    brain_cellname <- selected_cellname[selected_cellname$Tissue=="Brain", ]
    brain_exp <- MCA_exp[, as.character(brain_cellname$Cell.name)]
    
    all.equal(as.character(brain_cellname$Cell.name), colnames(brain_exp))
    MCA_brain_rds <- list()
    MCA_brain_rds$exp <- brain_exp
    MCA_brain_rds$meta.data <- brain_cellname
    saveRDS(MCA_brain_rds, "MCA_brain_exp.rds")
    

    2.构建Seurat Reference Object

    这里遇到一个问题 就是用SCTtransform的时候 没办法运行成功 后来我用NormalizeData方法就可以的

    a.Tabula Muris

    load("facs_Brain_Non-Myeloid_seurat_tiss.Robj")
    selected_cells <- rownames(tiss@meta.data)
    Non_Myeloid_exp <- tiss@raw.data[, selected_cells]
    Non_Myeloid_label <- tiss@meta.data$cell_ontology_class
    all.equal(colnames(Non_Myeloid_exp), rownames(tiss@meta.data))
    dim(Non_Myeloid_exp) #23341  3401
    # write.csv(tiss@meta.data, "Tabula_Muris_Non_Myeloid_metadata1.csv")
    
    load("facs_Brain_Myeloid_seurat_tiss.Robj")
    selected_cells <- rownames(tiss@meta.data)
    Myeloid_exp <- tiss@raw.data[, selected_cells]
    Myeloid_label <- tiss@meta.data$cell_ontology_class
    all.equal(colnames(Myeloid_exp), rownames(tiss@meta.data))
    dim(Myeloid_exp) #23341  4455
    # write.csv(tiss@meta.data, "Tabula_Muris_Myeloid_metadata1.csv")
    ## 我在表格中去掉了Rik结尾的基因 以及4-Sep这样子的基因 基因名字写在了"rownames.csv"
    ## 是因为这些基因感觉没什么实质性作用 可以跳过这里 不做任何处理
    
    genes <- as.character(read.csv("rownames.csv", row.names= 1)$x)
    Brain_exp <- cbind(Non_Myeloid_exp, Myeloid_exp)
    Cell_ontology_class <- data.frame(celltype = c(Non_Myeloid_label, Myeloid_label))
    rownames(Cell_ontology_class) <- colnames(Brain_exp)
    
    Brain_exp <- Brain_exp[genes, ]
    Brain_exp <- Brain_exp[rowSums(Brain_exp>5) >= 20 & rowSums(Brain_exp) >= 20, ]
    Brain_exp <- na.omit(Brain_exp)
    
    Tabula_Muris_rds <- list()
    Tabula_Muris_rds$exp <- Brain_exp
    Tabula_Muris_rds$celltype <- Cell_ontology_class$celltype
    saveRDS(Tabula_Muris_rds, "Tabula_Muris_rawdata_with_cellinfo.rds")
    
    # 2) 构建NormalizeData的ref.seurat用来做reference data
    ref_exp <- Brain_exp
    ref.seurat <- CreateSeuratObject(counts = ref_exp)
    ref.seurat <- NormalizeData(ref.seurat, verbose = FALSE)
    ref.seurat <- FindVariableFeatures(ref.seurat, selection.method = "vst", 
                                                 nfeatures = 2000, verbose = FALSE)
    ref.seurat <- ScaleData(ref.seurat)
    ref.seurat <- RunPCA(object = ref.seurat, npcs = 30, verbose = FALSE)
    ref.seurat$celltype <- Cell_ontology_class$celltype
    saveRDS(ref.seurat, file = "Tabula_Muris_NormalizeData_ref_seurat.rds")
    
    # 3) 选取了部分数据 做reference 推断 准确率可以达到98.8%
    index <- sample(colnames(Brain_exp), 500)
    test_data <- Brain_exp[, index]
    test_celltype <- Cell_ontology_class[index, 1]
    test.seurat <- CreateSeuratObject(counts = test_data)
    test.seurat <- NormalizeData(test.seurat, verbose = FALSE)
    test.seurat <- FindVariableFeatures(test.seurat, selection.method = "vst", 
                                       nfeatures = 2000, verbose = FALSE)
    test.seurat <- ScaleData(test.seurat)
    test.seurat <- RunPCA(object = test.seurat, npcs = 30, verbose = FALSE)
    test.seurat$original_celltype <- test_celltype
    
    test.anchors <- FindTransferAnchors(reference = ref.seurat, 
                                            query = test.seurat, 
                                            dims = 1:30)
    predictions <- TransferData(anchorset = test.anchors, 
                                refdata = ref.seurat$celltype, 
                                dims = 1:30)
    test.seurat <- AddMetaData(test.seurat, metadata = predictions)
    

    b.Mouse Cell Atlas

    MCA_exp <- readRDS("MCA_merged_mat.rds")
    MCA_celltypeinfo <- read.csv("MCA_CellAssignments.csv")
    MCA_cellname <- read.csv("MCA_All-batch-removed-assignments.csv")
    index <- match(MCA_cellname$Cell.name, MCA_celltypeinfo$Cell.name)
    selected_cellname <- MCA_celltypeinfo[na.omit(index), ]
    dim(selected_cellname)
    brain_cellname <- selected_cellname[selected_cellname$Tissue=="Brain", ]
    brain_exp <- MCA_exp[, as.character(brain_cellname$Cell.name)]
    
    all.equal(as.character(brain_cellname$Cell.name), colnames(brain_exp))
    MCA_brain_rds <- list()
    MCA_brain_rds$exp <- brain_exp
    MCA_brain_rds$meta.data <- brain_cellname
    saveRDS(MCA_brain_rds, "MCA_brain_exp.rds")
    
    # 2.构建seurat reference 
    library(Seurat)
    ref_exp <- brain_exp
    ref.seurat <- CreateSeuratObject(counts = ref_exp)
    ref.seurat <- NormalizeData(ref.seurat, verbose = FALSE)
    ref.seurat <- FindVariableFeatures(ref.seurat, selection.method = "vst", 
                                       nfeatures = 2000, verbose = FALSE)
    ref.seurat <- ScaleData(ref.seurat)
    ref.seurat <- RunPCA(object = ref.seurat, npcs = 30, verbose = FALSE)
    ref.seurat$celltype <- as.character(brain_cellname$Annotation)
    saveRDS(ref.seurat, file = "MCA_NormalizeData_ref_seurat.rds")
    

    写在最后 佛系更文 很开心 终于有一百个粉丝了开心快乐的一天啊

    相关文章

      网友评论

        本文标题:小鼠全脑单细胞数据细胞类型鉴定

        本文链接:https://www.haomeiwen.com/subject/lbeheltx.html