Seurat重要命令汇总

作者: Hayley笔记 | 来源:发表于2021-06-02 23:53 被阅读0次

    参考:https://satijalab.org/seurat/articles/essential_commands.html

    ⚠️这里提到的函数务必全部熟练掌握⚠️

    1. Seurat 标准工作流程

    读入数据 -> 构建Seurat对象 -> 归一化 -> 寻找特征基因 -> 标准化 -> PCA降维 -> 计算KNN最近邻 -> 分群 -> 非线性降维(UMAP或tSNE,下方示例代码使用RunTSNE) -> 可视化降维结果

    pbmc.counts <- Read10X(data.dir = "~/Downloads/pbmc3k/filtered_gene_bc_matrices/hg19/")
    pbmc <- CreateSeuratObject(counts = pbmc.counts)
    pbmc <- NormalizeData(object = pbmc)
    pbmc <- FindVariableFeatures(object = pbmc)
    pbmc <- ScaleData(object = pbmc)
    pbmc <- RunPCA(object = pbmc)
    pbmc <- FindNeighbors(object = pbmc)
    pbmc <- FindClusters(object = pbmc)
    pbmc <- RunTSNE(object = pbmc)
    DimPlot(object = pbmc, reduction = "tsne")
    

    2. Seurat对象交互

    获取细胞和基因名,并计数

    colnames(x = pbmc)
    rownames(x = pbmc)
    ncol(x = pbmc)
    nrow(x = pbmc)
    

    设置identity classes(active.ident)

    # 查看每个细胞目前所用的identity(也就是目前用于细胞分类的标签),并列出所有identity类别
    Idents(object = pbmc)
    levels(x = pbmc)
    
    # 存放细胞identity classes
    pbmc[["old.ident"]] <- Idents(object = pbmc) #将"old.ident"存放在pbmc的metadata中
    
    # 设置identity classes
    Idents(object = pbmc) <- "CD4 T cells"  #将所有细胞的ident("active.ident")都设置为"CD4 T cells"
    Idents(object = pbmc, cells = 1:10) <- "CD4 T cells" #将前10个细胞的ident设置为"CD4 T cells"
    
    # 将meta data中已有的分类设置为identity classes
    Idents(object = pbmc, cells = 1:10) <- "orig.ident" #将前10个细胞的ident设置为它们在meta data的"orig.ident"列中的值
    Idents(object = pbmc) <- "orig.ident" #将"orig.ident"设置为pbmc的 "active.ident"
    
    # 重命名 identity classes
    pbmc <- RenameIdents(object = pbmc, `CD4 T cells` = "T Helper cells")
    

    从seurat对象中取子集⚠️

    # 根据identity class取子集(also see ?SubsetData)
    subset(x = pbmc, idents = "B cells")
    subset(x = pbmc, idents = c("CD4 T cells", "CD8 T cells"), invert = TRUE)
    
    # 根据某个基因/特征的表达水平取子集
    subset(x = pbmc, subset = MS4A1 > 3)
    
    # 根据组合的标准取子集
    subset(x = pbmc, subset = MS4A1 > 3 & PC_1 > 5)
    subset(x = pbmc, subset = MS4A1 > 3, idents = "B cells")
    
    # 根据某个meta data中的某一分组来取子集
    subset(x = pbmc, subset = orig.ident == "Replicate1")
    
    # Downsample the number of cells per identity class
    subset(x = pbmc, downsample = 100)
    

    多个Seurat对象的合并

    # Merge两个Seurat对象
    merge(x = pbmc1, y = pbmc2)
    # Merge多个Seurat对象
    merge(x = pbmc1, y = list(pbmc2, pbmc3))
    

    3. 数据访问

    #查看metadata数据框, metadata存储在object@meta.data
    pbmc[[]]
    
    # 检索metadata中的特定指标
    pbmc$nCount_RNA
    pbmc[[c("percent.mt", "nFeature_RNA")]]
    
    # 添加metadata, (见?AddMetaData)
    random_group_labels <- sample(x = c("g1", "g2"), size = ncol(x = pbmc), replace = TRUE)
    pbmc$groups <- random_group_labels
    
    # 检索或设置表达矩阵数据,包括原始计数矩阵、归一化后的矩阵和标准化(缩放)后的矩阵 ('counts', 'data', and 'scale.data')
    GetAssayData(object = pbmc, slot = "counts")[1:5,1:5]
    count.data <- GetAssayData(object = pbmc[["RNA"]], slot = "counts")
    count.data <- as.matrix(x = count.data + 1)
    new.seurat.object <- SetAssayData(object = pbmc, slot = "counts", new.data = count.data, assay = "RNA")
    #⚠️ 使用GetAssayData函数可以从Seurat对象访问数据。
    #⚠️ 可以使用SetAssayData将数据添加到counts,data或scale.data插槽中。新数据必须具有与当前数据相同顺序的相同细胞。添加到counts或data中的数据必须具有与当前数据相同的features。
    
    # Get cell embeddings and feature loadings
    Embeddings(object = pbmc, reduction = "pca") #检索每个细胞的PC矩阵
    Loadings(object = pbmc, reduction = "pca") #检索每个基因的PC矩阵
    Loadings(object = pbmc, reduction = "pca", projected = TRUE)
    
    # ⚠️FetchData函数可以从expression matrices, cell embeddings或metadata中取出任何值
    head(FetchData(object = pbmc, vars = c("PC_1", "percent.mt", "MS4A1")))
    #                        PC_1 percent.mt    MS4A1
    # AAACATACAACCAC-1 -4.7296855  3.0177759 0.000000
    # AAACATTGAGCTAC-1 -0.5174029  3.7935958 2.583047
    # AAACATTGATCAGC-1 -3.1891063  0.8897363 0.000000
    # AAACCGTGCTTCCG-1 12.7933021  1.7430845 0.000000
    # AAACCGTGTATGCG-1 -3.1288078  1.2244898 0.000000
    # AAACGCACTGGTAC-1 -3.1088963  1.6643551 0.000000
    

    4. 可视化

    Seurat的绘图基于ggplot2
    详见Seurat绘图函数总结

    # Dimensional reduction plot for PCA or tSNE
    DimPlot(object = pbmc, reduction = "tsne")
    DimPlot(object = pbmc, reduction = "pca")
    
    # Dimensional reduction plot, with cells colored by a quantitative feature
    FeaturePlot(object = pbmc, features = "MS4A1")
    
    # Scatter plot across single cells, replaces GenePlot
    FeatureScatter(object = pbmc, feature1 = "MS4A1", feature2 = "PC_1")
    FeatureScatter(object = pbmc, feature1 = "MS4A1", feature2 = "CD3D")
    
    # Scatter plot across individual features, replaces CellPlot
    CellScatter(object = pbmc, cell1 = "AGTCTACTAGGGTG", cell2 = "CACAGATGGTTTCT")
    
    VariableFeaturePlot(object = pbmc)
    
    # Violin and Ridge plots
    VlnPlot(object = pbmc, features = c("LYZ", "CCL5", "IL32"))
    RidgePlot(object = pbmc, features = c("LYZ", "CCL5", "IL32"))
    
    # Heatmaps
    DoHeatmap(object = pbmc, features = heatmap_markers)
    DimHeatmap(object = pbmc, reduction = "pca", cells = 200)
    
    # New things to try!  Note that plotting functions now return ggplot2 objects, so you can add themes, titles, and options
    # onto them
    VlnPlot(object = pbmc, features = "MS4A1", split.by = "groups")
    DotPlot(object = pbmc, features = c("LYZ", "CCL5", "IL32"), split.by = "groups")
    FeaturePlot(object = pbmc, features = c("MS4A1", "CD79A"), blend = TRUE)
    DimPlot(object = pbmc) + DarkTheme()
    DimPlot(object = pbmc) + labs(title = "2,700 PBMCs clustered using Seurat and viewed\non a two-dimensional tSNE")
    

    Seurat还提供了很多可以添加到ggplot2图中的预制个性化主题

    主题 功能
    DarkTheme Set a black background with white text
    FontSize Set font sizes for various elements of a plot
    NoAxes Remove axes and axis text
    NoLegend Remove all legend elements
    RestoreLegend Restores a legend after removal
    RotatedAxis Rotates x-axis labels
    # Plotting helper functions work with ggplot2-based scatter plots, such as DimPlot, FeaturePlot, CellScatter, and
    # FeatureScatter
    plot <- DimPlot(object = pbmc) + NoLegend()
    
    # HoverLocator replaces the former `do.hover` argument. It can also show extra data through the `information` argument,
    # designed to work smoothly with FetchData
    HoverLocator(plot = plot, information = FetchData(object = pbmc, vars = c("ident", "PC_1", "nFeature_RNA")))
    
    # FeatureLocator replaces the former `do.identify`
    select.cells <- FeatureLocator(plot = plot)
    
    # Label points on a ggplot object
    LabelPoints(plot = plot, points = TopCells(object = pbmc[["pca"]]), repel = TRUE)
    

    5. Multi-Assay Features

    With Seurat, you can easily switch between different assays at the single cell level (such as ADT counts from CITE-seq, or integrated/batch-corrected data). Most functions now take an assay parameter, but you can set a Default Assay to avoid repetitive statements.

    cbmc <- CreateSeuratObject(counts = cbmc.rna)
    # Add ADT data
    cbmc[["ADT"]] <- CreateAssayObject(counts = cbmc.adt)
    # Run analyses by specifying the assay to use
    cbmc <- NormalizeData(object = cbmc, assay = "RNA")
    cbmc <- NormalizeData(object = cbmc, assay = "ADT", normalization.method = "CLR")
    
    # Retrieve and set the default assay
    DefaultAssay(object = cbmc)
    DefaultAssay(object = cbmc) <- "ADT"
    DefaultAssay(object = cbmc)
    
    # Pull feature expression from both assays by using keys
    FetchData(object = cbmc, vars = c("rna_CD3E", "adt_CD3"))
    
    # Plot data from multiple assays using keys
    FeatureScatter(object = cbmc, feature1 = "rna_CD3E", feature2 = "adt_CD3")
    

    6. 其他好用的函数

    HVFInfo函数从Assay对象中提取特征均值和离散度。可变特征向量可以通过VariableFeatures函数提取。VariableFeatures也可以设置可变特征向量。

    # HVFInfo pulls mean, dispersion, and dispersion scaled
    # Useful for viewing the results of FindVariableFeatures
    head(x = HVFInfo(object = pbmc))
    #                      mean    variance variance.standardized
    # AL627309.1    0.003411676 0.003401325             0.9330441
    # AP006222.2    0.001137225 0.001136363             0.9924937
    # RP11-206L10.2 0.001895375 0.001892500             0.9627290
    # RP11-206L10.9 0.001137225 0.001136363             0.9924937
    # LINC00115     0.006823351 0.006779363             0.9062135
    # NOC2L         0.107278241 0.159514698             0.7849309
    
    # VariableFeatures both accesses and sets the vector of variable features
    head(x = VariableFeatures(object = pbmc))
    # [1] "PPBP"   "LYZ"    "S100A9" "IGLL5"  "GNLY"   "FTL" 
    # Set variable features example
    VariableFeatures(object = pbmc) <- head(x = VariableFeatures(object = pbmc), n = 2000)
    

    可以通过Stdev找到Seurat对象中存储的DimReduc的标准差向量。

    Stdev(object = pbmc, reduction = 'pca')
    # 注意:参数名是reduction;若误写为reduction.use会得到 Warning: The following arguments are not used: reduction.use
    #  [1] 7.098420 4.495493 3.872592 3.748859 3.171755 2.545292 2.068137 1.945133
    #  [9] 1.847375 1.834689 1.820439 1.788429 1.779215 1.757395 1.751558 1.746384
    # [17] 1.732116 1.729550 1.722981 1.720541 1.717668 1.715182 1.710343 1.705735
    # [25] 1.705204 1.702498 1.700713 1.695816 1.694965 1.690976 1.688388 1.681423
    # [33] 1.679596 1.678656 1.674677 1.674487 1.670084 1.667662 1.666598 1.664131
    # [41] 1.660317 1.657907 1.656963 1.654456 1.654188 1.649514 1.647186 1.645571
    # [49] 1.642952 1.641145
    

    可以通过以下方法查看Seurat类可用的所有方法:

    library(Seurat)
    utils::methods(class = 'Seurat')
    

    相关文章

      网友评论

        本文标题:Seurat重要命令汇总

        本文链接:https://www.haomeiwen.com/subject/gddosltx.html