美文网首页
Seurat for single-cell sequencin

Seurat for single-cell sequencin

作者: wkzhang81 | 来源:发表于2023-02-07 11:23 被阅读0次

    Data import and integration

    p_load(Seurat, tidyverse)
    # Load data for individual sample
    sce1 <- Read10X("./ST1/", gene.column = 1) # add gene.column = 1 for old version
    sce1 <- CreateSeuratObject(counts = sce1, 
                                  project = "ST1", 
                                  min.cells = 3, min.features = 200)
    sce1[["percent.mt"]] <- PercentageFeatureSet(sce1, pattern = "^mt-")
    sce1<- subset(sce1, subset = nFeature_RNA > 200 & nFeature_RNA < 7000 
                  & percent.mt < 5 & nCount_RNA < quantile(nCount_RNA, 0.95) 
                  & nCount_RNA > 500)
    sce2 <- Read10X("./ST2/", gene.column = 1) # add gene.column = 1 for old version
    sce2 <- CreateSeuratObject(counts = sce2, 
                                  project = "ST2", 
                                  min.cells = 3, min.features = 200)
    sce2[["percent.mt"]] <- PercentageFeatureSet(sce2, pattern = "^mt-")
    sce2<- subset(sce2, subset = nFeature_RNA > 200 & nFeature_RNA < 7000 
                  & percent.mt < 5 & nCount_RNA < quantile(nCount_RNA, 0.95) 
                  & nCount_RNA > 500)
    
    # Data integration
    sce <- merge(sce1, sce2)
    sce <- NormalizeData(sce)
    sce <- FindVariableFeatures(sce)
    sce <- ScaleData(sce)
    sce <- RunPCA(sce)
    sce <- IntegrateLayers(object = sce, method = RPCAIntegration,
                           orig.reduction = "pca", new.reduction = "integrated.rpca",
                           verbose = FALSE)
    sce <- FindNeighbors(sce, reduction = "integrated.rpca", dims = 1:30)
    sce <- FindClusters(sce, resolution = 0.2, cluster.name = "rpca_clusters")
    
    sce <- RunUMAP(sce, reduction = "integrated.rpca", 
                   dims = 1:30, reduction.name = "umap.rpca")
    ElbowPlot(sce, ndims = 30)
    
    # Plots
    DimPlot(sce, label = T, split.by = "orig.ident") + NoAxes() + NoLegend()
    
    # Split for integrative analysis
    sce <- split(sce[["RNA"]], f = sce$orig.ident)
    # Join for marker analysis
    sce <- JoinLayers(sce)
    

    Data analysis

    # Search for marker genes
    markers <- FindAllMarkers(sce, only.pos = T, min.pct = 0.25, logfc.threshold = 0.25, assay="RNA")
    write.csv(markers, "marker_genes.csv")
    topnmarkers <- markers %>% group_by(cluster) %>% top_n(n=5, wt = avg_log2FC)
    DotPlot(sce, 
      features = topnmarkers$gene,
      assay = 'RNA',
      cols = c('lightgrey', 'red')) + RotatedAxis()
    
    # Marker genes for annotation
    genes_to_check = c("Slc17a7","Slc30a3","Otof","Rorb","Rspo1","Fam84b",
                       "Syt6","Cplx3","Tshz2", #Glutamatergic
                       "Pvalb","Sst","Vip","Sncg","Lamp5", #GABAergic
                       "Sox10","Olig1","Mbp", #Oligodendracytes
                       "Pdgfra", #OPC
                       "Gfap","Aqp4", #Astrocytes
                       "Gli3", #NPCs
                       "Ctss","Ptprc", #Microglial
                       "Flt1","Cd14",
                       "Acta2", #SMC
                       "Kcnj8" #Pericytes
    )
    DotPlot(sce, 
      features = genes_to_check,
      assay = 'RNA',
      cols = c('lightgrey', 'red')) + RotatedAxis() + coord_flip()
    
    # SingleR for annotation
    p_load(Seurat, tidyverse, SingleR, reshape2)
    load("~/Desktop/Saved index/....")
    count_for_SingleR <- GetAssayData(sce, layer="data")
    singler <- SingleR(test = count_for_SingleR, ref = ident_matrix, labels = ident_matrix$label.main, assay.type.test=1)  ###main could be replaced by fine
    singler_result <- as.data.frame.matrix(table(singler$labels, sce$seurat_clusters))
    write.csv(singler_result, "singler_annotation.csv")
    
    # Annotation
    new.cluster.ids <- c("...")
    names(new.cluster.ids) <- levels(sce)
    sce <- RenameIdents(sce, new.cluster.ids)
    sce@meta.data$cellType <- sce@active.ident
    DimPlot(sce,  split.by = "orig.ident", label = T, pt.size = 0.5) + NoLegend()
    
    VlnPlot(sce, features = c("..."), assay = "RNA")
    FeaturePlot(sce, features = c("..."))
    
    # Significant test
    sig_gene <- FindMarkers(sce, ident.1 = "0", logfc.threshold = log(1.5), assay = "RNA")
    sig_gene <- sig_gene[order(sig_gene$avg_log2FC),]
    
    sig_gene$change = ifelse(sig_gene$p_val_adj < 0.05 & abs(sig_gene$avg_log2FC) >= 1, 
      ifelse(sig_gene$avg_log2FC >= 1, 'Up', 'Down'), 'Stable')
    sig_gene_sig <- subset(sig_gene, change != "Stable")
    sig_gene_sig <- sig_gene_sig %>% top_n(20, abs(avg_log2FC))
    p <- ggplot(sig_gene, aes(avg_log2FC, -log(p_val_adj, 10), colour = change)) + 
        geom_point(alpha=0.3, size = 3) + 
        scale_color_manual(values=c("blue", "grey","red")) + 
        xlab(expression("log"[2]*"FC")) + ylab(expression("-log"[10]*"FDR")) + 
        geom_vline(xintercept=c(-1,1),lty=4,col="lightgrey",lwd=0.8) + 
        geom_hline(yintercept = -log10(0.05),lty=4,col="lightgrey",lwd=0.8)  +
        geom_text_repel(aes(label = rownames(sig_gene_sig)), data = sig_gene_sig) +
        theme_classic()
    p + theme(legend.position = "none")
    rm(sig_gene_sig)
    table(sig_gene$change)
    
    av <- AverageExpression(immuno_1, assays = "RNA")
    av <- av[[1]]
    cg <- names(tail(sort(apply(av, 1, sd)), 1000))
    pheatmap(cor(av[cg,], method = "spearman"), border = F, angle_col = 45, clustering_method = "median")
    
    interest_module <- "Cxcl"
    interest_module <- features[grepl(interest_module, features)]
    DoHeatmap(sce, features = interest_module, label = F, slot = "data") + scale_fill_gradientn(colors = c("black","red3"))
    

    1. Load R packages and count_matrix.

    • For csv matrix loading:
    p_load(Seurat, tidyverse)
    plan("multisession", workers = 4)
    setwd("<...>")
    count_matrix <- read.csv("<SequenceNo_Counts.csv>", row.names = 1)
    count_sc <- CreateSeuratObject(counts = count_matrix, project = "ProjName", 
        min.cells = 3, min.features = 200)
    
    • For H5 matrix loading:
    count_matrix_h5 <- Read10X_h5("<SequenceNo_Counts.h5>")
    count_sc <- CreateSeuratObject(counts = count_matrix_h5, project = "ProjName", 
        min.cells = 3, min.features = 200)
    
    • For mtx loading:
    counts <- Read10X(data.dir = "<...>")
    count_sc <- CreateSeuratObject(counts = counts)
    
    • Combine samples
    ### Create Seurat objects
    count_sc_1 <- CreateSeuratObject(counts = count_1, project = "1", min.cells = 3, min.features = 200)
    count_sc_2 <- CreateSeuratObject(counts = count_2, project = "2", min.cells = 3, min.features = 200)
    
    ### Normalization
    count_sc_1 <- NormalizeData(count_sc_1, normalization.method = "LogNormalize", scale.factor = 1e4)
    count_sc_2 <- NormalizeData(count_sc_2, normalization.method = "LogNormalize", scale.factor = 1e4)
    
    ### Find variable features and integration anchors
    count_sc_1 <- FindVariableFeatures(count_sc_1, selection.method = "vst", nfeatures = 2000)
    count_sc_2 <- FindVariableFeatures(count_sc_2, selection.method = "vst", nfeatures = 2000)
    merge_anchors <- FindIntegrationAnchors(object.list = list(count_sc_1, count_sc_2), dims = 1:20)
    count_merge <- integrateData(anchorset = merge_anchors, dims = 1:20)
    DefaultAssay(count_merge) <- "integrated"
    

    2. Quarlity control and count filtering by mitochondrial-RNA percentage.

    count_sc[["percent.mt"]] <- PercentageFeatureSet(count_sc, pattern = "^MT-")
    VlnPlot(count_sc, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
    
    plot1 <- FeatureScatter(count_sc, feature1 = "nCount_RNA", feature2 = "percent.mt")
    plot2 <- FeatureScatter(count_sc, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
    plot1 + plot2
    
    count_sc <- subset(count_sc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
    

    3. Normalization.

    count_sc <- NormalizeData(count_sc, normalization.method = "LogNormalize", scale.factor = 10000)
    count_sc <- FindVariableFeatures(count_sc, selection.method = "vst", nfeatures = 2000)
    top10 <- head(VariableFeatures(count_sc), 10)
    plot1 <- VariableFeaturePlot(count_sc)
    plot2 <- LabelPoints(plot = plot1, points = top10, repel = T)
    plot1 + plot2
    

    4. Linear dimensional reduction.

    all.genes <- rownames(count_sc)
    count_sc <- ScaleData(count_sc, features = all.genes)
    count_sc <- RunPCA(count_sc, features = VariableFeatures(object = count_sc))
    print(count_sc[["pca"]], dims = 1:5, nfeatures = 5)
    
    VizDimLoadings(count_sc, dims = 1:2, reduction = "pca")
    DimPlot(count_sc, reduction = "pca")
    DimHeatmap(count_sc, dims = 1, cells = 500, balanced = TRUE)
    


    5. Defining the dimensions.

    count_sc <- JackStraw(count_sc, num.replicate = 100)
    count_sc <- ScoreJackStraw(count_sc, dims = 1:20)
    JackStrawPlot(count_sc, dims = 1:15)
    ElbowPlot(count_sc)
    

    6. Clustering of the cells with t-sne.

    count_sc <- FindNeighbors(count_sc, dims = 1:15)
    count_sc <- FindClusters(count_sc, resolution = 0.5)
    head(Idents(count_sc), 5)
    count_sc <- RunTSNE(count_sc, dims = 1:10)
    head(count_sc@reductions$tsne@cell.embeddings)
    DimPlot(count_sc, reduction = "tsne")
    

    7. Finding marker genes for each cluster.

    sc_markers <- FindAllMarkers(count_sc, only.pos = T, min.pct = 0.25)
    top5 <- sc_markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
    DoHeatmap(count_sc, features = top5$gene) + NoLegend()
    

    8. Search for interested genes

    VlnPlot(count_sc, features = c("FABP4", "APOC1", "DCN", "RGS5", "MYH11", "DARC", "CD52"))
    FeaturePlot(count_sc, features = c("FABP4", "APOC1", "DCN", "RGS5", "MYH11", "DARC", "CD52"))
    DotPlot(count_sc, features = c("FABP4", "APOC1", "DCN", "RGS5", "MYH11", "DARC", "CD52")) + RotatedAxis()
    


    9. Defining cell populations

    new.cluster.ids <- c("Naive CD4 T", "Memory CD4 T", "CD14+ Mono", "B", "CD8 T", "FCGR3A+ Mono", "NK", "DC")
    names(new.cluster.ids) <- levels(count_sc)
    count_sc <- RenameIdents(count_sc, new.cluster.ids)
    
    DimPlot(count_sc, reduction = "tsne", label = TRUE, pt.size = 0.5) + NoLegend()
    

    相关文章

      网友评论

          本文标题:Seurat for single-cell sequencin

          本文链接:https://www.haomeiwen.com/subject/orwxkdtx.html