美文网首页 生物信息学分析
scRNAseq与scATACseq整合分析

scRNAseq与scATACseq整合分析

作者: 生信编程日常 | 来源:发表于2020-12-15 23:36 被阅读0次
    library(Seurat)
    library(ggplot2)
    library(patchwork)
    

    下载所需要的文件:scATAC-seq, scATAC-seq metadata, scRNA-seq
    注释文件下载如下

    #下载注释文件并解压
    wget ftp://ftp.ensembl.org/pub/grch37/release-87/gtf/homo_sapiens/Homo_sapiens.GRCh37.87.gtf.gz
    gunzip Homo_sapiens.GRCh37.87.gtf.gz
    
    peaks <- Read10X_h5("data/atac_v1_pbmc_10k_filtered_peak_bc_matrix.h5")
    # create a gene activity matrix from the peak matrix and GTF, using chromosomes 1:22, X, and Y.
    # Peaks that fall within gene bodies, or 2kb upstream of a gene, are considered
    activity.matrix <- CreateGeneActivityMatrix(peak.matrix = peaks, annotation.file = "data/Homo_sapiens.GRCh37.87.gtf", seq.levels = c(1:22, "X", "Y"), upstream = 2000, verbose = TRUE)
    

    设置Seurat中的对象,原始峰值计数存储在“ ATAC”Assay,基因表达矩阵存储在“ RNA”Assay。

    对象设置

    pbmc.atac <- CreateSeuratObject(counts = peaks, assay = "ATAC", project = "10x_ATAC")
    pbmc.atac[["ACTIVITY"]] <- CreateAssayObject(counts = activity.matrix)
    meta <- read.table("data/atac_v1_pbmc_10k_singlecell.csv", sep = ",", header = TRUE, row.names = 1, stringsAsFactors = FALSE)
    meta <- meta[colnames(pbmc.atac), ]
    pbmc.atac <- AddMetaData(pbmc.atac, metadata = meta)
    #质控,过滤掉scATAC-seq数据中总数少于5K的细胞。
    pbmc.atac <- subset(pbmc.atac, subset = nCount_ATAC > 5000)
    pbmc.atac$tech <- "atac"
    

    数据预处理

    DefaultAssay(pbmc.atac) <- "ACTIVITY"
    pbmc.atac <- FindVariableFeatures(pbmc.atac)
    pbmc.atac <- NormalizeData(pbmc.atac)
    pbmc.atac <- ScaleData(pbmc.atac)
    
    DefaultAssay(pbmc.atac) <- "ATAC"
    VariableFeatures(pbmc.atac) <- names(which(Matrix::rowSums(pbmc.atac) > 100))
    pbmc.atac <- RunLSI(pbmc.atac, n = 50, scale.max = NULL)
    pbmc.atac <- RunUMAP(pbmc.atac, reduction = "lsi", dims = 1:50)
    

    下载已分析的PBMCs数据here.

    pbmc.rna <- readRDS("../data/pbmc_10k_v3.rds")
    pbmc.rna$tech <-"rna"
    
    p1 <- DimPlot(pbmc.atac, reduction = "umap") + NoLegend() + ggtitle("scATAC-seq")
    p2 <- DimPlot(pbmc.rna, group.by = "celltype", label = TRUE, repel = TRUE) + NoLegend() + ggtitle("scRNA-seq")
    p1 + p2
    

    我们可以识别scATAC-seq数据集和scRNA-seq数据集之间的锚点,并使用这些锚点将scRNA-seq数据中细胞类型标签转移到scATAC-seq细胞。

    transfer.anchors <- FindTransferAnchors(reference = pbmc.rna, query = pbmc.atac, features = VariableFeatures(object = pbmc.rna), 
        reference.assay = "RNA", query.assay = "ACTIVITY", reduction = "cca")
    

    为了定义群ID,我们为refdata参数提供了RNA先前注释的细胞类型标签的向量。输出将包含一个矩阵,其中包含每个ATAC细胞的预测和置信度分数。

    celltype.predictions <- TransferData(anchorset = transfer.anchors, refdata = pbmc.rna$celltype, 
        weight.reduction = pbmc.atac[["lsi"]])
    pbmc.atac <- AddMetaData(pbmc.atac, metadata = celltype.predictions)
    

    检查预测分数的分布,并可以选择滤除那些得分较低的细胞

    hist(pbmc.atac$prediction.score.max)
    abline(v = 0.5, col = "red")
    
    table(pbmc.atac$prediction.score.max > 0.5)
    

    我们可以在scATAC-seq数据的UMAP表示形式上查看预测的细胞类型,并发现所转移的标签与UMAP结构高度一致。

    pbmc.atac.filtered <- subset(pbmc.atac, subset = prediction.score.max > 0.5)
    pbmc.atac.filtered$predicted.id <- factor(pbmc.atac.filtered$predicted.id, levels = levels(pbmc.rna))  # to make the colors match
    p1 <- DimPlot(pbmc.atac.filtered, group.by = "predicted.id", label = TRUE, repel = TRUE) + ggtitle("scATAC-seq cells") + 
        NoLegend() + scale_colour_hue(drop = FALSE)
    p2 <- DimPlot(pbmc.rna, group.by = "celltype", label = TRUE, repel = TRUE) + ggtitle("scRNA-seq cells") + 
        NoLegend()
    p1 + p2
    

    最后,为了将所有细胞一起可视化,我们可以将scRNA-seq和scATAC-seq细胞共同嵌入同一低维空间中。

    # note that we restrict the imputation to variable genes from scRNA-seq, but could impute the
    # full transcriptome if we wanted to
    genes.use <- VariableFeatures(pbmc.rna)
    refdata <- GetAssayData(pbmc.rna, assay = "RNA", slot = "data")[genes.use, ]
    
    # refdata (input) contains a scRNA-seq expression matrix for the scRNA-seq cells.  imputation
    # (output) will contain an imputed scRNA-seq matrix for each of the ATAC cells
    imputation <- TransferData(anchorset = transfer.anchors, refdata = refdata, weight.reduction = pbmc.atac[["lsi"]])
    
    # this line adds the imputed data matrix to the pbmc.atac object
    pbmc.atac[["RNA"]] <- imputation
    coembed <- merge(x = pbmc.rna, y = pbmc.atac)
    
    # Finally, we run PCA and UMAP on this combined object, to visualize the co-embedding of both
    # datasets
    coembed <- ScaleData(coembed, features = genes.use, do.scale = FALSE)
    coembed <- RunPCA(coembed, features = genes.use, verbose = FALSE)
    coembed <- RunUMAP(coembed, dims = 1:30)
    coembed$celltype <- ifelse(!is.na(coembed$celltype), coembed$celltype, coembed$predicted.id)
    
    p1 <- DimPlot(coembed, group.by = "tech")
    p2 <- DimPlot(coembed, group.by = "celltype", label = TRUE, repel = TRUE)
    p1 + p2
    

    参考:https://satijalab.org/seurat/v3.2/atacseq_integration_vignette.html

    相关文章

      网友评论

        本文标题:scRNAseq与scATACseq整合分析

        本文链接:https://www.haomeiwen.com/subject/bnfggktx.html