美文网首页差异分析单细胞单细胞测序
复现3:AD外周血的单细胞转录组与免疫组库分析

复现3:AD外周血的单细胞转录组与免疫组库分析

作者: 小贝学生信 | 来源:发表于2021-09-08 21:08 被阅读0次

    Single-Cell RNA Sequencing of Peripheral Blood Reveals Immune Cell Signatures in Alzheimer's Disease
    PMID: 34447367 | IF=7.561 | Front Immunol. 2021 Aug 9


    1、关于文章

    • 文献思路:作者对来自AD病人与Normal对照的外周血取样,同时进行单细胞转录组测序与单细胞免疫组库测序,以研究AD病人的外周血免疫微环境的“潜在变化”,具体分析思路如下图所示。


    • 文章相关图表


      单细胞类型注释结果
      基于细胞类型水平,AD与NC差异基因的富集分析
      免疫组库分析(TCR)

    免疫组库以及数据分析的相关教程参考如下
    (1) 淋巴细胞、抗原受体以及免疫组库测序的意义(录屏版)_哔哩哔哩_bilibili
    (2)上游比对:【cellranger】https://support.10xgenomics.com/single-cell-vdj/software/pipelines/latest/using/vdj
    https://support.10xgenomics.com/single-cell-vdj/software/downloads/latest
    (3)下游分析:【scRepertoire】免疫组库数据分析1-scRepertoire - 简书 (jianshu.com)

    2、复现

    努力复现的内容主要包括:scRNA-seq的细胞类型注释、差异分析、富集分析;T细胞免疫组库的相关分析。分析工具主要为Serve Rstudio。

    2.1 scRNA-seq

    (1)数据预处理
    setwd("~/imm/")
    library(Seurat)
    
    fs=list.files('./GSE181279_RAW/','^GSM')
    fs
    library(tidyverse)
    samples=str_split(fs,'_',simplify = T)[,1]
    samples
    lapply(unique(samples),function(x){
      y=fs[grepl(x,fs)]
      folder=paste0("GSE181279_RAW/", str_split(y[1],'_',simplify = T)[,2])
      dir.create(folder,recursive = T)
      #为每个样本创建子文件夹
      file.rename(paste0("GSE181279_RAW/",y[1]),file.path(folder,"barcodes.tsv.gz"))
      #重命名文件,并移动到相应的子文件夹里
      file.rename(paste0("GSE181279_RAW/",y[2]),file.path(folder,"features.tsv.gz"))
      file.rename(paste0("GSE181279_RAW/",y[3]),file.path(folder,"matrix.mtx.gz"))
    })
    
    
    library(Seurat)
    samples=list.files("GSE181279_RAW/")
    samples
    dir <- file.path('./GSE181279_RAW',samples)
    names(dir) <- samples
    #合并方法1
    counts <- Read10X(data.dir = dir)
    scRNA = CreateSeuratObject(counts)
    dim(scRNA)   #查看基因数和细胞总数
    table(scRNA@meta.data$orig.ident)  #查看每个样本的细胞数
    head(scRNA@meta.data)
    scRNA$group = substr(scRNA$orig.ident, 1, 2)
    table(scRNA$group)
    
    #检查数据与文章的细胞数相符,所以应该是已经质控后的
    feats <- c("nFeature_RNA")
    p1=VlnPlot(scRNA, features = feats, pt.size = 0) + 
      NoLegend()
    feats <- c("nCount_RNA")
    p2=VlnPlot(scRNA, features = feats, pt.size = 0) + 
      NoLegend()
    mito_genes=rownames(scRNA)[grep("^MT-", rownames(scRNA))] 
    str(mito_genes) 
    scRNA=PercentageFeatureSet(scRNA, "^MT-", col.name = "percent_mito")
    fivenum(scRNA@meta.data$percent_mito)
    feats <- c("percent_mito")
    p3=VlnPlot(scRNA, features = feats, pt.size = 0) + 
      NoLegend()
    
    p1 | p2 | p3
    
    (2)标准化、高变基因、归一化、降维
    #标准化-归一化-高变基因
    scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize", scale.factor = 10000)
    scRNA <- FindVariableFeatures(scRNA, selection.method = "vst", nfeatures = 2000) 
    # this sclae step may take a long time if set "vars.to.regress"
    scRNA <- ScaleData(scRNA, features = VariableFeatures(scRNA),
                       vars.to.regress = c("nFeature_RNA","percent_mito"))
    
    # 可视化高变基因
    top10 <- head(VariableFeatures(scRNA), 10) 
    p_hvg <- VariableFeaturePlot(scRNA) 
    library(ggplot2)
    LabelPoints(plot = p_hvg, points = top10, repel = TRUE, size=2.5) +
      theme(legend.position = c(0.1,0.8))
    
    scRNA <- RunPCA(scRNA, features = VariableFeatures(scRNA)) 
    ElbowPlot(scRNA, ndims=30, reduction="pca") 
    pc.num=1:20
    # tSNE takes longer time than umap
    scRNA = RunTSNE(scRNA, dims = pc.num)
    scRNA = RunUMAP(scRNA, dims = pc.num)
    DimPlot(scRNA, reduction = "tsne", group.by = "group") +
      DimPlot(scRNA, reduction = "umap", group.by = "group")
    
    (3)分群、细胞类型注释
    scRNA <- FindNeighbors(scRNA, dims = pc.num) 
    scRNA <- FindClusters(scRNA, resolution = c(0.01,0.05,0.1,0.2,0.3,0.4,0.5))
    library(clustree)
    library(patchwork)
    p_clu = clustree(scRNA@meta.data, prefix = "RNA_snn_res.")
    Idents(scRNA) = scRNA$RNA_snn_res.0.3
    table(scRNA@active.ident)
    p_tsne = DimPlot(scRNA, reduction = "tsne", label = T)
    p_clu | p_tsne
    

    因为是外周血样本,所以主要是免疫相关细胞,相关marker基因也比较常见~

    • 先注意查看下每种marker基因在所有cluster的表达情况
    # T cell 0 1 2 3 4 6 12 13
    cg=c("CD3D","CD3E","CD3G")
    DotPlot(scRNA, assay = "RNA",
            features = cg) + coord_flip()  + p_tsne
    # naive T : 10, 12,14,15
    # CD4+ T : CD4  --- 0,1,2
    # CD8+ T : CD8A, CD8B  --- 6, 13
    # NKT : 3,4
    
    cg=c("CD3D","CD3E","CD3G", 
         "CD4",
         "CD8A","CD8B",
         "GZMB", "PRF1",
         "FGFBP2",  "CX3CR1",
         "LEF1","SELL")
    DotPlot(scRNA, assay = "RNA",
            features = cg) + coord_flip()  + p_tsne
    # B cell : 5,9,11
    cg=c("CD19","CD79A","CD79B")
    DotPlot(scRNA, assay = "RNA",
            features = cg) + coord_flip()  + p_tsne
    # natural killer (NK) cells --- 7,8
    cg=c("NKG7","GZMB","GNLY","NCR1")
    DotPlot(scRNA, assay = "RNA",
            features = cg) + coord_flip()  + p_tsne
    # monocyte–macrophage --- 16,17,19
    cg=c("CD14","CD68")  
    DotPlot(scRNA, assay = "RNA",
            features = cg) + coord_flip()  + p_tsne
    
    # mixed with hemoglobin and platelets --- 18
    cg=c("PF4")  
    DotPlot(scRNA, assay = "RNA",
            features = cg) + coord_flip()  + p_tsne
    
    • 然后统一观察
    scRNA@active.ident = factor(scRNA@active.ident, levels = c(15,10,12,14,
                                                               0,1,2,13,6,3,4,
                                                               8,7,5,9,11,
                                                               16,17,19,18))
    cgs = list(
      Tcell = c("CD3D","CD3E","CD3G"),
      naiveT = c("LEF1","SELL"),
      `CD4+T` = c("CD4"),
      `CD8+T` = c("CD8A", "CD8B"),
      NK    = c("GZMB","NKG7","GNLY","NCR1"),
      Bcell = c("CD19","CD79A","CD79B"),
      `Mono/Macr` = c("CD14","CD68"),
      Mixed = c("PF4"))
    
    
    p_tmp=DotPlot(scRNA, features = cgs, assay = "RNA") + 
      theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) 
    p_tmp
    
    • 最后进行注释
    scRNA$celltype = case_when(
      scRNA@active.ident %in% c(10,12,14,15) ~ "naive T",
      scRNA@active.ident %in% c(0,1,2) ~ "CD4+ T",
      scRNA@active.ident %in% c(6,13) ~ "CD8+ T",
      scRNA@active.ident %in% c(3,4) ~ "NKT",
      scRNA@active.ident %in% c(7,8) ~ "NK",
      scRNA@active.ident %in% c(5,9,11) ~ "B",
      scRNA@active.ident %in% c(16,17,19) ~ "Mono/Macr",
      scRNA@active.ident %in% c(18) ~ "Mixed"
    )
    table(scRNA$celltype)
    Idents(scRNA) = scRNA$celltype
    table(scRNA@active.ident)
    #调整因子顺序,美观的dotplot
    scRNA@active.ident = factor(scRNA@active.ident, 
                                levels = c("naive T","CD4+ T","CD8+ T","NKT",
                                           "NK","B","Mono/Macr","Mixed"))
    p_anno=DimPlot(scRNA, reduction = "tsne",
            cols = c("#e41a1c","#377eb8","#4daf4a","#984ea3",
                     "#ff7f00","#ffff33","#a65628","#f781bf"))
    p_tmp=DotPlot(scRNA, features = cgs, assay = "RNA") + 
      theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) 
    
    p_group=DimPlot(scRNA, reduction = "tsne", group.by = "group",
            cols = c("#d7191c","#2b83ba"))
    p_tmp + p_anno
    
    image.png
    • 相关marker基因的单独展示(文章的图)
    FeaturePlot(scRNA, features = "CD3D", reduction = "tsne", 
                cols = c("#d9d9d9", "#ef3b2c")) | p_anno
    FeaturePlot(scRNA, features = "CD4", reduction = "tsne", 
                cols = c("#d9d9d9", "#ef3b2c")) | p_anno  
    FeaturePlot(scRNA, features = "CD8A", reduction = "tsne", 
                cols = c("#d9d9d9", "#ef3b2c")) | p_anno 
    
    VlnPlot(scRNA, features = c("CD79A","CD3D","CD4","GZMB"), group.by = "RNA_snn_res.0.3", 
            pt.size = 0, ncol = 1) + NoLegend()
    
    (4)细胞组成占比
    pie_data = scRNA@meta.data[,c("celltype","group")] 
    library(ggplot2)
    #remotes::install_github("Rkabacoff/ggpie")
    library(ggpie)
    pie_AD = ggpie(subset(pie_data, group == "AD"), celltype, offset=0.7, title="AD") 
    pie_NC = ggpie(subset(pie_data, group == "NC"), celltype, offset=0.7, title="NC")
    pie_AD | pie_NC 
    
    (5)FindAllMarkers细胞类型marker gene热图
    library(future)
    plan()
    plan("multiprocess", workers = 4)
    plan()
    
    table(scRNA@active.ident)
    scRNA = scRNA[,!scRNA@active.ident %in%  "Mixed"]
    table(scRNA@active.ident)
    scRNA_sample = subset(scRNA, downsample = 2000)
    table(scRNA_sample@active.ident)
    diff.wilcox = FindAllMarkers(scRNA_sample)
    head(diff.wilcox)
    
    
    library(tidyverse)
    all.markers = diff.wilcox %>% select(gene, everything()) %>%
      subset(p_val<0.05 & abs(diff.wilcox$avg_log2FC) > 0.5)
    top20 = all.markers %>% group_by(cluster) %>% top_n(n = 20, wt = avg_log2FC)
    scRNA_sample = ScaleData(scRNA_sample, features = top20$gene,
                             vars.to.regress = c("nFeature_RNA","percent_mito"))
    DoHeatmap(scRNA_sample, features = top20$gene) +
      scale_fill_gradientn(colors = c("#2171b5", "#f7fbff", "#ef3b2c"))
    
    (6)细胞类型水平的AD与NC差异分析
    • 仅分析5种免疫细胞类型
    scRNA_sub = scRNA[,scRNA@active.ident %in% c("B","CD4+ T","CD8+ T","NK","Mono/Macr")]
    table(scRNA_sub$main, scRNA_sub$group)
    #差异分析
    scRNA_sub$compare = paste(scRNA_sub$main, scRNA_sub$group, sep = "_")
    table(scRNA_sub$compare)
    ct = levels(scRNA_sub@active.ident)
    all_markers = lapply(ct, function(x){
      # x = ct[1]
      print(x)
      markers <- FindMarkers(scRNA_sub, group.by = "compare",
                             logfc.threshold = 0.1,
                             ident.1 = paste(x,"AD",sep = "_"),
                             ident.2 = paste(x,"NC",sep = "_"))
      #markers_sig <- subset(markers, p_val_adj < 0.1)
      return(markers)
    })
    length(all_markers)
    names(all_markers) = ct
    lapply(all_markers,nrow)
    all_markers_sig = lapply(all_markers, function(x){
      markers_sig <- subset(x, p_val_adj < 0.05 & abs(avg_log2FC) > 0.5)
      #markers_sig <- subset(x, p_val < 0.0001)
    })
    sapply(all_markers_sig,nrow)
    # CD4+ T        NK    CD8+ T         B Mono/Macr 
    # 141       214       186       154       179
    
    degs = lapply(all_markers_sig, rownames)
    #install.packages("UpSetR")
    library(UpSetR)
    #upset(fromList(degs))
    combns = lapply(1:10, function(i){
      c(combn(names(degs),2)[,i])
    })
    mt = fromList(degs)
    #list name不能含有特殊字符
    colnames(mt) = gsub(" ","",colnames(mt))
    colnames(mt) = gsub("[+]","",colnames(mt))
    colnames(mt) = gsub("[/]","",colnames(mt))
    combns = lapply(1:10, function(i){
      c(combn(colnames(mt),2)[,i])
    })
    #展示出仅两两交集的关系
    UpSetR::upset(mt, intersections = combns)
    
    (7)富集分析
    library(clusterProfiler)
    library(org.Hs.eg.db)
    msigdb_reac = clusterProfiler::read.gmt("./geneset/c2.cp.reactome.v7.4.symbols.gmt")
    msigdb_kegg = clusterProfiler::read.gmt("./geneset/c2.cp.kegg.v7.4.symbols.gmt")
    msigdb_bp = clusterProfiler::read.gmt("./geneset/c5.go.bp.v7.4.symbols.gmt")
    msigdb = rbind(msigdb_reac, msigdb_kegg, msigdb_bp)
    res_CD4 = enricher(degs$`CD4+ T`, TERM2GENE = msigdb, 
                   pvalueCutoff = 1, qvalueCutoff = 1)
    res_CD8 = enricher(degs$`CD8+ T`, TERM2GENE = msigdb, 
                       pvalueCutoff = 1, qvalueCutoff = 1)
    res_NK = enricher(degs$NK, TERM2GENE = msigdb, 
                       pvalueCutoff = 1, qvalueCutoff = 1)
    res_B = enricher(degs$B, TERM2GENE = msigdb, 
                      pvalueCutoff = 1, qvalueCutoff = 1)
    res_Mo = enricher(degs$`Mono/Macr`, TERM2GENE = msigdb, 
                     pvalueCutoff = 1, qvalueCutoff = 1)
    
    #参考文章,展示特定20条通路在5种免疫细胞DEG的富集程度
    library(clusterProfiler)
    library(org.Hs.eg.db)
    msigdb_reac = clusterProfiler::read.gmt("./geneset/c2.cp.reactome.v7.4.symbols.gmt")
    msigdb_kegg = clusterProfiler::read.gmt("./geneset/c2.cp.kegg.v7.4.symbols.gmt")
    msigdb_bp = clusterProfiler::read.gmt("./geneset/c5.go.bp.v7.4.symbols.gmt")
    msigdb = rbind(msigdb_reac, msigdb_kegg, msigdb_bp)
    res_CD4 = enricher(degs$`CD4+ T`, TERM2GENE = msigdb, 
                   pvalueCutoff = 1, qvalueCutoff = 1)
    res_CD8 = enricher(degs$`CD8+ T`, TERM2GENE = msigdb, 
                       pvalueCutoff = 1, qvalueCutoff = 1)
    res_NK = enricher(degs$NK, TERM2GENE = msigdb, 
                       pvalueCutoff = 1, qvalueCutoff = 1)
    res_B = enricher(degs$B, TERM2GENE = msigdb, 
                      pvalueCutoff = 1, qvalueCutoff = 1)
    res_Mo = enricher(degs$`Mono/Macr`, TERM2GENE = msigdb, 
                     pvalueCutoff = 1, qvalueCutoff = 1)
    #Top20 genesets
    pathways = c("GOBP_POSITIVE_REGULATION_OF_CELL_DEATH",
    "GOBP_PHAGOCYTOSIS",
    "GOBP_REGULATION_OF_TUMOR_NECROSIS_FACTOR_MEDIATED_SIGNALING_PATHWAY",
    "GOBP_CELLULAR_DEFENSE_RESPONSE",
    "GOBP_POSITIVE_REGULATION_OF_DEFENSE_RESPONSE",
    "GOBP_RESPONSE_TO_BACTERIUM",
    "GOBP_OSTEOCLAST_DIFFERENTIATION",
    "GOBP_INTERFERON_GAMMA_PRODUCTION",
    "REACTOME_ADAPTIVE_IMMUNE_SYSTEM",
    "GOBP_LYMPHOCYTE_ACTIVATION",
    "GOBP_IMMUNE_RESPONSE_REGULATING_SIGNALING_PATHWAY",
    "GOBP_LEUKOCYTE_MIGRATION",
    "GOBP_MYELOID_LEUKOCYTE_ACTIVATION",
    "REACTOME_HEMOSTASIS",
    "KEGG_NATURAL_KILLER_CELL_MEDIATED_CYTOTOXICITY",
    "KEGG_HEMATOPOIETIC_CELL_LINEAGE",
    "GOBP_CYTOKINE_MEDIATED_SIGNALING_PATHWAY",
    "GOBP_ADAPTIVE_IMMUNE_RESPONSE",
    "GOBP_B_CELL_ACTIVATION")
    
    test = res_CD4@result
    colnames(test)
    enrich_res2 = list(
      CD4 = res_CD4,
      CD8 = res_CD8,
      NK = res_NK,
      Mo = res_Mo,
      B = res_B)
    res_hp = lapply(enrich_res2, function(x){
      -log10(x@result[pathways,"pvalue"])
    })
    res_hp = do.call(cbind, res_hp)
    rownames(res_hp) = pathways
    res_hp[is.na(res_hp)] = -5
    pheatmap::pheatmap(res_hp, cluster_rows = F, cluster_cols = F, 
                       color = colorRampPalette(colors = c("grey","white","red"))(100))
    

    2.2 免疫组库分析(TCR为例)

    (1)上游比对
    • 以其中一个数据为例
    #GSM5494112 AD2_TCR
    #SRR15319897
    #SRR15319898
    #SRR15319899
    #SRR15319900
    
    cat fq.txt | while read id
    do
    wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/${id:0:6}/0${id:0-2}/${id}/${id}_1.fastq.gz
    wget -c ftp://ftp.sra.ebi.ac.uk/vol1/fastq/${id:0:6}/0${id:0-2}/${id}/${id}_2.fastq.gz
    done
    
    mv SRR15319897_1.fastq.gz AD2_TCR_S2_L001_R1_001.fastq.gz
    mv SRR15319897_2.fastq.gz AD2_TCR_S2_L001_R2_001.fastq.gz
    mv SRR15319898_1.fastq.gz AD2_TCR_S2_L002_R1_001.fastq.gz
    mv SRR15319898_2.fastq.gz AD2_TCR_S2_L002_R2_001.fastq.gz
    mv SRR15319899_1.fastq.gz AD2_TCR_S2_L003_R1_001.fastq.gz
    mv SRR15319899_2.fastq.gz AD2_TCR_S2_L003_R2_001.fastq.gz
    mv SRR15319900_1.fastq.gz AD2_TCR_S2_L004_R1_001.fastq.gz
    mv SRR15319900_2.fastq.gz AD2_TCR_S2_L004_R2_001.fastq.gz
    
    bin=~/biosoft/cellranger-6.0.2/bin/cellranger
    db=~/imm/refdata-cellranger-vdj-GRCh38-alts-ensembl-5.0.0
    fq_dir=./fastq/
    $bin vdj --id=AD2_TCR \
                     --reference=$db \
                     --fastqs=$fq_dir \
                     --sample=AD2_TCR \
                     --localcores=8 \
                     --localmem=64
    
    (2)下游分析
    【2.1】 整合所有TCR数据,以及分组信息
    # library(devtools)
    # install_github("ncborcherding/scRepertoire")
    library(Seurat)
    library(scRepertoire)
    library(tidyverse)
    library(patchwork)
    
    fls = list.files("VDJ/", pattern = "*TCR_filtered_contig_annotations.csv", full.names = T)
    TCR = lapply(fls, function(x){
      tcr = read.csv(x)
      return(tcr)
    })
    
    combined_TCR <- combineTCR(TCR, 
                               samples = c("AD1", "AD2", "AD3", "NC1", "NC2"),
                               cells = "T-AB")
    combined_TCR <- addVariable(combined_TCR, name = "group", 
                                variables = c("AD", "AD", "AD", "NC", "NC"))
    names(combined_TCR) = c("AD1", "AD2", "AD3", "NC1", "NC2")
    str(combined_TCR)
    
    【2.2】 图A:每个样本的Top10 克隆型; 图B:样本克隆型类型的分布情况(按AD、NC分组)
    #P_A: 以氨基酸序列作为克隆型依据
    Top10_clone = lapply(combined_TCR, function(x){
      head(sort(table(x$CTaa), decreasing = T),10) / nrow(x)
    })
    Top10_clone_stat = do.call(cbind, Top10_clone)
    rownames(Top10_clone_stat) = 1:10 
    Top10_clone_stat=reshape2::melt(Top10_clone_stat)
    colnames(Top10_clone_stat) = c("ID","sample","Freq")
    Top10_clone_stat$group = substr(Top10_clone_stat$sample,1,2)
    head(Top10_clone_stat)
    Top10_clone_stat$sample = factor(Top10_clone_stat$sample,
                                     levels = c("NC1","NC2","AD1","AD2","AD3"))
    p_A = ggplot(Top10_clone_stat, aes(x=sample, y=Freq, color = group)) +
      geom_boxplot() + 
      ggtitle(label = "The relative frequency of\n the 10 most abundant clonesis") +
      theme_bw() + 
      theme(
        # Hide panel borders and remove grid lines
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        # Change axis line
        axis.line = element_line(colour = "black", size = 0.5),
        #no legend
        legend.position = "none",
        # center legend
        plot.title = element_text(hjust = 0.5)
      )
    #P_B: 样本克隆型的分布情况
    clone_NC1 = unique(combined_TCR$NC1$CTaa)
    clone_NC2 = unique(combined_TCR$NC2$CTaa)
    # intersect(clone_NC1, clone_NC2)
    veen_NC = list(NC1 = clone_NC1,
                      NC2 = clone_NC2)
    library(VennDiagram)
    library(cowplot)
    library(ggplotify)
    library(gridGraphics)
    
    # plt=venn.diagram(veen_NC, filename=NULL,fill = c("#beaed4", "#fdc086"),
    #                  cat.pos = c(0, 0))
    # p_tmp <- grobTree(plt)
    # as.ggplot(plot_grid(p_tmp))
    v = draw.pairwise.venn(
      area1 = 61,
      area2 = 59,
      cross.area = 20,
      category = c("NC1", "NC2"),
      fill = c("#beaed4", "#fdc086"),
      cat.pos = c(-180,-180),
      lty = 1, cat.cex = 2)
    #lapply(v, function(i) i$label)
    v[[5]]$label = 3573
    v[[6]]$label = 3435
    v[[7]]$label = 137
    p_tmp <- grobTree(v)
    p_B1=as.ggplot(plot_grid(p_tmp))
    
    
    clone_AD1 = unique(combined_TCR$AD1$CTaa)
    clone_AD2 = unique(combined_TCR$AD2$CTaa)
    clone_AD3 = unique(combined_TCR$AD3$CTaa)
    # veen_NC = list(AD1 = clone_AD1,
    #                AD3 = clone_AD3,
    #                AD2 = clone_AD2)
    # plt=venn.diagram(veen_NC, filename=NULL)
    # p_tmp <- grobTree(plt)
    # as.ggplot(plot_grid(p_tmp))
    
    
    v = draw.triple.venn(
      area1 = 60,
      area2 = 60,
      area3 = 60,
      n12 = 21,
      n23 = 22,
      n13 = 19,
      n123 = 10,
      category = c("AD2", "AD1", "AD3"),
      fill = c("#8dd3c7", "#b3cde3", "#ccebc5"),
      lty = 1, 
      cat.cex = 2,
      cat.just=list(c(0.5,0) , c(0.5,0) , c(0.5,0.7)),
      cat.pos = c(150,-150,0),
      rotation.degree =180)
    #lapply(v, function(i) i$label)
    v[[7]]$label = 4380
    v[[8]]$label = 0
    v[[9]]$label = 3704
    v[[10]]$label = 1
    v[[11]]$label = 0
    v[[12]]$label = 0
    v[[13]]$label = 4168
    p_tmp <- grobTree(v)
    p_B2=as.ggplot(plot_grid(p_tmp))
    p_B = p_B1 | p_B2
    
    #合并图片
    (p_A | p_B) + plot_layout(widths = c(1,2))
    
    【2.3】 图C:AD、NC组的克隆型多样性评价; 图D:每个样本的TCR受体氨基酸序列长度分布特征
    #P_C 
    p=clonalDiversity(combined_TCR, cloneCall = "aa", group = "group")
    diver_stat = p$data
    diver_stat_inv = subset(diver_stat, variable == "Inv.Simpson")
    p_C1=ggplot(diver_stat_inv, aes(x=group, y=value )) +
      geom_bar(stat = "identity", fill = "#5fba7e", width=0.4) +
      ggtitle("Invsimpson") + 
      scale_y_continuous(expand=c(0,0))  +
      theme_bw() + 
      theme(
        # Hide panel borders and remove grid lines
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        # Change axis line
        axis.line = element_line(colour = "black", size = 0.5),
        # center legend
        plot.title = element_text(hjust = 0.5)
        )
    diver_stat_shan = subset(diver_stat, variable == "Shannon")
    p_C2=ggplot(diver_stat_shan, aes(x=group, y=value )) +
      geom_bar(stat = "identity", fill = "#fed976", width=0.4) +
      coord_cartesian(ylim = c(7.8, 8.5)) +
      ggtitle("Shannon") +
      scale_y_continuous(expand=c(0,0))  +
      theme_bw() + 
      theme(
        # Hide panel borders and remove grid lines
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        # Change axis line
        axis.line = element_line(colour = "black", size = 0.5),
        # center legend
        plot.title = element_text(hjust = 0.5))
    p_C = p_C1 | p_C2
    
    
    p_D1 = lengthContig(combined_TCR, cloneCall="aa", chain = "TRA") + 
      scale_fill_manual(values=c("#fec44f", "#fe9929", "#ec7014", "#74a9cf", "#3690c0")) +
      ggtitle(label = "TRA")
    p_D2 = lengthContig(combined_TCR, cloneCall="aa", chain = "TRB") + 
      scale_fill_manual(values=c("#fec44f", "#fe9929", "#ec7014", "#74a9cf", "#3690c0")) +
      ggtitle(label = "TRB")
    p_D = p_D1 / p_D2 + plot_layout(guides = "collect")
    
    (p_C | p_D) 
    
    【2.4】 图E:TCR受体的V基因、J基因家族丰度分布
    ## TRB V/J gene family
    ### V
    gene_family = lapply(combined_TCR, function(x){
      x = combined_TCR[[1]]
      return(x$TCR2)
    })
    gene_family_NC = c(gene_family[[4]], gene_family[[5]])
    gene_family_NC_V = stringr::str_split(gene_family_NC, "[.]",simplify = T)[,1]
    table(gene_family_NC_V)
    sle_J = c("TRBV5-1","TRBV28","TRBV7-9","TRBV19","TRBV3-1","TRBV29-1",
              "TRBV5-4","TRBV7-8","TRBV20-1","TRBV16","TRBV9","TRBV7-3")
    gene_family_NC_V[!(gene_family_NC_V %in% sle_J)] = "other"
    table(gene_family_NC_V)
    
    gene_family_AD = c(gene_family[[1]], gene_family[[2]], gene_family[[3]])
    gene_family_AD_V = stringr::str_split(gene_family_AD, "[.]",simplify = T)[,1]
    table(gene_family_AD_V)
    gene_family_AD_V[!(gene_family_AD_V %in% sle_J)] = "other"
    table(gene_family_AD_V)
    
    tb1 = as.data.frame(table(gene_family_NC_V))
    colnames(tb1) = c("TRBV","Frequency")
    tb1$group = "NC"
    tb2 = as.data.frame(table(gene_family_AD_V))
    colnames(tb2) = c("TRBV","Frequency")
    tb2$group = "AD"
    tb=rbind(tb1, tb2)
    tb$TRBV = factor(tb$TRBV, levels = rev(c("other","TRBV5-1","TRBV28","TRBV7-9","TRBV19","TRBV3-1","TRBV29-1",
                                         "TRBV5-4","TRBV7-8","TRBV20-1","TRBV16","TRBV9","TRBV7-3")))
    p_E1=ggplot(tb, aes(x=group, y=Frequency, fill=TRBV)) +
      geom_bar(stat = "identity", position="fill", width = 0.5) +
      scale_fill_manual(values=rev(c("#bdbdbd","#9e0142","#d53e4f","#f46d43","#fdae61","#fee08b",
                                 "#ffffbf","#e6f598","#abdda4","#66c2a5","#3288bd","#5e4fa2","#003c30"))) +
      ggtitle(label = "TRBV family") +
      scale_y_continuous(expand=c(0,0))  +
      theme_bw() + 
      theme(
        # Hide panel borders and remove grid lines
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        # Change axis line
        axis.line = element_line(colour = "black", size = 0.5),
        # center legend
        plot.title = element_text(hjust = 0.5))
    
    
    
    
    ### J
    gene_family = lapply(combined_TCR, function(x){
      x = combined_TCR[[1]]
      return(x$TCR2)
    })
    gene_family_NC = c(gene_family[[4]], gene_family[[5]])
    gene_family_NC_J = stringr::str_split(gene_family_NC, "[.]",simplify = T)[,3]
    table(gene_family_NC_J)
    gene_family_NC_J[gene_family_NC_J == ""] = "other"
    table(gene_family_NC_J)
    
    gene_family_AD = c(gene_family[[1]], gene_family[[2]], gene_family[[3]])
    gene_family_AD_J = stringr::str_split(gene_family_AD, "[.]",simplify = T)[,3]
    table(gene_family_AD_J)
    gene_family_AD_J[gene_family_AD_J == ""] = "other"
    table(gene_family_AD_J)
    
    tb1 = as.data.frame(table(gene_family_NC_J))
    colnames(tb1) = c("TRBJ","Frequency")
    tb1$group = "NC"
    tb2 = as.data.frame(table(gene_family_AD_J))
    colnames(tb2) = c("TRBJ","Frequency")
    tb2$group = "AD"
    tb=rbind(tb1, tb2)
    
    p_E2=ggplot(tb, aes(x=group, y=Frequency, fill=TRBJ)) +
      geom_bar(stat = "identity", position="fill", width = 0.5) +
      scale_fill_manual(values=c("#bdbdbd","#9e0142","#d53e4f","#f46d43","#fdae61","#fee08b","#bf812d",
                                     "#ffffbf","#e6f598","#abdda4","#66c2a5","#3288bd","#5e4fa2","#003c30")) +
      ggtitle(label = "TRBJ family") +
      scale_y_continuous(expand=c(0,0))  +
      theme_bw() + 
      theme(
        # Hide panel borders and remove grid lines
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        # Change axis line
        axis.line = element_line(colour = "black", size = 0.5),
        # center legend
        plot.title = element_text(hjust = 0.5))
    p_E = p_E1 | p_E2
    

    相关文章

      网友评论

        本文标题:复现3:AD外周血的单细胞转录组与免疫组库分析

        本文链接:https://www.haomeiwen.com/subject/aatbwltx.html