10X V(D)J实战

作者: nvzhang | 来源:发表于2020-08-28 10:42 被阅读0次

    文献

    Generation and function of progenitor T cells from StemRegenin-1–expanded CD34+ human hematopoietic progenitor ells

    背景

    Broader clinical application of umbilical cord blood (UCB), as a source of hematopoietic stem/progenitor cells (HSPCs), is limited by low CD34+ and T-cell numbers, contributing to slow lymphohematopoietic recovery, infection, and relapse. Studies have evaluated the safety, feasibility, and expedited neutrophil recovery associated with the transplantation of CD34+ HSPCs from ex vivo expansion cultures using the aryl hydrocarbon receptor antagonist StemRegenin-1 (SR1).To expedite immune recovery, we hypothesized that SR1-expanded HSPCs together with proT cells could overcome the known T-cell immune deficiency that occurs post-HSCT. Single-cell RNA sequencing of peripheral CD3+ T cells from mice injected with either naive or SR1 proT cells revealed functional subsets of T cells with polyclonal T-cell receptor repertoires.

    数据结构:

    Superseries Subseries
    GSE135929 GSE135927 5' scRNA Seq
    GSE135928 3' VDJ Seq

    其中5' scRNA Seq数据使用cellranger -count进行分析,得到barcode,matrix,features三个结果,用来下游分析;3' VDJ Seq数据使用cellranger -vdj进行分析,得到contigs.annotation.csv文件,用来下游分析。(这些结果也能在GEO上直接下载)

    准备

    mkdir -p bloodadv2019/{srr,cellranger,fastq}
    cd bloodadv2019/srr
    for i in `seq 69 84 `;
    do prefetch SRR99927${i} ;done
    for i in `seq 69 84 `;
    do fastq-dump --gzip --split-files -A SRR9927${i}  SRR9927${i} -O ../fastq; done
    #将fastq文件名修改为cellranger能识别的形式([Sample Name]_S1_L00[Lane Number]_[Read Type]_001.fastq.gz)
    ls *_1.fastq.gz | while read id ;
    do mv ${id%%_*}_1.fastq.gz ${id%%_*}_S1_L001_I1_001.fastq.gz;
    mv ${id%%_*}_2.fastq.gz ${id%%_*}_S1_L001_R1_001.fastq.gz;
    mv ${id%%_*}_3.fastq.gz  ${id%%_*}_S1_L001_R2_001.fastq.gz;
    done
    

    cellranger -count

    安装cellranger和相应参考文库:https://cloud.tencent.com/developer/article/1606141

    cd ../cellranger
    for i in `seq 73 76`;
    do /home/ubuntu/cellranger-4.0.0/cellranger count --id=SR1_rna --fastqs=/home/ubuntu/bloodadv2019/fastq/ --sample=SRR99927${i}_S1 --transcriptome=/home/ubuntu/refdata-cellranger-GRCh38-and-mm10-3.1.0 --nosecondary;
    done
    for i in `seq 69 72`;
    do /home/ubuntu/cellranger-4.0.0/cellranger count --id=naive_rna --fastqs=/home/ubuntu/bloodadv2019/fastq/ --sample=SRR99927${i}_S1 --transcriptome=/home/ubuntu/refdata-cellranger-GRCh38-and-mm10-3.1.0 --nosecondary;
    done
    

    cellranger -vdj

    for i in `seq 81 84`;
    do /home/ubuntu/cellranger-4.0.0/cellranger vdj --id=SR1_vdj --fastqs=/home/ubuntu/bloodadv2019/fastq/ --sample=Naive_TCR --reference=/home/ubuntu/refdata-cellranger-vdj-GRCh38-alts-ensembl-4.0.0/;
    done
    for i in `seq 77 80`;
    do /home/ubuntu/cellranger-4.0.0/cellranger vdj --id=naive_vdj --fastqs=/home/ubuntu/bloodadv2019/fastq/ --sample=Naive_TCR --reference=/home/ubuntu/refdata-cellranger-vdj-GRCh38-alts-ensembl-4.0.0/;
    done
    

    结果整合

    (还在施工)

    Seurat

    三个结果带前缀的话会报错
    library(Seurat)
    library(cowplot)
    library(hdf5r)
    #加载Cellranger output file
    input_naive <- Read10X(data.dir = "/RStudio/working_path/10x vdj/GSE135929_RAW/filtered_gene_bc_matrices/naive/")
    input_SR1 <- Read10X(data.dir = "/RStudio/working_path/10x vdj/GSE135929_RAW/filtered_gene_bc_matrices/SR1/")
    naive <- CreateSeuratObject(counts = input_naive,project = "10X")
    SR1 <- CreateSeuratObject(counts = input_SR1,project = "10X")
    #设置阈值过滤细胞
    naive$percent.mito <- PercentageFeatureSet(naive, pattern = "^mt-")
    VlnPlot(object = naive,features = c('nFeature_RNA','nCount_RNA','percent.mito'))
    naive <- subset(naive,nCount_RNA >= 92 & nCount_RNA <= 30000)
    SR1$percent.mito <- PercentageFeatureSet(SR1, pattern = "^mt-")
    VlnPlot(object = SR1,features = c('nFeature_RNA','nCount_RNA','percent.mito'))
    SR1 <- subset(SR1,nCount_RNA >= 37 & nCount_RNA <= 15000)
    #add contigs
    add_contigs <- function(contigs_path, seurat_obj){
    contigs <- read.csv(contigs_path)
    contigs <- contigs[!duplicated(contigs$barcode), ]
    rownames(contigs) <- contigs[,1]
    contigs[,1] <- NULL
    contigs_seurat <- AddMetaData(seurat_obj, metadata=contigs)}
    s_naive <- add_contigs("./contig/GSM4038045_Naive.csv",naive)
    s_SR1 <- add_contigs("./contig/GSM4038045_Naive.csv",SR1)
    #进行常规workflow
    workflow <- function(seurat_objective){
    seu_obj <- NormalizeData(seurat_objective, normalization.method = "LogNormalize", scale.factor = 10000)
    seu_obj <- FindVariableFeatures(seu_obj,selection.method = "vst", nfeatures = 2000)
    all.genes <- rownames(seu_obj)
    seu_obj <- ScaleData(seu_obj, features = all.genes)
    seu_obj <- RunPCA(seu_obj, features = VariableFeatures(object = seu_obj))
    seu_obj <- RunTSNE(seu_obj, features = VariableFeatures(object = seu_obj))}
    s_naive <- workflow(s_naive)
    s_SR1 <- workflow(s_SR1)
    #cluster
    cluster <- function(seu_obj,n){
    use.pcs = 1:10
    s <- FindNeighbors(seu_obj, dims = use.pcs)
    s <- FindClusters(s, resolution = c(n))
    s <- RunUMAP(s, dims = use.pcs)}
    c_naive <- cluster(s_naive,0.45)
    c_SR1 <-cluster(s_SR1,0.8)
    #plot
    ##changetable
    current.cluster.ids <- c(0, 1, 2, 3, 4, 5, 6)
    new.cluster.ids <- c("C1","C2","C3","C4","C5","C6","C7")
    c_naive@active.ident <- plyr::mapvalues(x = c_naive@active.ident,
      from = current.cluster.ids,to = new.cluster.ids)
    c_SR1@active.ident <- plyr::mapvalues(x = c_SR1@active.ident,
    from = current.cluster.ids,to = new.cluster.ids)
    plot <- function(x,title){
    p1 <- DimPlot(x, reduction = "tsne", label = TRUE) + labs(title = title)
    p2 <- FeaturePlot(x, reduction = "tsne", features = "CD4") +labs(title = title, subtitle = "CD4")
    p3 <- FeaturePlot(x, reduction = "tsne", features = "CD8A")+labs(title = title, subtitle = "CD8A")
    library(patchwork)
    p1+p2+p3}
    plot(c_naive,"Naive")
    plot(c_SR1,"SR1")
    

    图片.png
    #findmarkers
    findmarkers <- function(x,title){s_markers <- FindAllMarkers(x,min.pct = 0.25)
    top5 <- s_markers %>% group_by(cluster) %>% top_n(5, avg_logFC)
    DoHeatmap(x, features = top5$gene) + NoLegend()+ labs(title = title)}
    findmarkers(c_naive,"Naive")
    findmarkers(c_SR1,"SR1")
    

    ???如何做出像原文中指定cluster的热图



    immunarch

    用原始contig数据做出的图太奇怪了,所以我按chain的类型拆成了TRA和TRB.....

    library(immunarch)
    naive_contig <- read.csv("./contig/GSM4038045_Naive.csv")
    SR1_contig <- read.csv("./contig/GSM4038046_SR1.csv")
    split <- function(x,prefix){
    TRA <- x[which(x$chain=="TRA"),]
    TRB <- x[which(x$chain=="TRB"),]
    write.csv(TRA,paste0(prefix,"_TRA.csv"))
    write.csv(TRB,paste0(prefix,"_TRB.csv")) }
    split(naive_contig,"naive")
    split(SR1_contig,"SR1")
    TCR <- repLoad("./contig/TCR/")
    TRA <- repLoad("./contig/TRA/")
    TRB <- repLoad("./contig/TRB/")
    names(TCR$data) <- c("Naive","SR1")
    names(TRA$data) <- c("Naive","SR1")
    names(TRB$data) <- c("Naive","SR1")
    
    repDiversity(.data = TCR$data, .method = "div", .q = 1) %>% vis()
    p4 <- spectratype(TRA$data[[1]], .col="aa+v",.quant = "count") %>% vis()
    p5 <- spectratype(TRA$data[[2]], .col="aa+v",.quant = "count") %>% vis()
    p4/p5
    imm_gu <- geneUsage(TRB$data[c(1, 2)], "hs.trbv", .norm = T)
    vis(imm_gu)
    
    图片.png
    图片.png
    图片.png

    相关文章

      网友评论

        本文标题:10X V(D)J实战

        本文链接:https://www.haomeiwen.com/subject/wducsktx.html