GEO单细胞测序数据生信分析复现

作者: 18b79e7933ad | 来源:发表于2021-06-26 23:50 被阅读0次

    复现文章:Analyses of metastasis-associated genes in IDH wild-type glioma
    数据来源:GSE84465
    图1


    image.png

    数据筛选条件


    image.png
    image.png
    除了min.cells min.features percent.mt,还有Pearson相关系数的指标(Fig S1)
    根据Pearson相关系数大于0.4这个指标,去除了Tumor_BT_S1和Tumor_BT_S4两个样本,只针对剩余的6个样本分析
    Fig S1
    image.png

    第一步:下载GSE84465_GBM_All_data.csv.gz和Metadata的SraRunTable.txt文件
    第二步代码

    library(Seurat)
    library(ggplot2)
    library(clustree)
    library(cowplot)
    library(dplyr)
    
    
    a<-read.table("GSE84465_GBM_All_data.csv.gz")
    head(rownames(a))
    tail(rownames(a),10)
    a=a[1:(nrow(a)-5),]  #最后5行行名异常,剔除
    
    b<-read.table("SraRunTable.txt",sep = ",",header = T)   #样本信息
    table(b$PATIENT_ID)
    table(b$tissue)
    table(b$tissue, b$PATIENT_ID)
    
    new.b <- b[,c("Plate.ID","well","tissue","PATIENT_ID")]
    new.b$sample <- paste0("X",b$Plate.ID,".",b$well)
    head(new.b)
    identical(colnames(a),new.b$sample)
    
    pbmc <- CreateSeuratObject(counts = a)
    head(pbmc@meta.data)
    
    patient<-new.b$PATIENT_ID
    pbmc <- AddMetaData(object = pbmc, metadata = patient, col.name = 'PATIENT_ID')
    tissue<-new.b$tissue
    pbmc <- AddMetaData(object = pbmc, metadata = tissue, col.name = 'Tissue')
    sample<-paste0(tissue,"_",patient)
    pbmc <- AddMetaData(object = pbmc, metadata = sample, col.name = 'Sample')
    head(pbmc@meta.data)
    
    pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
    
    table(pbmc$PATIENT_ID,pbmc$Tissue)
    
    ###########################
    #计算Pearson相关系数
    pbmc.filt <- subset(pbmc, subset = PATIENT_ID=="BT_S6"&Tissue=="Periphery")    #BT_S1 2 4 6 Tumor Periphery
    dim(pbmc.filt)
    
    plot1 <- FeatureScatter(pbmc.filt, feature1 = "nCount_RNA", feature2 = "percent.mt",group.by = "PATIENT_ID",pt.size=1.5)
    plot2 <- FeatureScatter(pbmc.filt, feature1 = "nCount_RNA", feature2 = "nFeature_RNA",group.by = "PATIENT_ID",pt.size=1.5)
    plot1 + plot2
    ############################
    selected_f <- rownames(pbmc)[Matrix::rowSums(pbmc@assays$RNA@counts > 0 ) > 3]
    pbmc.filt <- subset(pbmc, subset = nFeature_RNA > 50 & nFeature_RNA < 6000 & percent.mt < 0.05 & 
                          Sample%in%c("Periphery_BT_S1","Periphery_BT_S2","Periphery_BT_S4","Periphery_BT_S6","Tumor_BT_S2","Tumor_BT_S6"),
                        features = selected_f)
    dim(pbmc.filt)
    VlnPlot(object = pbmc.filt, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),group.by = "Sample")
    
    image.png
    image.png
    #标准化
    pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
    
    ### 鉴定高变异基因
    pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 1500)   #变异最大的1500个基因
    
    #输出特征方差图
    top10 <- head(VariableFeatures(pbmc), 10)
    plot1 <- VariableFeaturePlot(pbmc)
    plot2 <- LabelPoints(plot = plot1, points = top10,repel = TRUE)
    plot1+plot2
    
    image.png

    图2


    image.png
    #标准化
    pbmc <- NormalizeData(pbmc, normalization.method = "LogNormalize", scale.factor = 10000)
    
    ### 鉴定高变异基因
    pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures = 500)   #变异最大的1500个基因
    
    #输出特征方差图
    top10 <- head(VariableFeatures(pbmc), 10)
    plot1 <- VariableFeaturePlot(pbmc)
    plot2 <- LabelPoints(plot = plot1, points = top10,repel = TRUE)
    plot1+plot2
    
    
    all.genes <- rownames(pbmc)
    pbmc <- ScaleData(pbmc, features = all.genes)
    pbmc <- ScaleData(pbmc, vars.to.regress = "percent.mt")
    
    pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
    print(pbmc[["pca"]], dims = 1:5, nfeatures = 5)
    
    
    VizDimLoadings(object = pbmc, dims = 1:2, reduction = "pca",nfeatures = 20)  #4个PC 20个基因
    
    DimPlot(pbmc, reduction = "pca",group.by="Sample")
    
    DimHeatmap(pbmc, dims = 1:2, cells = 500, balanced = TRUE,nfeatures = 30,ncol=2)
    
    pbmc <- JackStraw(object = pbmc, num.replicate = 100)
    pbmc <- ScoreJackStraw(object = pbmc, dims = 1:20)
    JackStrawPlot(object = pbmc, dims = 1:20,reduction = "pca")
    ElbowPlot(pbmc,reduction="pca")  #根据ElbowPlot确定PC数量
    
    ##TSNE聚类分析
    pbmc <- FindNeighbors(pbmc, dims = 1:20)   
    pbmc <- FindClusters(pbmc, resolution = 0.5)  
    
    #tSNE降维
    pbmc <- RunTSNE(object = pbmc, dims = 1:20)   
    DimPlot(pbmc, reduction = "tsne",label = TRUE,pt.size = 1)
    
    image.png
    image.png
    image.png
    image.png
    image.png
    table(pbmc@meta.data$Tissue,pbmc@meta.data$seurat_clusters)
    
    image.png
    ### 差异表达分析
    # 找到每一个cluster当中的marker,并且只展示阳性的marker。
    pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
    head(pbmc.markers, n = 10)
    write.table(pbmc.markers,file = 'pbmc.markers.txt',sep = '\t',row.names=F,quote=F)
    
    library("tidyverse")
    sig.markers<- pbmc.markers %>% select(gene,everything()) %>%
      subset(p_val_adj<0.05 & abs(pbmc.markers$avg_log2FC)>1)
    dim(sig.markers)
    write.table(sig.markers,file="sigmarkers.xls",sep="\t",row.names=F,quote=F)
    
    top10 <- pbmc.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
    #差异基因可视化
    VlnPlot(object = pbmc, features = c("EGFR", "CCL3","AGXT2L1","DCN","GPR17","MOG","SYT1"),group.by = "seurat_clusters",pt.size = 1)
    
    image.png

    Cluster 2 3 6被定义为metastatic tumor cell,比较metastatic和non-mentastatic之间的基因表达差异

    metastatic.markers <- FindMarkers(pbmc, ident.1 = c(2,3,6), ident.2 = c(0,1,4,5,7,8,9,10,11,12), min.pct = 0.25)
    head(metastatic.markers,10)
    sig.metastatic.markers<- metastatic.markers %>% select(colnames(metastatic.markers),everything()) %>%
      subset(p_val_adj<0.05 & abs(metastatic.markers$avg_log2FC)>1)
    dim(sig.metastatic.markers)
    write.table(sig.metastatic.markers,file="sigmetastaticmarkers.xls",sep="\t",row.names=F,quote=F)
    

    相关文章

      网友评论

        本文标题:GEO单细胞测序数据生信分析复现

        本文链接:https://www.haomeiwen.com/subject/aiqtultx.html