2021-12-15 UMAP

作者: 千容安 | 来源:发表于2021-12-17 21:23 被阅读0次
    setwd("C:\\Users\\Administrator.DESKTOP-4UQ3Q0K\\Desktop")
    library(magrittr)
    cts <- data.table::fread('GSE98638_HCC.TCell.S5063.count.txt',stringsAsFactors = F) %>% as.data.frame()
    cts <- cts[!duplicated(cts$symbol),]
    cts <- cts[!is.na(cts$symbol),]
    rownames(cts) <- cts$symbol
    cts <- cts[,-c(1:2)]
    save(cts,file = 'cts.rda')
    
    meta <- data.table::fread('meta.tsv',header = T)
    patient <- data.table::fread('Patient.tsv',header = T)
    
    meta$Patient[which(meta$Patient == 'P1202t')] <- 'P1202'
    library(tidyverse)
    meta$Tissue <- case_when(str_detect(meta$sampleType,'^J')~'Joint',
                             str_detect(meta$sampleType,'^N')~'Normal',
                             str_detect(meta$sampleType,'^P')~'Blood',
                             str_detect(meta$sampleType,'^T')~'Tumor')
    
    Patient.tsv meta.tsv
    meta <- left_join(meta,patient)
    #Error: `by` must be supplied when `x` and `y` have no common variables.
    #i use by = character()` to perform a cross-join.
    #Run `rlang::last_error()` to see where the error occurred.
    
    将patient.tsv中内容作了修改。删掉了
    > meta <- left_join(meta,patient)
    Joining, by = c("Sex", "Stage", "Tissue")
    

    输入:

    rownames(meta) <- meta$UniqueCell_ID
    

    报错:

    Error in `.rowNamesDF<-`(x, value = value) : 不允许有重复的'row.names'
    In addition: Warning message:
    non-unique value when setting 'row.names': ‘’ 
    

    原因:
    发现数据底部空行都被识别进去了。可能是因为excel粘贴时多粘贴了几列
    处理方法一种是:删除空白行

    meta <- meta[,-1]
    save(meta,file = 'meta.rda')
    
    library(Seurat)
    obj <- CreateSeuratObject(cts,project = 'GSE98638',meta.data = meta)
    obj$pct_rp <- PercentageFeatureSet(obj,'^RP[SL]')
    obj <- NormalizeData(obj)
    obj <- ScaleData(obj)
    
    

    但是电脑内存还有很多,设置分配给 R 的内存量

    > memory.limit()
    [1] 8101
    > memory.limit(size=56000)
    [1] 56000
    
    obj <- FindVariableFeatures(obj)
    
    obj <- RunPCA(obj)
    
    obj <- RunUMAP(obj,dims = 1:20)
    
    pdf('dim_meta.pdf',height = 8,width = 18)
    DimPlot(obj,group.by = c('Patient','Sex','sampleType','Stage','Tissue','majorCluster'))
    dev.off()  #关闭指定的( 默认为当前 )设备
    
    pdf('dim_majorCluster.pdf',width = 10)
    DimPlot(obj,group.by = 'majorCluster',label = T)
    dev.off()
    
    save(obj,file = 'seurat_all.rda')
    
    treg <- subset(obj, majorCluster %in% c('C07_CD4-FOXP3','C08_CD4-CTLA4'))
    save(treg,file = 'seurat_treg.rda')
    
    treg$Isolation <- case_when(str_detect(treg$sampleType,'H$') ~ 'CD4+CD25-',
                                str_detect(treg$sampleType,'R$') ~ 'CD4+CD25+')
    
    pdf('bar_cellCts_prop.pdf',width = 10,height = 6)
    
    library(ggplot2)
    library(patchwork)
    ggplot(treg@meta.data) +
      geom_bar(aes(x = Patient,fill = majorCluster)) +
      facet_grid(Tissue ~ Sex,scales = 'free_x') +
      theme_bw() +
      ylab('Cell number of clusters') +
    ggplot(treg@meta.data) +
      geom_bar(aes(x = Patient,fill = majorCluster),position = 'fill') +
      facet_grid(Tissue ~ Sex,scales = 'free_x') +
      ylab('Propotion of clusters') +
      theme_bw() +
      plot_layout(guides = 'collect')
    dev.off()
    
    treg$grp <- paste(treg$majorCluster,treg$Sex,sep = ':')
    
    Idents(treg) <- paste(treg$majorCluster,treg$Sex,sep = ':')
    mk_fm_grp2all <- FindAllMarkers(treg,only.pos = T,test.use = 'MAST')
    mk_fm_grp2all <- subset(mk_fm_grp2all,p_val_adj < 0.05)
    write.csv(mk_fm_grp2all,file = 'mk_fm_grp2all.csv')
    
    mk_fm_c8 <- FindMarkers(treg,
                            ident.1 = 'C08_CD4-CTLA4:Female',
                            ident.2 = 'C08_CD4-CTLA4:Male',
                            only.pos = T,
                            test.use = 'MAST') %>% subset(p_val_adj < 0.05)
    
    mk_fm_c7 <- FindMarkers(treg,
                            ident.1 = 'C07_CD4-FOXP3:Female',
                            ident.2 = 'C07_CD4-FOXP3:Male',
                            only.pos = T,
                            test.use = 'MAST') %>% subset(p_val_adj < 0.05)
    write.csv(mk_fm_c7,file = 'mk_fm_c7.csv')
    write.csv(mk_fm_c8,file = 'mk_fm_c8.csv')
    
    Idents(treg) <- treg$Sex
    mk_fm <- FindAllMarkers(treg,only.pos = T,test.use = 'MAST')
    write.csv(mk_fm,file = 'mk_fm.csv')
    
    
    intersectG <- intersect(subset(mk_fm_c7,avg_log2FC > 0) %>% rownames(),
                            subset(mk_fm_c8,avg_log2FC > 0) %>% rownames())
    
    DoHeatmap(treg,features = intersectG,group.by = 'grp')
    


    UMAP有随机算法,所以每次生成的图会有微小差异。可以用命令来固定:
    set.seed(99)

    两次得到的上图不同,normal中male的数据缺失,待解决,以及得到的KEGG图仿佛与肝癌(病人样本)不相关,得树形图后同样,待解决。



    相关文章

      网友评论

        本文标题:2021-12-15 UMAP

        本文链接:https://www.haomeiwen.com/subject/pcjmfrtx.html