2021-12-15 UMAP

作者: 千容安 | 来源:发表于2021-12-17 21:23 被阅读0次
setwd("C:\\Users\\Administrator.DESKTOP-4UQ3Q0K\\Desktop")
library(magrittr)
cts <- data.table::fread('GSE98638_HCC.TCell.S5063.count.txt',stringsAsFactors = F) %>% as.data.frame()
cts <- cts[!duplicated(cts$symbol),]
cts <- cts[!is.na(cts$symbol),]
rownames(cts) <- cts$symbol
cts <- cts[,-c(1:2)]
save(cts,file = 'cts.rda')

meta <- data.table::fread('meta.tsv',header = T)
patient <- data.table::fread('Patient.tsv',header = T)

meta$Patient[which(meta$Patient == 'P1202t')] <- 'P1202'
library(tidyverse)
meta$Tissue <- case_when(str_detect(meta$sampleType,'^J')~'Joint',
                         str_detect(meta$sampleType,'^N')~'Normal',
                         str_detect(meta$sampleType,'^P')~'Blood',
                         str_detect(meta$sampleType,'^T')~'Tumor')
Patient.tsv meta.tsv
meta <- left_join(meta,patient)
#Error: `by` must be supplied when `x` and `y` have no common variables.
#i use by = character()` to perform a cross-join.
#Run `rlang::last_error()` to see where the error occurred.
将patient.tsv中内容作了修改。删掉了
> meta <- left_join(meta,patient)
Joining, by = c("Sex", "Stage", "Tissue")

输入:

rownames(meta) <- meta$UniqueCell_ID

报错:

Error in `.rowNamesDF<-`(x, value = value) : 不允许有重复的'row.names'
In addition: Warning message:
non-unique value when setting 'row.names': ‘’ 

原因:
发现数据底部空行都被识别进去了。可能是因为excel粘贴时多粘贴了几列
处理方法一种是:删除空白行

meta <- meta[,-1]
save(meta,file = 'meta.rda')

library(Seurat)
obj <- CreateSeuratObject(cts,project = 'GSE98638',meta.data = meta)
obj$pct_rp <- PercentageFeatureSet(obj,'^RP[SL]')
obj <- NormalizeData(obj)
obj <- ScaleData(obj)


但是电脑内存还有很多,设置分配给 R 的内存量

> memory.limit()
[1] 8101
> memory.limit(size=56000)
[1] 56000
obj <- FindVariableFeatures(obj)

obj <- RunPCA(obj)

obj <- RunUMAP(obj,dims = 1:20)

pdf('dim_meta.pdf',height = 8,width = 18)
DimPlot(obj,group.by = c('Patient','Sex','sampleType','Stage','Tissue','majorCluster'))
dev.off()  #关闭指定的( 默认为当前 )设备

pdf('dim_majorCluster.pdf',width = 10)
DimPlot(obj,group.by = 'majorCluster',label = T)
dev.off()

save(obj,file = 'seurat_all.rda')

treg <- subset(obj, majorCluster %in% c('C07_CD4-FOXP3','C08_CD4-CTLA4'))
save(treg,file = 'seurat_treg.rda')

treg$Isolation <- case_when(str_detect(treg$sampleType,'H$') ~ 'CD4+CD25-',
                            str_detect(treg$sampleType,'R$') ~ 'CD4+CD25+')

pdf('bar_cellCts_prop.pdf',width = 10,height = 6)
library(ggplot2)
library(patchwork)
ggplot(treg@meta.data) +
  geom_bar(aes(x = Patient,fill = majorCluster)) +
  facet_grid(Tissue ~ Sex,scales = 'free_x') +
  theme_bw() +
  ylab('Cell number of clusters') +
ggplot(treg@meta.data) +
  geom_bar(aes(x = Patient,fill = majorCluster),position = 'fill') +
  facet_grid(Tissue ~ Sex,scales = 'free_x') +
  ylab('Propotion of clusters') +
  theme_bw() +
  plot_layout(guides = 'collect')
dev.off()

treg$grp <- paste(treg$majorCluster,treg$Sex,sep = ':')

Idents(treg) <- paste(treg$majorCluster,treg$Sex,sep = ':')
mk_fm_grp2all <- FindAllMarkers(treg,only.pos = T,test.use = 'MAST')
mk_fm_grp2all <- subset(mk_fm_grp2all,p_val_adj < 0.05)
write.csv(mk_fm_grp2all,file = 'mk_fm_grp2all.csv')

mk_fm_c8 <- FindMarkers(treg,
                        ident.1 = 'C08_CD4-CTLA4:Female',
                        ident.2 = 'C08_CD4-CTLA4:Male',
                        only.pos = T,
                        test.use = 'MAST') %>% subset(p_val_adj < 0.05)

mk_fm_c7 <- FindMarkers(treg,
                        ident.1 = 'C07_CD4-FOXP3:Female',
                        ident.2 = 'C07_CD4-FOXP3:Male',
                        only.pos = T,
                        test.use = 'MAST') %>% subset(p_val_adj < 0.05)
write.csv(mk_fm_c7,file = 'mk_fm_c7.csv')
write.csv(mk_fm_c8,file = 'mk_fm_c8.csv')

Idents(treg) <- treg$Sex
mk_fm <- FindAllMarkers(treg,only.pos = T,test.use = 'MAST')
write.csv(mk_fm,file = 'mk_fm.csv')


intersectG <- intersect(subset(mk_fm_c7,avg_log2FC > 0) %>% rownames(),
                        subset(mk_fm_c8,avg_log2FC > 0) %>% rownames())

DoHeatmap(treg,features = intersectG,group.by = 'grp')


UMAP有随机算法,所以每次生成的图会有微小差异。可以用命令来固定:
set.seed(99)

两次得到的上图不同,normal中male的数据缺失,待解决,以及得到的KEGG图仿佛与肝癌(病人样本)不相关,得树形图后同样,待解决。



相关文章

网友评论

    本文标题:2021-12-15 UMAP

    本文链接:https://www.haomeiwen.com/subject/pcjmfrtx.html