setwd("C:\\Users\\Administrator.DESKTOP-4UQ3Q0K\\Desktop")
library(magrittr)
cts <- data.table::fread('GSE98638_HCC.TCell.S5063.count.txt',stringsAsFactors = F) %>% as.data.frame()
cts <- cts[!duplicated(cts$symbol),]
cts <- cts[!is.na(cts$symbol),]
rownames(cts) <- cts$symbol
cts <- cts[,-c(1:2)]
save(cts,file = 'cts.rda')
meta <- data.table::fread('meta.tsv',header = T)
patient <- data.table::fread('Patient.tsv',header = T)
meta$Patient[which(meta$Patient == 'P1202t')] <- 'P1202'
library(tidyverse)
meta$Tissue <- case_when(str_detect(meta$sampleType,'^J')~'Joint',
str_detect(meta$sampleType,'^N')~'Normal',
str_detect(meta$sampleType,'^P')~'Blood',
str_detect(meta$sampleType,'^T')~'Tumor')


meta <- left_join(meta,patient)
#Error: `by` must be supplied when `x` and `y` have no common variables.
#i use by = character()` to perform a cross-join.
#Run `rlang::last_error()` to see where the error occurred.


> meta <- left_join(meta,patient)
Joining, by = c("Sex", "Stage", "Tissue")
输入:
rownames(meta) <- meta$UniqueCell_ID
报错:
Error in `.rowNamesDF<-`(x, value = value) : 不允许有重复的'row.names'
In addition: Warning message:
non-unique value when setting 'row.names': ‘’
原因:
发现数据底部空行都被识别进去了。可能是因为excel粘贴时多粘贴了几列
处理方法一种是:删除空白行
meta <- meta[,-1]
save(meta,file = 'meta.rda')
library(Seurat)
obj <- CreateSeuratObject(cts,project = 'GSE98638',meta.data = meta)
obj$pct_rp <- PercentageFeatureSet(obj,'^RP[SL]')
obj <- NormalizeData(obj)
obj <- ScaleData(obj)

但是电脑内存还有很多,设置分配给 R 的内存量
> memory.limit()
[1] 8101
> memory.limit(size=56000)
[1] 56000
obj <- FindVariableFeatures(obj)
obj <- RunPCA(obj)
obj <- RunUMAP(obj,dims = 1:20)
pdf('dim_meta.pdf',height = 8,width = 18)
DimPlot(obj,group.by = c('Patient','Sex','sampleType','Stage','Tissue','majorCluster'))
dev.off() #关闭指定的( 默认为当前 )设备
pdf('dim_majorCluster.pdf',width = 10)
DimPlot(obj,group.by = 'majorCluster',label = T)
dev.off()
save(obj,file = 'seurat_all.rda')
treg <- subset(obj, majorCluster %in% c('C07_CD4-FOXP3','C08_CD4-CTLA4'))
save(treg,file = 'seurat_treg.rda')
treg$Isolation <- case_when(str_detect(treg$sampleType,'H$') ~ 'CD4+CD25-',
str_detect(treg$sampleType,'R$') ~ 'CD4+CD25+')
pdf('bar_cellCts_prop.pdf',width = 10,height = 6)
library(ggplot2)
library(patchwork)
ggplot(treg@meta.data) +
geom_bar(aes(x = Patient,fill = majorCluster)) +
facet_grid(Tissue ~ Sex,scales = 'free_x') +
theme_bw() +
ylab('Cell number of clusters') +
ggplot(treg@meta.data) +
geom_bar(aes(x = Patient,fill = majorCluster),position = 'fill') +
facet_grid(Tissue ~ Sex,scales = 'free_x') +
ylab('Propotion of clusters') +
theme_bw() +
plot_layout(guides = 'collect')
dev.off()
treg$grp <- paste(treg$majorCluster,treg$Sex,sep = ':')
Idents(treg) <- paste(treg$majorCluster,treg$Sex,sep = ':')
mk_fm_grp2all <- FindAllMarkers(treg,only.pos = T,test.use = 'MAST')
mk_fm_grp2all <- subset(mk_fm_grp2all,p_val_adj < 0.05)
write.csv(mk_fm_grp2all,file = 'mk_fm_grp2all.csv')
mk_fm_c8 <- FindMarkers(treg,
ident.1 = 'C08_CD4-CTLA4:Female',
ident.2 = 'C08_CD4-CTLA4:Male',
only.pos = T,
test.use = 'MAST') %>% subset(p_val_adj < 0.05)
mk_fm_c7 <- FindMarkers(treg,
ident.1 = 'C07_CD4-FOXP3:Female',
ident.2 = 'C07_CD4-FOXP3:Male',
only.pos = T,
test.use = 'MAST') %>% subset(p_val_adj < 0.05)
write.csv(mk_fm_c7,file = 'mk_fm_c7.csv')
write.csv(mk_fm_c8,file = 'mk_fm_c8.csv')
Idents(treg) <- treg$Sex
mk_fm <- FindAllMarkers(treg,only.pos = T,test.use = 'MAST')
write.csv(mk_fm,file = 'mk_fm.csv')
intersectG <- intersect(subset(mk_fm_c7,avg_log2FC > 0) %>% rownames(),
subset(mk_fm_c8,avg_log2FC > 0) %>% rownames())
DoHeatmap(treg,features = intersectG,group.by = 'grp')


UMAP有随机算法,所以每次生成的图会有微小差异。可以用命令来固定:
set.seed(99)



两次得到的上图不同,normal中male的数据缺失,待解决,以及得到的KEGG图仿佛与肝癌(病人样本)不相关,得树形图后同样,待解决。


网友评论