rm(list=ls())
##this Rcode is used for 1):extract target gene expression from GSE file; and 2) draw box plot
##annotation is accomplished by relevant bioconductor package
##----------------
##GSE matrix file download
if(F){
suppressPackageStartupMessages(library(GEOquery))
eSet <- getGEO('GSE6919', destdir=".",
AnnotGPL = F,
getGPL = T)
#getGPL = F: no GPL will be downloaded
#getGPL = T: a GPLxxx.SOFT file will be downloaded
save(eSet,file='GSE6919_eSet.Rdata')
}
#extract probeID2symbol information from featureData of eSet[[1]]
#little tips: according to our goals,if we wanna the epxression values of a gene,we can look the gpl file for this gene first,instead of processing the eSet first.
gpl_soft_df <- as(featureData(eSet[[2]]),'data.frame')
class(gpl_soft_df)
ann_df <- data.frame(ID = rownames(gpl_soft_df),
symbol = gpl_soft_df$`Gene Symbol`)
#whether this gpl has the target gene?
table(ann_df$symbol == 'TRAF4')
#now we have get the probeID of the target gene
#next step: processing the eSet
ann_df_select <- ann_df[ann_df$symbol == 'TRAF4',]
save(eSet, ann_df_select,file = 'GSE6919_eSet_select_probe2symbol.Rdata')
##----------------
##row exprSet(rownames are probeids) and phenotype preparation
rm(list = ls())
load('GSE6919_eSet_select_probe2symbol.Rdata')
b = eSet[[2]]
raw_exprSet=exprs(b)
raw_exprSet[1:4,1:4]
phe=pData(b)
phe$title
library(stringr)
group_list= tolower(str_split(as.character(phe$title),' ',simplify = T)[,1])
# head(group_list);table(group_list)
#identical(rownames(phe),colnames(raw_exprSet)) # the return value must be TRUE
save(raw_exprSet,group_list,ann_df_select,
file='GSE6919_raw_exprSet.Rdata')
##----------------
##annotation and pickout intersted genes
rm(list=ls())
load(file='GSE6919_raw_exprSet.Rdata')
#annotation with pre-processed: ann_df_select
exprSet <- raw_exprSet[ann_df_select$ID,]
exprSet <- log2(exprSet)
##if only one probeID mapped,then
dat=data.frame(gene= exprSet,
mut= group_list)
head(dat)
##if more than one probeIDs maaped, then
##pick out the most max value of rowMeans
dat=data.frame(gene= exprSet[which.max(rowMeans(exprSet)),],
mut= group_list)
head(dat)
#-----------------
#visualization
if(require('ggpubr')){
library(ggpubr)
# google search : ggpubr boxplot add p-value
# http://www.sthda.com/english/articles/24-ggpubr-publication-ready-plots/76-add-p-values-and-significance-levels-to-ggplots/
p <- ggboxplot(dat, x = "mut", y = "gene",
color = "mut", palette = "jco",
add = "jitter")
# Add p-value
p + stat_compare_means()
}
if(require('ggstatsplot')){
library(ggstatsplot)
ggbetweenstats(data = dat, x = mut, y = gene)
}
if(require('ggplot2')){
library(ggplot2)
ggplot(dat,aes(x=mut,y=gene))+
geom_boxplot()+
theme_bw()
}
网友评论