美文网首页
比较特异性乳腺癌相关成纤维细胞(CAFs)耐药与敏感患者的分子特

比较特异性乳腺癌相关成纤维细胞(CAFs)耐药与敏感患者的分子特

作者: Forest_Lee | 来源:发表于2019-04-15 21:35 被阅读0次
rm( list = ls() ) #一键清空,好习惯

options( stringsAsFactors = F )

library( "GEOquery" ) #加载R包,用于下载GEO数据

GSE_name = 'GSE108565'

options( 'download.file.method.GEOquery' = 'libcurl' ) #长城外解决方法之一

gset <- getGEO( GSE_name, getGPL = T ) #获取GEO数据,包括注释信息GPL

save( gset, file = 'gset_2.Rdata' ) #保存

load("gset_2.Rdata") #加载
gset <- gset[[1]] #降级处理
pdata <- pData(gset) #获取样本信息
exprSet <- exprs(gset) #获取表达矩阵
GPL = gset@featureData@data #获取注释信息
gset
chl <- length(colnames(pdata)) #看一下样本信息的列数
group_list <- as.character(pdata[,"neo-adjuvant chemotherapy:ch1"]) #得到分组信息 即对治疗抵抗和敏感
dim(exprSet)
exprSet[1:5,1:5] #看一下前几行
table(group_list) #统计分组信息 7抵抗 7敏感
save(exprSet,file = "final_exprSet.Rdata") #随时保存 下次直接load
## 下面就是画图plot的过程

library( "ggfortify" ) #ggfortify包基于ggplot2 可以Plotting PCA

## 聚类

{
  
  colnames( exprSet ) = paste( group_list, 1:ncol( exprSet ), sep = '_' ) #根据分组信息表达矩阵改变列名 
  
  nodePar <- list( lab.cex = 0.4, pch = c( NA, 19 ), cex = 0.6, col = "red" ) #调整参数
  
  hc = hclust( dist( t( exprSet ) ) ) #聚类表达矩阵
  
  png('hclust.png', res = 100, height = 1800) #生成画板 设置参数
  
  plot( as.dendrogram( hc ), nodePar = nodePar, horiz = TRUE ) #在画板上水平plot聚类结果的dendrogram即系统树图
  
  dev.off() #关闭指定的绘图(默认情况下为当前设备)
  
}
image.png
## PCA主成分分析

data = as.data.frame( t( exprSet ) ) #transpose exprSet

data$group = group_list 

png( 'pca_plot.png', res=100 )

autoplot( prcomp( data[ , 1:( ncol( data ) - 1 ) ] ), data = data, colour = 'group') + theme_bw() #进行PCA分析 图片保存在'pca_plot.png',也可以先plot再保存,可选png或pdf格式 ;theme_bw():A theme with white background and black gridlines.

dev.off() #关闭
image.png
### 差异分析 
load( "./final_exprSet.Rdata" )

library( "limma" ) #使用limma包找出表达矩阵的差异表达基因DEGs 看说明书 GEO2R的差异分析就是基于limma包

{
  
  design <- model.matrix( ~0 + factor( group_list ) ) #建立合适的设计矩阵
  
  colnames( design ) = levels( factor( group_list ) ) #改变列名
  
  rownames( design ) = colnames( exprSet ) #改变行名
  
}

design


## 为了使两组之间的成对比较,可以创建适当的对比度矩阵
contrast.matrix <- makeContrasts( "resistant-sensitive", levels = design )

contrast.matrix



{
  
  fit <- lmFit( exprSet, design )
  
  fit2 <- contrasts.fit( fit, contrast.matrix ) 
  
  fit2 <- eBayes( fit2 )
  
  nrDEG = topTable( fit2, coef = 1, n = Inf ) #获取差异表达基因
  
  write.table( nrDEG, file = "nrDEG.out") #output
  
}

head(nrDEG)
results <- decideTests(fit2) #每个假设检验的结果可以用fit2获得
vennDiagram(fit2) #在每一个比较中都可以得到一个显示有意义的基因数量的维恩图
image.png
## heatmap 画热图

library( "pheatmap" )

{
  
  nrDEG_Z = nrDEG[ order( nrDEG$logFC ), ] #按照logFC由小到大排列 nrDEG生成nrDEG_Z
  
  nrDEG_F = nrDEG[ order( -nrDEG$logFC ), ] #同上
  
  choose_gene = c( rownames( nrDEG_Z )[1:100], rownames( nrDEG_F )[1:100] ) #在差异表达基因中选择前100上调top和前100下调down
  
  choose_matrix = exprSet[ choose_gene, ] #组成新的选择矩阵
  
  choose_matrix = t( scale( t( choose_matrix ) ) ) #t(scale(t()))标准化 行的值
  
  
  
  choose_matrix[choose_matrix > 1] = 1
  
  choose_matrix[choose_matrix < -1] = -1
  
  
  
  annotation_col = data.frame( CellType = factor( group_list ) )
  
  rownames( annotation_col ) = colnames( exprSet )
  
  pheatmap( fontsize = 2, choose_matrix, annotation_col = annotation_col, show_rownames = F, 
            
            annotation_legend = F, cluster_cols = F,filename = "heatmap.png") #画热图 图的基本字体大小为2 不显示行名 不显示图例 不显示列的聚类 具体参数可适当修改
  
}
image.png
## volcano plot 画火山图

library( "ggplot2" )

logFC_cutoff <- with( nrDEG, mean( abs( logFC ) ) + 2 * sd( abs( logFC ) ) )

logFC_cutoff #注意理解含义



{
  
  nrDEG$change = as.factor( ifelse( nrDEG$P.Value < 0.01 & abs(nrDEG$logFC) > logFC_cutoff,
                                    
                                    ifelse( nrDEG$logFC > logFC_cutoff , 'UP', 'DOWN' ), 'NOT' ) ) #新建change列,满足nrDEG$P.Value < 0.01 & abs(nrDEG$logFC)则change为up,否则,若满足nrDEG$logFC > logFC_cutoff则change为down,否则为not
  
  
  
  save( nrDEG, file = "nrDEG.Rdata" )
  
  
  
  this_tile <- paste0( 'Cutoff for logFC is ', round( logFC_cutoff, 3 ),
                       
                       '\nThe number of up gene is ', nrow(nrDEG[ nrDEG$change =='UP', ] ),
                       
                       '\nThe number of down gene is ', nrow(nrDEG[ nrDEG$change =='DOWN', ] ) ) #round( logFC_cutoff, 3 ) 保留小数点后3位
  
  
  
  volcano = ggplot(data = nrDEG, aes( x = logFC, y = -log10(P.Value), color = change)) +
    
    geom_point( alpha = 0.4, size = 1.75) +
    
    theme_set( theme_set( theme_bw( base_size = 15 ) ) ) +
    
    xlab( "log2 fold change" ) + ylab( "-log10 p-value" ) +
    
    ggtitle( this_tile ) + theme( plot.title = element_text( size = 15, hjust = 0.5)) +
    
    scale_colour_manual( values = c('blue','black','red') )
  
  print( volcano )
  
  ggsave( volcano, filename = 'volcano.png' )
  
  dev.off()
  
}
image.png
rm( list = ls() )
load( "./nrDEG.Rdata" )
## 注释

library( "clusterProfiler" ) #clusterProfiler implements methods to analyze and visualize functional profiles of genomic coordinates (supported by ChIPseeker), gene and gene clusters.
if (F) {browseVignettes("clusterProfiler")
  } #可查看帮助文档 

library( "org.Hs.eg.db" ) #加载注释包

## 需要注意这个数据的行名是ACCNUM
ls("package:org.Hs.eg.db") #看一下包里包含的内容
df <- bitr( rownames( nrDEG ), fromType = "ACCNUM", toType = c( "ENTREZID" ), OrgDb = org.Hs.eg.db ) #根据org.Hs.eg.db包,将rownames( nrDEG )即ACCNUM转化为ENTREZID形式,生成data.frame

head( df )

{
  
  nrDEG$SYMBOL = rownames( nrDEG ) 
  
  nrDEG = merge( nrDEG, df, by.y='ACCNUM', by.x='SYMBOL')#将nrDEG与df融合,注意x和y分别对应df和nrDEG
  
}

head( nrDEG )



{
  
  gene_up = nrDEG[ nrDEG$change == 'UP', 'ENTREZID' ] #提取上调基因的ENTREZID
  
  gene_down = nrDEG[ nrDEG$change == 'DOWN', 'ENTREZID' ] #提取下调基因的ENTREZID
  
  gene_diff = c( gene_up, gene_down ) #合并
  
  gene_all = as.character(nrDEG[ ,'ENTREZID'] ) #提取所有ENTREZID,UP DOWN NOT
  
}



{
  
  geneList = nrDEG$logFC #numeric vector
  
  names( geneList ) = nrDEG$ENTREZID #named vector
  
  geneList = sort( geneList, decreasing = T ) #decreasing order
  
}



{
  
  ## KEGG pathway analysis
  
  kk.up <- enrichKEGG(   gene          =  gene_up    ,
                         
                         organism      =  'hsa'      ,
                         
                         universe      =  gene_all   ,
                         
                         pvalueCutoff  =  0.8        ,
                         
                         qvalueCutoff  =  0.8        ) #    对上调基因做KEGG分析;hsa    = Homo sapiens (human)
  
  kk.down <- enrichKEGG( gene          =  gene_down  ,
                         
                         organism      =  'hsa'      ,
                         
                         universe      =  gene_all   ,
                         
                         pvalueCutoff  =  0.8        ,
                         
                         qvalueCutoff  =  0.8        ) #    对上调基因做KEGG分析
  
}



head( kk.up )[ ,1:6 ] #凡事head

head( kk.down )[ ,1:6 ]
image.png
library( "ggplot2" ) #可视化

{
  
  kegg_down_dt <- as.data.frame( kk.down ) #转化data.frame
  
  kegg_up_dt <- as.data.frame( kk.up )
  
  down_kegg <- kegg_down_dt[ kegg_down_dt$pvalue < 0.05, ] #设置p值
  
  down_kegg$group = -1
  
  up_kegg <- kegg_up_dt[ kegg_up_dt$pvalue < 0.05, ] #同上
  
  up_kegg$group = 1
  
  
  
  dat = rbind( up_kegg, down_kegg ) #合并
  
  dat$pvalue = -log10( dat$pvalue ) #取对数
  
  dat$pvalue = dat$pvalue * dat$group #up正 down负
  
  
  
  dat = dat[ order( dat$pvalue, decreasing = F ), ]
  
  
  
  g_kegg <- ggplot( dat, 
                    
                    aes(x = reorder( Description, order( pvalue, decreasing=F ) ), y = pvalue, fill = group)) + 
    
    geom_bar( stat = "identity" ) + 
    
    scale_fill_gradient( low = "blue", high = "red", guide = FALSE ) + 
    
    scale_x_discrete( name = "Pathway names" ) +
    
    scale_y_continuous( name = "log10P-value" ) +
    
    coord_flip() + theme_bw() + theme( plot.title = element_text( hjust = 0.5 ) ) +
    
    ggtitle( "Pathway Enrichment" ) #生成图
  
  print( g_kegg )
  
  ggsave( g_kegg, filename = 'kegg_up_down.png' ) #保存到本地
  
}
![image.png](https://img.haomeiwen.com/i15510633/748a946fbee95dc4.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
### GO database analysis 道理和KEGG分析是相似的

g_list = list( gene_up = gene_up, gene_down = gene_down, gene_diff = gene_diff)



go_enrich_results <- lapply( g_list, function( gene ) {
  
  lapply( c( 'BP', 'MF', 'CC' ) , function( ont ) {
    
    cat( paste( 'Now process', ont ) )
    
    ego <- enrichGO( gene          =  gene,
                     
                     universe      =  gene_all,
                     
                     OrgDb         =  org.Hs.eg.db,
                     
                     ont           =  ont ,
                     
                     pAdjustMethod =  "BH",
                     
                     pvalueCutoff  =  0.99,
                     
                     qvalueCutoff  =  0.99,
                     
                     readable      =  TRUE)
    
    print( head( ego ) )
    
    return( ego )
    
  })
  
}) #运行需要一定的时间

save( go_enrich_results, file = 'go_enrich_results.Rdata' )

load("go_enrich_results.Rdata")

n1 = c( 'gene_up', 'gene_down', 'gene_diff' )

n2 = c( 'BP', 'MF', 'CC' ) 

for ( i in 1:3 ){
  
  for ( j in 1:3 ){
    
    fn = paste0( 'dotplot_', n1[i], '_', n2[j], '.png' )
    
    cat( paste0( fn, '\n' ) )
    
    png( fn, res = 150, width = 1080 )
    
    print( dotplot( go_enrich_results[[i]][[j]] ) )
    
    dev.off()
    
  }
  
} #一下子把BP、MF、CC的up down diff 图全部生成并保存 
gene_up_CC.png

参考来源:生信技能树

友情链接:

课程分享
生信技能树全球公益巡讲
https://mp.weixin.qq.com/s/E9ykuIbc-2Ja9HOY0bn_6g
B站公益74小时生信工程师教学视频合辑
https://mp.weixin.qq.com/s/IyFK7l_WBAiUgqQi8O7Hxw
招学徒:
https://mp.weixin.qq.com/s/KgbilzXnFjbKKunuw7NVfw

欢迎关注公众号:青岛生信菜鸟团

相关文章

网友评论

      本文标题:比较特异性乳腺癌相关成纤维细胞(CAFs)耐药与敏感患者的分子特

      本文链接:https://www.haomeiwen.com/subject/jwnpwqtx.html