作业来源于jimmy

# Install BiocManager only when it is missing, so that re-running the script
# does not reinstall it every time.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# Install the org.Hs.eg.db annotation package only when it is missing.
if (!requireNamespace("org.Hs.eg.db", quietly = TRUE)) {
  BiocManager::install("org.Hs.eg.db")
}
# Plan: merge g2s and g2e first, then merge the result with query_id
# (the gene IDs that need to be looked up), following jimmy's hint.
library(org.Hs.eg.db)
# gene_id -> symbol table.
g2s <- toTable(org.Hs.egSYMBOL)
# gene_id -> ensembl_id table.
g2e <- toTable(org.Hs.egENSEMBL)
#看一下数据结构
> head(g2s)
gene_id symbol
1 1 A1BG
2 2 A2M
3 3 A2MP1
4 9 NAT1
5 10 NAT2
6 11 NATP
> head(g2e)
gene_id ensembl_id
1 1 ENSG00000121410
2 2 ENSG00000175899
3 3 ENSG00000256069
4 9 ENSG00000171428
5 10 ENSG00000156006
6 12 ENSG00000196136
# Join the symbol table and the Ensembl table on their shared gene_id column.
total_table <- merge(x = g2s, y = g2e, by = "gene_id")
#再看
> head(total_table)
gene_id symbol ensembl_id
1 1 A1BG ENSG00000121410
2 10 NAT2 ENSG00000156006
3 100 ADA ENSG00000196839
4 1000 CDH2 ENSG00000170558
5 10000 AKT3 ENSG00000117020
6 10000 AKT3 ENSG00000275199
# The IDs to look up were not found in total_table as given: the part after the
# dot (the version suffix) has to be removed first. Strip it in code so that
# both versioned and already-trimmed IDs work; IDs without a suffix are left
# unchanged by sub().
query_id <- data.frame(
  ensembl_id = sub(
    "\\.[0-9]+$",
    "",
    c(
      "ENSG00000000003", "ENSG00000000005", "ENSG00000000419",
      "ENSG00000000457", "ENSG00000000460", "ENSG00000000938"
    )
  )
)
# Merge again, this time on ensembl_id, to pick out the queried genes.
query_symbol <- merge(total_table, query_id, by = "ensembl_id")
> head(query_symbol)
ensembl_id gene_id symbol
1 ENSG00000000003 7105 TSPAN6
2 ENSG00000000005 64102 TNMD
3 ENSG00000000419 8813 DPM1
4 ENSG00000000457 57147 SCYL3
5 ENSG00000000460 55732 C1orf112
6 ENSG00000000938 2268 FGR
#结束

#比上一个还简单,但这次我们换一个思路
#首先把要查找的probe_id(探针ID)整理成数据框,一个个的输入很是麻烦,偷个懒,用shell
#直接从浏览器复制gene_id,shell里面vi一个文本文档,粘进去
(base) [jkyin@mn02 yjk]$ for i in `cat name.txt`;do echo \"$i\" | tr "\n" ",";done
"1053_at","117_at","121_at","1255_g_at","1316_at","1320_at","1405_i_at","1431_at","1438_at","1487_at","1494_f_at","1598_g_at","160020_at","1729_at","177_at",
# Bring the probe IDs into R.
gene_id <- data.frame(
  probe_id = c(
    "1053_at", "117_at", "121_at", "1255_g_at", "1316_at", "1320_at",
    "1405_i_at", "1431_at", "1438_at", "1487_at", "1494_f_at", "1598_g_at",
    "160020_at", "1729_at", "177_at"
  )
)
# Load the annotation package.
library(hgu133a.db)
ids <- toTable(hgu133aSYMBOL)
# Locate each queried probe_id in ids and pull out the corresponding rows.
# match() returns NA for a probe that is absent from ids; drop those indices so
# the result never contains all-NA rows.
probe_rows <- match(gene_id$probe_id, ids$probe_id)
ids[probe_rows[!is.na(probe_rows)], ]
probe_id symbol
1 1053_at RFC2
2 117_at HSPA6
3 121_at PAX8
4 1255_g_at GUCA1A
5 1316_at THRA
6 1320_at PTPN21
7 1405_i_at CCL5
8 1431_at CYP2E1
9 1438_at EPHB3
10 1487_at ESRRA
11 1494_f_at CYP2A6
12 1598_g_at GAS6
13 160020_at MMP14
14 1729_at TRADD
15 177_at PLD1
# End of the previous exercise.

# Boxplot of the expression values of every probe annotated to TP53.
# NOTE(review): rm(list = ls()) wipes every object in the global environment;
# restarting the R session is the safer way to get a clean workspace.
rm(list = ls())
suppressPackageStartupMessages(library(CLL))
data(sCLLex)
exprSet <- exprs(sCLLex)
library(hgu95av2.db)
ids <- toTable(hgu95av2SYMBOL)
# Rows of the annotation table whose symbol is TP53.
tp53 <- ids[ids$symbol == "TP53", ]
# Locate the TP53 probes among the row names of exprSet. match() returns NA for
# a probe that is not present, so drop those indices, and use drop = FALSE so
# the result keeps two dimensions even when only one probe remains.
tp53_rows <- match(tp53$probe_id, rownames(exprSet))
tp53_exprset <- exprSet[tp53_rows[!is.na(tp53_rows)], , drop = FALSE]
# Transpose so that each probe becomes a column, i.e. one box per probe.
boxplot(t(tp53_exprset))

网友评论