美文网首页
R小作业[中级]

R小作业[中级]

作者: kkkkkkang | 来源:发表于2020-07-06 10:33 被阅读0次

作业来源于jimmy

作业1
# Install BiocManager only when it is not already available, so that
# re-running this snippet does not reinstall it every time.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
# Likewise, install org.Hs.eg.db only when it is missing.
if (!requireNamespace("org.Hs.eg.db", quietly = TRUE)) {
  BiocManager::install("org.Hs.eg.db")
}
# Plan (following jimmy's hint): merge g2s and g2e first, then merge the
# result with query_id (the gene IDs that need to be looked up).
library(org.Hs.eg.db)
# gene_id -> symbol table.
g2s <- toTable(org.Hs.egSYMBOL)
# gene_id -> ensembl_id table.
g2e <- toTable(org.Hs.egENSEMBL)
#看一下数据结构
> head(g2s)
  gene_id symbol
1       1   A1BG
2       2    A2M
3       3  A2MP1
4       9   NAT1
5      10   NAT2
6      11   NATP
> head(g2e)
  gene_id      ensembl_id
1       1 ENSG00000121410
2       2 ENSG00000175899
3       3 ENSG00000256069
4       9 ENSG00000171428
5      10 ENSG00000156006
6      12 ENSG00000196136
# Join the symbol table and the Ensembl table on their shared gene_id column.
total_table <- merge(x = g2s, y = g2e, by = "gene_id")
#再看
> head(total_table)
  gene_id symbol      ensembl_id
1       1   A1BG ENSG00000121410
2      10   NAT2 ENSG00000156006
3     100    ADA ENSG00000196839
4    1000   CDH2 ENSG00000170558
5   10000   AKT3 ENSG00000117020
6   10000   AKT3 ENSG00000275199
# The IDs to look up were not found in total_table as given; the part after
# the dot has to be removed first. The IDs listed here were already trimmed
# by hand, so sub() leaves them unchanged, but it makes the step explicit and
# lets dotted IDs be pasted in directly.
query_id <- data.frame(
  ensembl_id = sub(
    "\\.[0-9]+$", "",
    c(
      "ENSG00000000003", "ENSG00000000005", "ENSG00000000419",
      "ENSG00000000457", "ENSG00000000460", "ENSG00000000938"
    )
  )
)
# Merge again: keeps only the rows of total_table whose ensembl_id is queried.
query_symbol <- merge(total_table, query_id, by = "ensembl_id")
> head(query_symbol)
       ensembl_id gene_id   symbol
1 ENSG00000000003    7105   TSPAN6
2 ENSG00000000005   64102     TNMD
3 ENSG00000000419    8813     DPM1
4 ENSG00000000457   57147    SCYL3
5 ENSG00000000460   55732 C1orf112
6 ENSG00000000938    2268      FGR
#结束
作业2
#比上一个还简单,但这次我们换一个思路
#首先把要查找的gene_id做成数据框,一个个地输入很是麻烦,偷个懒,用shell
#直接从浏览器复制gene_id,shell里面vi一个文本文档,粘进去
(base) [jkyin@mn02 yjk]$ for i in `cat name.txt`;do echo \"$i\" | tr "\n" ",";done

"1053_at","117_at","121_at","1255_g_at","1316_at","1320_at","1405_i_at","1431_at","1438_at","1487_at","1494_f_at","1598_g_at","160020_at","1729_at","177_at",

# Bring the probe IDs into R.
gene_id <- data.frame(
  probe_id = c(
    "1053_at", "117_at", "121_at", "1255_g_at", "1316_at", "1320_at",
    "1405_i_at", "1431_at", "1438_at", "1487_at", "1494_f_at", "1598_g_at",
    "160020_at", "1729_at", "177_at"
  )
)
# Load the annotation package.
library(hgu133a.db)
ids <- toTable(hgu133aSYMBOL)
# Look up the symbol of every queried probe, in query order. Building the
# result from the query (instead of row-indexing ids with match()) keeps the
# probe_id visible when a probe is absent from ids: only its symbol is NA,
# rather than the whole row.
data.frame(
  probe_id = gene_id$probe_id,
  symbol = ids$symbol[match(gene_id$probe_id, ids$probe_id)]
)
    probe_id symbol
1    1053_at   RFC2
2     117_at  HSPA6
3     121_at   PAX8
4  1255_g_at GUCA1A
5    1316_at   THRA
6    1320_at PTPN21
7  1405_i_at   CCL5
8    1431_at CYP2E1
9    1438_at  EPHB3
10   1487_at  ESRRA
11 1494_f_at CYP2A6
12 1598_g_at   GAS6
13 160020_at  MMP14
14   1729_at  TRADD
15    177_at   PLD1
#结束
image.png
# Box plot of the expression values of every probe annotated as TP53.
suppressPackageStartupMessages(library(CLL))
library(hgu95av2.db)

data(sCLLex)
exprSet <- exprs(sCLLex)
ids <- toTable(hgu95av2SYMBOL)

# Rows of the probe -> symbol table that belong to TP53.
tp53 <- ids[ids$symbol == "TP53", ]
# Keep only probes that are actually present as row names of exprSet, so no
# all-NA rows are introduced.
tp53_probes <- tp53$probe_id[tp53$probe_id %in% rownames(exprSet)]
# drop = FALSE keeps a matrix even if a single probe matches; otherwise the
# result would collapse to a vector and t() below would change its layout.
tp53_exprset <- exprSet[tp53_probes, , drop = FALSE]
# Transpose so that each probe becomes a column, i.e. one box per probe.
boxplot(t(tp53_exprset))
image.png

相关文章

网友评论

      本文标题:R小作业[中级]

      本文链接:https://www.haomeiwen.com/subject/ztcfqktx.html