# Clear every object from the current workspace before running the demo.
rm(list=ls())
# Keep strings as character vectors when data frames are built.
options(stringsAsFactors = F)
## Load the test data
load('example.Rdata')
# Print `a`, the object the rest of the script expects example.Rdata to provide.
a
# Recorded output (row names are probe ids, columns are samples):
# GSM3781295 GSM3781296 GSM3781297 GSM3781298
#NM_214711 1.972502 1.939664 2.047662 2.208423
#NM_214710 2.688776 3.037042 5.280936 3.070719
#NM_214462 6.311992 1.704874 7.960758 1.901060
#NM_214461 6.102595 3.608155 6.625782 3.861742
library(reshape2)
# Work on a copy so `a` itself stays unchanged, and expose the row names as an
# ordinary column so melt() can use them as the id variable.
aa <- a
aa$probe_id <- rownames(aa)
# Wide -> long: one row per (probe_id, sample) pair, value stored in `exprs`.
b <- melt(data = aa,
id.vars = c("probe_id"),
variable.name = "samples",
value.name = "exprs")
# Store the sample labels as character strings; `b` is later compared with
# identical() against a hand-built data frame.
b$samples <- as.character(b$samples)
b
# Recorded output:
# probe_id samples exprs
#1 NM_214711 GSM3781295 1.972502
#2 NM_214710 GSM3781295 2.688776
#3 NM_214462 GSM3781295 6.311992
#4 NM_214461 GSM3781295 6.102595
#5 NM_214711 GSM3781296 1.939664
#6 NM_214710 GSM3781296 3.037042
#7 NM_214462 GSM3781296 1.704874
#8 NM_214461 GSM3781296 3.608155
#9 NM_214711 GSM3781297 2.047662
#10 NM_214710 GSM3781297 5.280936
#11 NM_214462 GSM3781297 7.960758
#12 NM_214461 GSM3781297 6.625782
#13 NM_214711 GSM3781298 2.208423
#14 NM_214710 GSM3781298 3.070719
#15 NM_214462 GSM3781298 1.901060
#16 NM_214461 GSM3781298 3.861742
# Persist the long-format table to disk.
save(b,file = 'b.Rdata')
#' Reshape a wide expression table into long format without reshape2.
#'
#' Every column of `df` becomes a block of rows; the blocks are stacked in
#' column order.
#'
#' @param df A data frame whose row names are the probe ids and whose columns
#'   are the samples.
#' @return A data frame with the columns `probe_id`, `samples` and `exprs`
#'   (in that order) and `nrow(df) * ncol(df)` rows. A `df` without columns
#'   yields a zero-row result instead of an error.
melt_by_cat <- function(df) {
  n_samples <- ncol(df)

  # `1:ncol(df)` would run over c(1, 0) here and fail on a missing column.
  if (n_samples == 0) {
    return(data.frame(
      probe_id = character(0),
      samples = character(0),
      exprs = numeric(0),
      stringsAsFactors = FALSE
    ))
  }

  # Collect the per-sample pieces in a preallocated list and bind them once,
  # rather than re-copying a growing data frame on every iteration.
  pieces <- vector("list", n_samples)
  for (i in seq_len(n_samples)) {
    pieces[[i]] <- data.frame(
      probe_id = rownames(df),
      samples = rep(colnames(df)[i], nrow(df)),
      exprs = df[, i],
      stringsAsFactors = FALSE
    )
  }

  do.call(rbind, pieces)
}
# Rebuild the long table with the hand-written function.
re1 <- melt_by_cat(a)
## Check whether re1 and b are identical
identical(b,re1)
# Recorded output:
#[1] TRUE
# Long -> wide with reshape2: probe_id identifies the rows, each distinct
# value of samples becomes a column.
# NOTE(review): no value.var is passed, so dcast() has to pick the value
# column itself -- presumably `exprs`; confirm.
d <- dcast(data = b, probe_id ~samples)
d
# Recorded output:
# probe_id GSM3781295 GSM3781296 GSM3781297 GSM3781298
#1 NM_214461 6.102595 3.608155 6.625782 3.861742
#2 NM_214462 6.311992 1.704874 7.960758 1.901060
#3 NM_214710 2.688776 3.037042 5.280936 3.070719
#4 NM_214711 1.972502 1.939664 2.047662 2.208423
#' Reshape a long expression table into wide format without reshape2.
#'
#' The rows of each sample are extracted into a two-column piece (key column
#' plus a value column renamed to the sample label) and the pieces are joined
#' with merge() on their shared column(s).
#'
#' @param df0 A long data frame with a `samples` column, a key column
#'   (e.g. `probe_id`) and a value column.
#' @return A data frame with the key column followed by one column per
#'   distinct sample, rows ordered by the key. A single-sample input returns
#'   that sample's rows instead of an empty result.
dcast_by_cat <- function(df0) {
  # as.character() keeps the comparison and the renaming below correct when
  # `samples` is stored as a factor.
  sp_index <- as.character(unique(df0$samples))
  keep_cols <- colnames(df0) != "samples"

  if (length(sp_index) == 0) {
    # Nothing to spread: return the (empty) key column only.
    return(df0[0, which(keep_cols)[1], drop = FALSE])
  }

  # Rows of one sample, without the `samples` column, value column renamed
  # to the sample label.
  extract_sample <- function(sample_name) {
    piece <- df0[df0$samples == sample_name, keep_cols, drop = FALSE]
    colnames(piece)[2] <- sample_name
    piece
  }

  pieces <- lapply(sp_index, extract_sample)

  # Reduce() returns the only piece untouched when there is a single sample;
  # a `2:length(sp_index)` loop would run over c(2, 1) and index a missing
  # sample in that case.
  wide <- Reduce(function(acc, piece) merge(acc, piece), pieces)

  if (length(pieces) == 1) {
    # merge() sorts by the key and resets row names; do the same here so the
    # output layout does not depend on the number of samples.
    wide <- wide[order(wide[[1]]), , drop = FALSE]
    rownames(wide) <- NULL
  }

  wide
}
# Rebuild the wide table with the hand-written function.
re2 <- dcast_by_cat(b)
## Check whether re2 and d are identical
identical(d,re2)
# Recorded output:
#[1] TRUE
收获:
- 学习了 melt() 和 dcast() 最基本的用法
1)melt() 得到的数据框,总行数 = 原数据框行数 × 列数,
每行 3 个数据,分别对应原数据框每个单元格的行名、列名、数值:
b <- melt(data = aa,id.vars = c("probe_id"),variable.name = "samples",value.name = "exprs")
2)dcast() 得到的数据框:
d <- dcast(data = b,probe_id ~samples)
以~号前的列做行名,~号后为列名,重排数据
存在问题:
- 还是未能掌握 apply 函数的使用方法:尝试使用 apply() 改写 melt(),得到的结果是一个 List
#' Reshape a wide expression table into long format with an apply-family call.
#'
#' apply() is avoided on purpose: it hands back a list of per-column results
#' and gives the callback no column index, so a counter incremented inside
#' the callback never advances. The columns are walked by index with lapply()
#' and the pieces are stacked explicitly.
#'
#' @param df A data frame whose row names are the probe ids and whose columns
#'   are the samples.
#' @return A data frame with the columns `probe_id`, `samples` and `exprs`
#'   (in that order), one row per cell of `df`, stacked in column order.
melt_by_cat_2 <- function(df) {
  pieces <- lapply(seq_len(ncol(df)), function(i) {
    data.frame(
      probe_id = rownames(df),
      samples = rep(colnames(df)[i], nrow(df)),
      exprs = df[, i],
      stringsAsFactors = FALSE
    )
  })

  if (length(pieces) == 0) {
    # No columns: return an empty table with the expected layout.
    return(data.frame(
      probe_id = character(0),
      samples = character(0),
      exprs = numeric(0),
      stringsAsFactors = FALSE
    ))
  }

  # The list is unnamed, so rbind() keeps plain 1..n row names.
  do.call(rbind, pieces)
}
# Run the apply-style variant on the example data `a`.
re <- melt_by_cat_2(a)
网友评论