tidyr核心函数
rm(list = ls())
options(stringsAsFactors = F)
if(!require(tidyr))install.packages("tidyr")
library(tidyr)
一、数据清理
# 原始数据
test <- data.frame(geneid = paste0("gene",1:4),
sample1 = c(1,4,7,10),
sample2 = c(2,5,0.8,11),
sample3 = c(0.3,6,9,12))
test

### 宽变长
test_gather <- gather(data = test,
key = sample_nm, #将sample重新形成新向量并命名sample_nm
value = exp #将test数据框里的表达数据形成新向量并命名exp
)
head(test_gather)

test_gather <- gather(data = test,
key = sample_nm, #将sample重新形成新向量并命名sample_nm
value = exp, #将test数据框里的表达数据形成新向量并命名exp
- geneid)
head(test_gather)

### 长变宽
test_re <- spread(data = test_gather,
key = sample_nm,
value = exp)
head(test_re)

二、分割和合并
### 原始数据
test <- data.frame(x = c( "a,b", "a,d", "b,c"));test

### 分割
test_seprate <- separate(test,x, c("X", "Y"),sep = ",");test_seprate

### 合并
test_re <- unite(test_seprate,"aaa",X,Y,sep = ":");test_re

三、处理NA
### 原始数据
X<-data.frame(X1 = LETTERS[1:5],X2 = 1:5)

X[2,2] <- NA
X[4,1] <- NA

### 1.去掉含有NA的行,可以选择只根据某一列来去除
drop_na(X)

drop_na(X,X1)

drop_na(X,X2)

### 2.替换NA
####方法1
X$X2[is.na(X$X2)]<-66

####方法2
replace_na(X$X2,0)

### 3.用上一行的值填充NA
X
fill(X,X2)

完整操作,查看小抄
RStudio Cheatsheets - RStudio
网友评论