有的时候，蛋白质互作关系会出现重复：比如在一个两列的数据框中，蛋白质 a 与蛋白质 b 的互作可能既以 (a, b) 的形式出现，又以 (b, a) 的形式出现。互作关系没有方向，这两行其实是同一条记录，因此需要先对每一行内部排序，再去重。
> df <-data.frame(c("a", "a", "a", "c", "c", "c", 'b', 'b'),
+ c("a", "b", "c", "a", "b", "c", 'a', 'c'),
+ stringsAsFactors=F)
>
> colnames(df)<-c("c1", "c2")
>
> df
c1 c2
1 a a
2 a b
3 a c
4 c a
5 c b
6 c c
7 b a
8 b c
>
>
> apply(df, 1, sort)
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
[1,] "a" "a" "a" "a" "b" "c" "a" "b"
[2,] "a" "b" "c" "c" "c" "c" "b" "c"
>
> t(apply(df, 1, sort))
[,1] [,2]
[1,] "a" "a"
[2,] "a" "b"
[3,] "a" "c"
[4,] "a" "c"
[5,] "b" "c"
[6,] "c" "c"
[7,] "a" "b"
[8,] "b" "c"
>
> as.data.frame(t(apply(df, 1, sort)))
V1 V2
1 a a
2 a b
3 a c
4 a c
5 b c
6 c c
7 a b
8 b c
>
> unique(as.data.frame(t(apply(df, 1, sort))))
V1 V2
1 a a
2 a b
3 a c
5 b c
6 c c
>
>
> df_unique <- unique(as.data.frame(t(apply(df, 1, sort))))
> rownames(df_unique) <- 1:nrow(df_unique)
> df_unique
V1 V2
1 a a
2 a b
3 a c
4 b c
5 c c
# Demo: collapse an undirected pair list so that (a, b) and (b, a) count as
# the same pair, keeping one row per unordered pair.

# Example pair list containing self-pairs and reversed duplicates.
df <- data.frame(
  c1 = c("a", "a", "a", "c", "c", "c", "b", "b"),
  c2 = c("a", "b", "c", "a", "b", "c", "a", "c"),
  stringsAsFactors = FALSE
)
df

# Sort the two members within each row. apply() returns one column per input
# row, so this result is 2 x nrow(df). na.last = TRUE keeps NA entries; the
# default (na.last = NA) drops them, which would give rows of unequal length.
sorted_by_col <- apply(df, 1, sort, na.last = TRUE)
sorted_by_col

# Transpose back to one pair per row.
sorted_pairs <- t(sorted_by_col)
sorted_pairs

# as.data.frame() on an unnamed matrix names the columns V1 and V2.
sorted_df <- as.data.frame(sorted_pairs)
sorted_df

# unique() keeps the first occurrence of each row; surviving rows keep their
# original row names, so the numbering has gaps at this point.
df_unique <- unique(sorted_df)
df_unique

# Renumber the rows 1..n. seq_len() is used instead of 1:nrow() because
# 1:0 evaluates to c(1, 0) when no rows remain.
rownames(df_unique) <- seq_len(nrow(df_unique))
df_unique
网友评论