美文网首页
R语言教程,去除重复的蛋白质、基因互作关系

R语言教程,去除重复的蛋白质、基因互作关系

作者: 柳叶刀与小鼠标 | 来源:发表于2020-08-30 14:39 被阅读0次

    有的时候,蛋白质互作关系会出现重复:比如在一个两列的数据框中,蛋白质a与蛋白质b的互作可能既以“a b”的顺序出现,又以“b a”的顺序出现,但它们其实是同一对互作关系,需要去重。


    > df <-data.frame(c("a", "a", "a", "c", "c", "c", 'b', 'b'),
    +                 c("a", "b", "c", "a", "b", "c", 'a', 'c'), 
    +                 stringsAsFactors=F)
    > 
    > colnames(df)<-c("c1", "c2")
    > 
    > df
      c1 c2
    1  a  a
    2  a  b
    3  a  c
    4  c  a
    5  c  b
    6  c  c
    7  b  a
    8  b  c
    > 
    > 
    > apply(df, 1, sort)
         [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
    [1,] "a"  "a"  "a"  "a"  "b"  "c"  "a"  "b" 
    [2,] "a"  "b"  "c"  "c"  "c"  "c"  "b"  "c" 
    > 
    > t(apply(df, 1, sort))
         [,1] [,2]
    [1,] "a"  "a" 
    [2,] "a"  "b" 
    [3,] "a"  "c" 
    [4,] "a"  "c" 
    [5,] "b"  "c" 
    [6,] "c"  "c" 
    [7,] "a"  "b" 
    [8,] "b"  "c" 
    > 
    > as.data.frame(t(apply(df, 1, sort)))
      V1 V2
    1  a  a
    2  a  b
    3  a  c
    4  a  c
    5  b  c
    6  c  c
    7  a  b
    8  b  c
    > 
    > unique(as.data.frame(t(apply(df, 1, sort))))
      V1 V2
    1  a  a
    2  a  b
    3  a  c
    5  b  c
    6  c  c
    > 
    > 
    > df_unique <- unique(as.data.frame(t(apply(df, 1, sort))))
    > rownames(df_unique) <- 1:nrow(df_unique)
    > df_unique
      V1 V2
    1  a  a
    2  a  b
    3  a  c
    4  b  c
    5  c  c
    
    
    
    # Remove duplicated undirected interaction pairs: a row "c a" describes the
    # same interaction as a row "a c", so only one of them should be kept.

    # Toy interaction table: each row is one pair of partners (c1, c2).
    df <- data.frame(
      c1 = c("a", "a", "a", "c", "c", "c", "b", "b"),
      c2 = c("a", "b", "c", "a", "b", "c", "a", "c"),
      stringsAsFactors = FALSE
    )

    df

    # Step 1: sort the two partners inside every row so that both orientations
    # of a pair become identical. apply() returns one column per input row.
    # na.last = TRUE keeps missing partners; plain sort() drops NA, which makes
    # the per-row results ragged and breaks the reshaping below.
    sorted_by_column <- apply(as.matrix(df), 1, sort, na.last = TRUE)
    sorted_by_column

    # Step 2: reshape back to one pair per row. Filling a matrix by row is
    # equivalent to t() here, but also gives the right shape for a table with
    # zero rows or a single row.
    sorted_pairs <- matrix(sorted_by_column, ncol = ncol(df), byrow = TRUE)
    sorted_pairs

    # Step 3: back to a data frame (columns are auto-named V1, V2); keep the
    # partners as character, matching the input table.
    sorted_df <- as.data.frame(sorted_pairs, stringsAsFactors = FALSE)
    sorted_df

    # Step 4: drop repeated pairs; the first occurrence of each pair is kept.
    unique(sorted_df)

    # Final result with row names renumbered from 1. seq_len() is used instead
    # of 1:nrow() so an empty result does not produce the bogus index c(1, 0).
    df_unique <- unique(sorted_df)
    rownames(df_unique) <- seq_len(nrow(df_unique))
    df_unique
    

    相关文章

      网友评论

          本文标题:R语言教程,去除重复的蛋白质、基因互作关系

          本文链接:https://www.haomeiwen.com/subject/tikesktx.html