美文网首页
R基础-向量,dataframe,dplyr操作

R基础-向量,dataframe,dplyr操作

作者: 余绕 | 来源:发表于2022-12-17 15:26 被阅读0次

    1. 向量操作

    A. create a vector

    v1<-c(1,2,3,4,5,6,7,8,9) # create a vector
    

    B. Get values

    v1[c(2)] # obtain a value
    [1] 2
    
    v1[c(3:5)] #obtain values
    [1] 3 4 5
    

    C. assign names to the vector elements

    names(v1)=c("one","two","three","four","five","six","seven","eight","night")
    
    v1 # content of v1
      one   two three  four  five   six seven eight night 
        1     2     3     4     5     6     7     8     9 
    

    D. get a value from its name

    v1["two"] 
    two 
      2 
    v1[v1>3] #get vector elements based on a logical condition
    four  five   six seven eight night 
        4     5     6     7     8     9 
    
    

    2. Dataframe

    A. 创建dataframe

    df<-data.frame(row.names = c('张三','李四','王武','赵六','田七'),
                性别= c('男','女','女','男','女'),
                年龄= c( 39,65,34,28,76),
                成绩=c(89,99,65,39,10))
    
    df
        性别 年龄 成绩
    张三   男   39   89
    李四   女   65   99
    王武   女   34   65
    赵六   男   28   39
    田七   女   76   10
    

    B. 1st row, all columns

    df[1,]
     性别 年龄 成绩
    张三   男   39   89
    

    C. 1st column, all rows

    df[,1]
    > df[,1]
    [1] "男" "女" "女" "男" "女"
    

    D. select rows based on the score

    df[df$成绩>60,]
      性别 年龄 成绩
    张三   男   39   89
    李四   女   65   99
    王武   女   34   65
    

    E. modify values

    df
       性别 年龄 成绩
    张三   男   39   89
    李四   女   65   99
    王武   女   34   65
    赵六   男   28   39
    田七   女   76   10
    
    df$成绩=df$成绩+100
    
    df
        性别 年龄 成绩
    张三   男   39  189
    李四   女   65  199
    王武   女   34  165
    赵六   男   28  139
    田七   女   76  110
    

    dplyr软件包学习

    library(dplyr)
    

    A. import data

    de_test<-read.table("K:/生信学习/R语言基础绘图/R_basic/data/R_basic/de_result.txt",header=T)
    de_test
     gene_id logFC pvalue    FDR
    1   gene1   3.0  0.010 0.0300
    2   gene2   1.0  0.300 0.3800
    3   gene3   2.0  0.002 0.0025
    4   gene4  -3.0  0.002 0.0030
    5   gene5  -0.4  0.004 0.0048
    

    B. select data based on specific criteria---------Basic operation in R

    de_test[de_test$logFC>=1 &de_test$pvalue<=0.05,]
      gene_id logFC pvalue    FDR
    1   gene1     3  0.010 0.0300
    3   gene3     2  0.002 0.0025
    

    using dplyr

    C. filter function #按行筛选

    filter(de_test,logFC>= 1 & pvalue<=0.05)
      gene_id logFC pvalue    FDR
    1   gene1     3  0.010 0.0300
    2   gene3     2  0.002 0.0025
    

    D. select #按列筛选

    select(de_test,gene_id,logFC,pvalue)
    gene_id logFC pvalue
    1   gene1   3.0  0.010
    2   gene2   1.0  0.300
    3   gene3   2.0  0.002
    4   gene4  -3.0  0.002
    5   gene5  -0.4  0.004
    select(de_test,-pvalue)
      gene_id logFC    FDR
    1   gene1   3.0 0.0300
    2   gene2   1.0 0.3800
    3   gene3   2.0 0.0025
    4   gene4  -3.0 0.0030
    5   gene5  -0.4 0.0048
    

    E. add a new column---mutate

    mutate(de_test,FC=2**logFC)
    gene_id logFC pvalue    FDR        FC
    1   gene1   3.0  0.010 0.0300 8.0000000
    2   gene2   1.0  0.300 0.3800 2.0000000
    3   gene3   2.0  0.002 0.0025 4.0000000
    4   gene4  -3.0  0.002 0.0030 0.1250000
    5   gene5  -0.4  0.004 0.0048 0.7578583
    

    F. sort the dataframe

    arrange(de_test,logFC) #default is ascending order
      gene_id logFC pvalue    FDR
    1   gene4  -3.0  0.002 0.0030
    2   gene5  -0.4  0.004 0.0048
    3   gene2   1.0  0.300 0.3800
    4   gene3   2.0  0.002 0.0025
    5   gene1   3.0  0.010 0.0300
    arrange(de_test,desc(logFC))  #descending order 
     gene_id logFC pvalue    FDR
    1   gene1   3.0  0.010 0.0300
    2   gene3   2.0  0.002 0.0025
    3   gene2   1.0  0.300 0.3800
    4   gene5  -0.4  0.004 0.0048
    5   gene4  -3.0  0.002 0.0030
    

    G. 利用管道(using the pipe %>%)

    slected<-de_test %>% filter(logFC>= 1 & pvalue<=0.05) %>% select(-pvalue) %>% mutate(FC=2**logFC) %>% arrange(logFC)
    slected
      gene_id logFC    FDR FC
    1   gene3     2 0.0025  4
    2   gene1     3 0.0300  8
    

    Import data

    gene_fuction<-read.table("K:/生信学习/R语言基础绘图/R_basic/data/R_basic/gene_function.txt",header=T,sep="\t")
    gene_fuction
      gene_name annotation
    1     gene1        aaa
    2     gene2        bbb
    3     gene3        ccc
    4     gene5        ddd
    

    G. Merge dataframes based on columns

    left_join(slected,gene_fuction,by = c('gene_id'='gene_name')) #以左表为标准
      gene_id logFC    FDR FC annotation
    1   gene3     2 0.0025  4        ccc
    2   gene1     3 0.0300  8        aaa
    right_join(slected,gene_fuction,by = c('gene_id'='gene_name'))#以右表为标准
      gene_id logFC    FDR FC annotation
    1   gene3     2 0.0025  4        ccc
    2   gene1     3 0.0300  8        aaa
    3   gene2    NA     NA NA        bbb
    4   gene5    NA     NA NA        ddd
    

    H. 存在重复的情况

    gene_fuction<-edit(gene_fuction)
    
    gene_fuction
    gene_name annotation
    1     gene1        aaa
    2     gene2        bbb
    3     gene3        ccc
    4     gene5        ddd
    5     gene1        xxx
    6     gene3        new
    left_join(slected,gene_fuction,by = c('gene_id'='gene_name')) #以左表为标准
      gene_id logFC    FDR FC annotation
    1   gene3     2 0.0025  4        ccc
    2   gene3     2 0.0025  4        new
    3   gene1     3 0.0300  8        aaa
    4   gene1     3 0.0300  8        xxx
    

    相关文章

      网友评论

          本文标题:R基础-向量,dataframe,dplyr操作

          本文链接:https://www.haomeiwen.com/subject/pifsqdtx.html