美文网首页
快乐学习笔记

快乐学习笔记

作者: liyin_d64b | 来源:发表于2018-10-03 22:01 被阅读0次

    Linux学习笔记

    2018-10-3
    ls #list
    -l #长格式
    -h #human readable
    -rw------- #(r 读,w 写,x 执行)
    -a   #显示以.开头的隐藏文件
    . #表示当前目录
    .. #表示上一个目录
    cd ~gcdong  #到gcdong的家目录去看看发现他安装了R,miniconda等
    type #显示命令类型
    date #时间管理
    man #manual 查看命令 /keyword N 前一个 n下一个 q 退出
    
    • 马哥练习题:
    1. 查看echo是内部还是外部命令
    type echo
    
    1. 作用?显示注释作用,用于一些批命令中需要注释给用户看的地方,比如前一条命令执行会花很长时间,常规来说会用echo显示一条信息,让用户知道这个时候比较慢,稍微等待一会之类的信息。

    2. 如何换行

    echo -e "hello \nworld"
    
    1. 横向tab
    echo -e "hello\tworld"
    
    1. 纵向tab,
    echo -e "hello\vworld"
    
    1. printf
    printf "hello\n"
    printf "hello"
    
    • 练习题
    1. 在任意文件夹下面创建形如 1/2/3/4/5/6/7/8/9 格式的文件夹系列。
    mkdir -p 1/2/3/4/5/6/7/8/9
    -p means no error if existing, make parent directories as needed
    
    1. 在创建好的文件夹下面,比如我的是 /Users/jimmy/tmp/1/2/3/4/5/6/7/8/9 ,里面创建文本文件 me.txt
    pwd 
    touch me.txt
    
    1. 在文本文件 me.txt 里面输入内容:
    vim me.txt
    Go to: http://www.biotrainee.com/
    I love bioinfomatics.
    And you ?
    :w
    :q
    more me.txt
    
    1. 删除上面创建的文件夹 1/2/3/4/5/6/7/8/9 及文本文件 me.txt
    pwd
    cd -
    rm -r 1
    

    五、在任意文件夹下面创建 folder1~5这5个文件夹,然后每个文件夹下面继续创建 folder1~5这5个文件夹:

    pwd
    mkdir -p folder_{1..5}/folder_{1..5}
    ls */
    

    R for data science

    > library(tidyverse)
    > library(nycflights13)
    > by_day <- group_by(flights, year, month, day)
    > summarise(by_day, delay = mean(dep_delay, na.rm = TRUE))
    # A tibble: 365 x 4
    # Groups:   year, month [?]
        year month   day delay
       <int> <int> <int> <dbl>
     1  2013     1     1 11.5 
     2  2013     1     2 13.9 
     3  2013     1     3 11.0 
     4  2013     1     4  8.95
     5  2013     1     5  5.73
     6  2013     1     6  7.15
     7  2013     1     7  5.42
     8  2013     1     8  2.55
     9  2013     1     9  2.28
    10  2013     1    10  2.84
    # ... with 355 more rows
    > by_dest <- group_by(flights, dest)
    > delay <- summarise(by_dest,
    +                    count=n(),
    +                    dist = mean(distance, na.rm = TRUE),
    +                    delay = mean(arr_delay, na.rm = TRUE)
    + )
    > delay
    # A tibble: 105 x 4
       dest  count  dist  delay
       <chr> <int> <dbl>  <dbl>
     1 ABQ     254 1826    4.38
     2 ACK     265  199    4.85
     3 ALB     439  143   14.4 
     4 ANC       8 3370   -2.5 
     5 ATL   17215  757.  11.3 
     6 AUS    2439 1514.   6.02
     7 AVL     275  584.   8.00
     8 BDL     443  116    7.05
     9 BGR     375  378    8.03
    10 BHM     297  866.  16.9 
    # ... with 95 more rows
    > 
    > myda <- read.table("1.txt",sep = "\t",header = T)
    > a <- group_by(myda, Hugo_Symbol)
    > a
    # A tibble: 4,630 x 3
    # Groups:   Hugo_Symbol [21]
       Hugo_Symbol Variant_Classification                              Tumor_Sample_Barcode        
       <fct>       <fct>                                               <fct>                       
     1 KEAP1       "3UTR\tTCGA-94-7557-01A-11D-2122-08\nSYNE1\t3UTR"   TCGA-85-7843-01A-11D-2122-08
     2 RYR2        "3UTR\tTCGA-56-8307-01A-11D-2293-08\nPDCD1\t3UTR"   TCGA-77-8009-01A-11D-2184-08
     3 FAM135B     "3UTR\tTCGA-37-3792-01A-01D-0983-08\nTTN\t3UTR"     TCGA-94-A5I6-01A-21D-A27K-08
     4 PIK3CA      "3UTR\tTCGA-63-A5MM-01A-11D-A26M-08\nKEAP1\t3UTR"   TCGA-56-8082-01A-11D-2244-08
     5 FAM135B     "3UTR\tTCGA-85-8352-01A-31D-2323-08\nMUC16\t3UTR"   TCGA-33-4533-01A-01D-1267-08
     6 CDKN2A      "3UTR\tTCGA-46-6025-01A-11D-1817-08\nCDKN2A\t3UTR"  TCGA-94-A5I4-01A-11D-A26M-08
     7 KMT2D       "3UTR\tTCGA-98-A53I-01A-31D-A25L-08\nFAM135B\t3UTR" TCGA-66-2782-01A-01D-1522-08
     8 KEAP1       "3UTR\tTCGA-43-2576-01A-01D-1522-08\nFAM135B\t3UTR" TCGA-43-5670-01A-21D-2122-08
     9 CDKN2A      "3UTR\tTCGA-85-8664-01A-11D-2395-08\nSYNE1\t3UTR"   TCGA-85-8071-01A-11D-2244-08
    10 PDCD1       "3UTR\tTCGA-77-A5GH-01A-11D-A27K-08\nCSMD3\t3UTR"   TCGA-22-4593-01A-21D-1817-08
    # ... with 4,620 more rows
    > my <- summarise(a,count=n())
    > my
    # A tibble: 21 x 2
       Hugo_Symbol count
       <fct>       <int>
     1 CD274           1
     2 CDKN2A         83
     3 CSMD3         369
     4 FAM135B       186
     5 HLA-A          10
     6 KEAP1          60
     7 KMT2D         150
     8 LRP1B         274
     9 MUC16         453
    10 NFE2L2         79
    # ... with 11 more rows
    > arrange(my,desc(count))
    # A tibble: 21 x 2
       Hugo_Symbol count
       <fct>       <int>
     1 TTN          1212
     2 MUC16         453
     3 TP53          421
     4 CSMD3         369
     5 RYR2          316
     6 SYNE1         290
     7 LRP1B         274
     8 USH2A         268
     9 ZFHX4         255
    10 FAM135B       186
    # ... with 11 more rows
    > 
    > 
    > 
    > myda <- read.table("1.txt",sep = "\t",header = T) %>%
    +   group_by(Hugo_Symbol) %>%
    +   summarise(count=n()) %>%
    +   arrange(desc(count))
    > myda
    # A tibble: 21 x 2
       Hugo_Symbol count
       <fct>       <int>
     1 TTN          1212
     2 MUC16         453
     3 TP53          421
     4 CSMD3         369
     5 RYR2          316
     6 SYNE1         290
     7 LRP1B         274
     8 USH2A         268
     9 ZFHX4         255
    10 FAM135B       186
    # ... with 11 more rows
    > 
    > delay
    # A tibble: 105 x 4
       dest  count  dist  delay
       <chr> <int> <dbl>  <dbl>
     1 ABQ     254 1826    4.38
     2 ACK     265  199    4.85
     3 ALB     439  143   14.4 
     4 ANC       8 3370   -2.5 
     5 ATL   17215  757.  11.3 
     6 AUS    2439 1514.   6.02
     7 AVL     275  584.   8.00
     8 BDL     443  116    7.05
     9 BGR     375  378    8.03
    10 BHM     297  866.  16.9 
    # ... with 95 more rows
    > 
    > by_dest <- group_by(flights, dest)
    > 
    > delay <- summarise(by_dest,
    +                    count = n(),
    +                    dist = mean(distance, na.rm = TRUE),
    +                    delay = mean(arr_delay, na.rm = TRUE)
    + )
    > delay <- filter(delay, count > 20, dest != "HNL")
    > delay
    # A tibble: 96 x 4
       dest  count  dist delay
       <chr> <int> <dbl> <dbl>
     1 ABQ     254 1826   4.38
     2 ACK     265  199   4.85
     3 ALB     439  143  14.4 
     4 ATL   17215  757. 11.3 
     5 AUS    2439 1514.  6.02
     6 AVL     275  584.  8.00
     7 BDL     443  116   7.05
     8 BGR     375  378   8.03
     9 BHM     297  866. 16.9 
    10 BNA    6333  758. 11.8 
    # ... with 86 more rows
    > 
    > ggplot(delay,mapping = aes(x=dist,y=delay))+geom_point(aes(size=count),alpha=1/4)+geom_smooth(se=FALSE)
    `geom_smooth()` using method = 'loess' and formula 'y ~ x'
    > 
    > 
    > 
    > 
    > 
    > 
    > flights %>% 
    +   group_by(year, month, day) %>% 
    +   summarise(mean = mean(dep_delay))
    # A tibble: 365 x 4
    # Groups:   year, month [?]
        year month   day  mean
       <int> <int> <int> <dbl>
     1  2013     1     1    NA
     2  2013     1     2    NA
     3  2013     1     3    NA
     4  2013     1     4    NA
     5  2013     1     5    NA
     6  2013     1     6    NA
     7  2013     1     7    NA
     8  2013     1     8    NA
     9  2013     1     9    NA
    10  2013     1    10    NA
    # ... with 355 more rows
    > flights %>% 
    +   group_by(year, month, day) %>% 
    +   summarise(mean = mean(dep_delay, na.rm = TRUE))
    # A tibble: 365 x 4
    # Groups:   year, month [?]
        year month   day  mean
       <int> <int> <int> <dbl>
     1  2013     1     1 11.5 
     2  2013     1     2 13.9 
     3  2013     1     3 11.0 
     4  2013     1     4  8.95
     5  2013     1     5  5.73
     6  2013     1     6  7.15
     7  2013     1     7  5.42
     8  2013     1     8  2.55
     9  2013     1     9  2.28
    10  2013     1    10  2.84
    # ... with 355 more rows
    > 
    > 
    > not_cancelled <- flights %>% 
    +   filter(!is.na(dep_delay), !is.na(arr_delay))
    > not_cancelled %>% 
    +   group_by(year, month, day) %>% 
    +   summarise(mean = mean(dep_delay))
    # A tibble: 365 x 4
    # Groups:   year, month [?]
        year month   day  mean
       <int> <int> <int> <dbl>
     1  2013     1     1 11.4 
     2  2013     1     2 13.7 
     3  2013     1     3 10.9 
     4  2013     1     4  8.97
     5  2013     1     5  5.73
     6  2013     1     6  7.15
     7  2013     1     7  5.42
     8  2013     1     8  2.56
     9  2013     1     9  2.30
    10  2013     1    10  2.84
    # ... with 355 more rows
    > 
    > 
    > 
    > 
    > delays <- not_cancelled %>% 
    +   group_by(tailnum) %>% 
    +   summarise(
    +     delay = mean(arr_delay)
    +   )
    > delays
    # A tibble: 4,037 x 2
       tailnum   delay
       <chr>     <dbl>
     1 D942DN   31.5  
     2 N0EGMQ    9.98 
     3 N10156   12.7  
     4 N102UW    2.94 
     5 N103US   -6.93 
     6 N104UW    1.80 
     7 N10575   20.7  
     8 N105UW   -0.267
     9 N107US   -5.73 
    10 N108UW   -1.25 
    # ... with 4,027 more rows
    > 
    > 
    > ggplot(data = delays, mapping = aes(x = delay))+ geom_freqpoly(binwidth = 10) 
    > 
    > 
    > 
    > a <- arrange(delays, desc(delay))
    > a
    # A tibble: 4,037 x 2
       tailnum delay
       <chr>   <dbl>
     1 N844MH   320 
     2 N911DA   294 
     3 N922EV   276 
     4 N587NW   264 
     5 N851NW   219 
     6 N928DN   201 
     7 N7715E   188 
     8 N654UA   185 
     9 N665MQ   175.
    10 N427SW   157 
    # ... with 4,027 more rows
    > 
    > 
    > delays <- not_cancelled %>% 
    +   group_by(tailnum) %>% 
    +   summarise(
    +     delay = mean(arr_delay, na.rm = TRUE),
    +     n = n()
    +   )
    > delays
    # A tibble: 4,037 x 3
       tailnum   delay     n
       <chr>     <dbl> <int>
     1 D942DN   31.5       4
     2 N0EGMQ    9.98    352
     3 N10156   12.7     145
     4 N102UW    2.94     48
     5 N103US   -6.93     46
     6 N104UW    1.80     46
     7 N10575   20.7     269
     8 N105UW   -0.267    45
     9 N107US   -5.73     41
    10 N108UW   -1.25     60
    # ... with 4,027 more rows
    > 
    > 
    > ggplot(data = delays, mapping = aes(x = delay, y = n)) + 
    +   geom_point(alpha = 1/10)
    

    相关文章

      网友评论

          本文标题:快乐学习笔记

          本文链接:https://www.haomeiwen.com/subject/awstaftx.html