百迈克R语言培训笔记Day3

作者: ShawnMagic | 来源:发表于2019-04-02 09:12 被阅读0次

百迈克R语言培训笔记Day3
百迈克R语言培训笔记Day2
百迈克R语言培训笔记Day1
学习小组Day4笔记--呛
学习小组Day4--呛
2020-05-14
学习小组Day4笔记——冬梅
学习小组Day4笔记--行
学习小组Day5笔记--慧美
百度切片下载百度静态图

忙着赶中期，没时间整理了，中期完了再整理吧 😂😂😂
主要是ggplot2的实操，画了几个比较经典的图，ggplot2可折腾性还是挺高的...

title: "BMK_training_Day3"
author: "Dr Shawn Wang"
date: "2019/4/1"
output: html_document

ggplot2数据实操

stat = "identity"：绘图函数里的 stat 参数表示对样本点做统计的方式。identity 表示一个 x 对应一个 y，即直接使用数据中已有的 y 值；count（geom_bar 的默认值）表示一个 x 对应落到该 x 的样本数；bin 则是对连续变量分箱后计数（如直方图）。说白了就是，identity 提取横坐标 x 对应的 y 值，count/bin 提取横坐标 x 的频数。

################# COG functional classification of the sample #################
# Load the COG statistics table. read.delim() does not treat "#" as a comment
# character by default, so the header line beginning with "#ID" is still read.
data <- read.delim(
  "class/barplot/Sample_Cog.classify.stat",
  header = TRUE,
  sep = "\t",
  check.names = FALSE, # keep the "#ID" column name exactly as written
  stringsAsFactors = FALSE
)

# Inspect the structure and the first rows
str(data)
head(data)

# Tidy two-column data frame used for plotting
df <- data.frame(
  group = data$`#ID`,
  Frequency = data$Numbers
)

# Legend text "<ID>: <class name>", in the same row order as `data`
labels <- paste(data$`#ID`, data$Class_Name, sep = ": ")

library(ggplot2)

# Variant 1: map the group to the bar outline colour.
# (This plot is overwritten by variant 2 below; it is kept as an example.)
p <- ggplot(data = df, aes(x = group, y = Frequency))
p <- p + geom_bar(stat = "identity", mapping = aes(color = group))

# Variant 2: map the group to the bar fill colour
p <- ggplot(df, aes(x = group, y = Frequency, fill = group))
p <- p + geom_bar(stat = "identity")

# Legend: one key per group, relabelled as "<ID>: <class name>".
# Breaks and labels are reordered with the SAME index vector so that each key
# is guaranteed to receive the label built from its own row.
group_ids <- as.character(df$group)
legend_order <- order(group_ids)
p <- p + scale_fill_discrete(
  name = "", # no legend title
  breaks = group_ids[legend_order],
  labels = labels[legend_order],
  guide = guide_legend(ncol = 1) # single-column legend
)
p

# Text sizes and faces (face = 2 is bold)
p <- p + theme(
  axis.title = element_text(face = 2, size = 12),
  axis.text = element_text(face = 2, size = 10),
  legend.text = element_text(face = 2, size = 10),
  legend.key.size = unit(0.5, "cm"),
  plot.title = element_text(face = 2, size = 14, hjust = 0.5) # hjust = 0.5 centres the title
)

# Axis label and plot title
p1 <- p + labs(
  x = "Function Class",
  title = "COG Function Classification of consensus sequence"
)
p1

折线图

# Work inside the directory that holds the "*.r2" tables; the relative paths
# used below are resolved from here.
setwd("class/lineplot/")

# Collect every file whose name ends in the literal suffix ".r2".
# The dot is escaped: an unescaped "." would match any character before "r2".
# all.files = TRUE also lists hidden files; full.names = TRUE keeps the path.
files <- list.files(
  path = ".", pattern = "\\.r2$",
  all.files = TRUE, full.names = TRUE
)

# Read one ".r2" table and add the plotting columns:
#   x    = Total_dis * 1000 / Total_num
#   y    = Total_R2 / Total_num
#   type = file name without the ".r2" suffix (used as the line colour)
read_r2_table <- function(path) {
  tmp <- read.table(file = path, header = TRUE, sep = "\t", stringsAsFactors = FALSE)
  id <- basename(path)
  print(id)
  id <- sub(pattern = "\\.r2$", replacement = "", x = id)
  print(id)
  tmp$x <- tmp$Total_dis * 1000 / tmp$Total_num
  tmp$y <- tmp$Total_R2 / tmp$Total_num
  tmp$type <- rep(id, nrow(tmp))
  tmp
}

# Read all tables first and bind them once, instead of growing `data` with
# rbind() inside a loop. The result is NULL when no file matches.
data <- do.call(rbind, lapply(files, read_r2_table))
head(data)

library(ggplot2)

# One line per input file
p <- ggplot(data = data)
p <- p + geom_line(aes(x = x, y = y, color = type), size = 1.5)
p <- p + scale_x_continuous(expand = c(0.01, 0))
p <- p + scale_y_continuous(expand = c(0.01, 0))
p <- p + scale_color_discrete(name = "") # no legend title
p <- p + labs(
  x = "Distance(kb)",
  y = expression(mean ~ ~R^2)
)
p <- p + theme(
  axis.title = element_text(face = 2, size = 16),
  axis.text = element_text(face = 2, size = 12),
  axis.line = element_line(size = 0.8, lineend = "round"),
  panel.grid = element_blank(),
  panel.background = element_blank(),
  legend.position = c(.85, .85), # relative coordinates on the 0-1 canvas
  legend.key = element_blank(),
  legend.text = element_text(size = 8, face = 2),
  legend.key.size = unit(0.8, "cm"),
  legend.direction = "vertical",
  legend.background = element_blank()
)
p
# Note: dirname() returns the directory part of a path (basename() the file part)

Pie

library(plotrix)

# Read the category totals (columns used below: `type`, `total`)
pie <- read.table(
  "../pieplot/pie.txt",
  header = TRUE, sep = "\t",
  stringsAsFactors = FALSE
)

str(pie)
head(pie)

# Share of each category as a fraction of the grand total,
# rounded to 4 decimal places
pie$ration <- round(pie$total / sum(pie$total), 4)
pie

# Slice labels must exist BEFORE pie3D() is called. The fraction is converted
# to a percentage so that the number matches the "%" sign in the label.
lab <- paste0(pie$type, ":", round(pie$ration * 100, 2), "%")

pie3D(
  pie$ration,
  col = rainbow(nrow(pie)), # one colour per slice, however many rows there are
  border = NA,
  labels = lab,
  labelcex = 0.5,
  explode = 0.1,
  height = 0.08
)

title: "Day3_heatmap"
author: "Dr Shawn Wang"
date: "2019/4/1"
output: html_document

library("ggplot2")
getwd()
# Move two levels up; "heatmap(1).csv" is read from that directory below.
setwd("../../")
m3 <- read.csv("heatmap(1).csv", header = TRUE)
head(m3)

# Reverse the State order on the y axis. The levels are built from the values
# themselves, so this works whether read.csv() returned State as a factor or
# as character (levels() of a character vector is NULL, which would turn every
# state into NA).
m3$State <- factor(
  m3$State,
  levels = rev(sort(unique(as.character(m3$State))))
)

# Bin the incidence into labelled classes. cut() needs exactly one more break
# than labels, and the break points must match the label text:
# (-1,0] (0,1] (1,10] (10,100] (100,500] (500,1000] (1000,max]
m3$IncidenceFactor <- cut(
  m3$Incidence,
  breaks = c(-1, 0, 1, 10, 100, 500, 1000, max(m3$Incidence, na.rm = TRUE)),
  labels = c("0", "0-1", "1-10", "10-100", "100-500", "500-1000", ">1000")
)
# Reverse the class order so the highest class is listed first in the legend
m3$IncidenceFactor <- factor(
  m3$IncidenceFactor,
  levels = rev(levels(m3$IncidenceFactor))
)

# Decade tick labels shared by both heat maps
year_breaks <- c("1930", "1940", "1950", "1960", "1970", "1980", "1990", "2000")

# Draft heat map: continuous fill by raw incidence
p <- ggplot(
  m3,
  aes(x = factor(Year), y = State, fill = Incidence)
) +
  geom_tile(color = "white", size = 0.25)
p <- p + labs(x = "", y = "")

p + scale_y_discrete(expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0), breaks = year_breaks) +
  coord_fixed(ratio = 1) + # fixed aspect ratio: square tiles
  theme_gray(base_size = 8) +
  theme(axis.title = element_text(face = "bold", size = 8)) +
  theme(
    axis.ticks = element_line(size = 0.7),
    plot.background = element_blank(),
    panel.background = element_blank()
  )

# Keep the frequently used text colour in one variable
textcol <- "grey40"

# Final heat map: discrete fill by incidence class.
# theme_grey() is a complete theme and replaces every theme element, so it is
# added BEFORE the theme() tweaks; placed after them it would reset them.
p <- ggplot(
  m3,
  aes(x = factor(Year), y = State, fill = IncidenceFactor)
) +
  geom_tile(color = "white", size = 0.25) +
  labs(
    x = "",
    y = "",
    title = "Incidence of Measles in American"
  ) +
  scale_y_discrete(expand = c(0, 0)) +
  scale_x_discrete(expand = c(0, 0), breaks = year_breaks) +
  coord_fixed(ratio = 1) + # fixed aspect ratio: square tiles
  scale_fill_manual(
    values = c(
      "#d53e4f", "#f46d43",
      "#fdae61", "#fee08b",
      "#e6ff98", "#abdda4",
      "#ddf1da"
    ),
    na.value = "grey90"
  ) +
  theme_grey(base_size = 10) +
  theme(
    axis.title = element_text(face = "bold", size = 8),
    plot.background = element_blank(),
    panel.background = element_blank(),
    legend.position = "right",
    legend.direction = "vertical",
    legend.title = element_blank(),
    legend.text = element_text(
      colour = textcol,
      size = 7,
      face = 2
    ),
    legend.key.height = grid::unit(0.8, "cm"),
    legend.key.width = grid::unit(0.2, "cm"),
    axis.text.x = element_text(size = 10, color = textcol),
    axis.text.y = element_text(vjust = 0.2, colour = textcol),
    axis.ticks = element_line(size = 0.4),
    # "bold" is a font face, not a size
    plot.title = element_text(
      colour = textcol,
      face = "bold",
      hjust = 0.5
    )
  )
p

# install.packages(c("gridExtra", "ggpubr"))
library(gridExtra)
library(ggpubr)
library(magrittr)

# Data preparation: car names become a column, and the grouping variables
# are converted to factors
mtcars$name <- rownames(mtcars)
mtcars$cyl <- as.factor(mtcars$cyl)
ToothGrowth$dose <- factor(ToothGrowth$dose)
head(mtcars[, c("name", "wt", "mpg", "cyl")])

# Panel A: box plot of tooth length per dose
bxp <- ggplot(
  data = ToothGrowth,
  mapping = aes(x = factor(dose), y = len, color = dose)
) +
  geom_boxplot()

# Panel B: centred dot plot of tooth length per dose
dp <- ggplot(
  ToothGrowth,
  aes(
    x = factor(dose), y = len,
    color = factor(dose), fill = factor(dose)
  )
) +
  geom_dotplot(
    binwidth = 1, binaxis = "y",
    stackdir = "center", position = "dodge"
  )

# Panel C: bar plot of mpg per car, sorted ascending within each cyl group
bp <- ggbarplot(
  mtcars,
  x = "name", y = "mpg",
  fill = "cyl", color = "white", palette = "jco",
  sort.val = "asc", sort.by.groups = TRUE,
  x.text.angle = 90
)
bp + font("x.text", size = 8)

# Three panels stacked in one column, then two panels sharing one legend
ggarrange(
  bxp, dp, bp + rremove("x.text"),
  labels = c("A", "B", "C"),
  ncol = 1, nrow = 3
)
ggarrange(bxp, dp, labels = c("A", "B"), common.legend = TRUE)

# Overlaying plots: a scatter plot with marginal density plots.
# A stray argument fragment that made the script unparseable, a misspelt
# column name ("Sepal.width") and duplicated calls have been removed here.

# Main panel: sepal length against sepal width, coloured by species
sp <- ggscatter(
  data = iris,
  x = "Sepal.Length",
  y = "Sepal.Width",
  color = "Species",
  palette = "jco",
  size = 3,
  alpha = 0.6
) + border()
sp

# Top margin: density of the x variable
xplot <- ggdensity(iris, "Sepal.Length",
  fill = "Species",
  palette = "jco"
)
xplot <- xplot + clean_theme()

# Right margin: density of the y variable (Sepal.Width), rotated so that it
# lines up with the y axis of the scatter plot
yplot <- ggdensity(iris, "Sepal.Width",
  fill = "Species",
  palette = "jco"
) + rotate()
yplot <- yplot + clean_theme()

# 2 x 2 layout; the top-right cell is left empty (NULL)
ggarrange(xplot, NULL,
  sp,
  yplot,
  ncol = 2,
  nrow = 2,
  common.legend = TRUE,
  align = "hv",
  widths = c(2, 1), heights = c(1, 2)
)
# Figure made of three stacked parts: density plot, summary table, caption.

# Part 1: reuse the Sepal.Length density plot
density.p <- xplot

# Part 2: per-species summary statistics of Sepal.Length, drawn as a table
stable <- desc_statby(iris,
  measure.var = "Sepal.Length",
  grps = "Species"
)
stable
stable <- stable[, c("Species", "length", "mean", "sd")]
stable.p <- ggtexttable(stable, rows = NULL, theme = ttheme("mOrange"))

# Part 3: caption paragraph
text <- paste("figure 1: wppsgsjb", "wppybcl", "wppbzdzmb", "wpphzcscw", "wppjwbhcj", "wppsglsj", sep = " ")
text.p <- ggparagraph(
  text = text, face = "italic", size = 11,
  color = "black"
)

# Show each part once
density.p
stable.p
text.p

# Combine the parts and keep the result, so the file written below is this
# exact figure rather than whatever last_plot() happens to hold.
combined.p <- ggarrange(density.p, stable.p, text.p,
  ncol = 1,
  nrow = 3,
  heights = c(1, 0.5, 0.3)
)
combined.p
ggsave("123.png", plot = combined.p)

# Convert both grouping columns to factors in one step
group_cols <- c("dose", "supp")
ToothGrowth[group_cols] <- lapply(ToothGrowth[group_cols], factor)

# Notched box plot of tooth length per dose. The notch shows a confidence
# interval around the median; outliers are drawn as red points of shape 9.
box_layer <- geom_boxplot(
  notch = TRUE,
  outlier.colour = "red",
  outlier.shape = 9,
  outlier.size = 2
)
p <- ggplot(ToothGrowth, aes(x = dose, y = len, fill = dose)) + box_layer

# Rule of thumb: data-driven settings go in aes(), constant settings in the geom
p

# Same plot without the legend
p + theme(legend.position = "none")