美文网首页生物信息学R语言ChIP-seq
③ valr实例计算ChIP-seq中Upstream + Ge

③ valr实例计算ChIP-seq中Upstream + Ge

作者: 热衷组培的二货潜 | 来源:发表于2018-11-26 22:14 被阅读6次

    这里只考虑正向的,如果有反向的可以先转为正向。

    # Summarizing interval coverage across genomic features
    bedfile <- valr_example("genes.hg19.chr22.bed.gz")
    genomefile <- valr_example("hg19.chrom.sizes.gz")
    bgfile  <- valr_example("hela.h3k4.chip.bg.gz")
    
    genes <- read_bed(bedfile, n_fields = 6)
    genome <- read_genome(genomefile)
    y <- read_bedgraph(bgfile)
    # generate 1 bp TSS intervals, "+" strand only
    tss <- genes %>%
      filter(strand == "+") %>%
      mutate(end = start + 1)
    
    # 1000 bp up and downstream
    region_size <- 2000
    # 50 bp windows
    win_size <- 100
    bin_number <- region_size *2 /win_size
    # add slop to the TSS, break into windows and add a group
    # bed_slop函数扩展区域空间,延长区间上下游,这里延长上下游1KB
    # bed_makewindows 扩展后的bed区间以50bp为一个bin进行划分
    x <- tss %>%
      bed_slop(genome, both = region_size) %>%
      bed_makewindows(genome, win_size)
    # map signals to TSS regions and calculate summary statistics.
    # 使用bed_map函数统计每一个bin中的总reads数目
    res <- bed_map(x, y, win_sum = sum(value, na.rm = TRUE)) %>%
      group_by(.win_id) %>%
      summarize(win_mean = mean(win_sum, na.rm = TRUE),
                win_sd = sd(win_sum, na.rm = TRUE))
    
    library(ggplot2)
    
    x_labels <- seq(-region_size, region_size, by = win_size * 5)
    x_breaks <- seq(1, bin_number + 1, by = 5)
    
    sd_limits <- aes(ymax = win_mean + win_sd, ymin = win_mean - win_sd)
    
    p <- ggplot(res, aes(x = .win_id, y = win_mean)) +
      geom_point(size = 0.25) + geom_pointrange(sd_limits, size = 0.1) +
      scale_x_continuous(labels = x_labels, breaks = x_breaks) +
      xlab("Position (bp from TSS)") + ylab("Signal") +
      theme_classic()
    
    TSS_Plot <- ggplot(res, aes(x = .win_id, y = win_mean)) +
      geom_point(size = 0.25) + geom_line(size = 1) + 
      scale_x_continuous(labels = c("-2Kb","TSS", "2Kb"), breaks = seq(0,40, by = bin_number/2))  +
      xlab("Position (bp from TSS)") + ylab("Signal") +
      theme_bw() + 
      theme(legend.key=element_rect(linetype='dashed',color="white"),
            axis.text.y = element_text(size=20),axis.text.x = element_text(size=20),
            legend.title = element_text(size=15),legend.text = element_text(size=15),
            legend.key.height=unit(1.2,'cm')) 
    
    
    ## 绘制TSS + GeneBody + TES 
    ## 生成genebody的counts数
    gene_body <- genes %>%
      filter(strand == "+")
    
    gene_body_x <- gene_body %>%
      bed_makewindows(genome, num_win = bin_number/2)
    
    gene_body_res <- bed_map(gene_body_x, y, win_sum = sum(value, na.rm = TRUE)) %>%
      group_by(.win_id) %>%
      summarize(win_mean = mean(win_sum, na.rm = TRUE),
                win_sd = sd(win_sum, na.rm = TRUE))
    gene_body_res[,1] <- c((bin_number/2 + 1):bin_number)
    up_stream <- res[1:bin_number/2,]
    # down_stream <- matrix(data = NA, ncol = 3, nrow = 20)
    down_stream <- res[(bin_number/2 + 1):bin_number,]
    down_stream[, 1] <- c((bin_number + 1):(bin_number/2 + bin_number))
    new_data <- rbind(up_stream , gene_body_res, down_stream)
    ## ggplot2进行可视化
    # 1000 bp up and downstream
    region_size <- 2000
    # 50 bp windows
    win_size <- 100
    
    
    ggplot(new_data, aes(x = .win_id, y = win_mean)) +
      geom_point(size = 0.25) + geom_line(size = 1)  +
      scale_x_continuous(labels = c("-2Kb","TSS", "TES", "2Kb"), breaks = seq(0,bin_number*3/2, by = bin_number/2)) +
      theme_bw() + xlab("") + ylab("Signal") +
      theme(legend.key=element_rect(linetype='dashed',color="white"),
            axis.text.y = element_text(size=20),axis.text.x = element_text(size=20),
            legend.title = element_text(size=15),legend.text = element_text(size=15),
            legend.key.height=unit(1.2,'cm')) 
    
    
    
    image.png

    从上图明显可以看出此信号明显的富集在TSS区右侧

    相关文章

      网友评论

        本文标题:③ valr实例计算ChIP-seq中Upstream + Ge

        本文链接:https://www.haomeiwen.com/subject/ziiiqqtx.html