如何对sequenza的结果进行LOH定义
# Load and process Sequenza-style seg file
# chromosome start.pos end.pos Bf N.BAF sd.BAF depth.ratio
# N.ratio sd.ratio CNt A B LPP
process_sequenza_seg <- function(seg, include_loh, cn_style) {
print(paste("Loading Sequenza seg file ", seg))
seg1 <- read.csv(seg, stringsAsFactors = FALSE, sep = '\t')
# Remove chr prefix if it exists
test_chr <- seg1[1, "chromosome"]
if (grepl("chr", test_chr)) {
# 'chr' prefix exists
seg1[, "chromosome"] <- sub("^chr", "", seg1[, "chromosome"])
}
chroms_a <- seg1[, "chromosome"]
chroms <- as.numeric(chroms_a)
keep_chrom <- !is.na(chroms < 23)
loh_string = "no_LOH_"
loh_snv_data <- NULL
if (include_loh > 0) {
if (include_loh == 1) {
neut <- seg1[, "CNt"] == 2
loh1 <- seg1[, "A"] == 2
keep_loh <- neut & loh1 & keep_chrom
loh_string <- "neut_LOH_"
} else if (include_loh == 2) {
#don't use these other two options!
del <- seg1[, "CNt"] == 1
loh1 <- seg1[, "A"] + seg1[, "B"] == 1
keep_loh <- del & loh1
loh_string <- "del_LOH_"
} else if (include_loh == 3) {
loh_string <- "any_LOH"
not_amp <- seg1[, "CNt"] <= 2
loh1 <- seg1[, "B"] == 0
keep_loh <- not_amp & loh1
}
sequenza_loh <- seg1[keep_loh, ]
vaf_loh_event <- 1- sequenza_loh[, "Bf"]
# the Expands method expects VAFs that were increased relative to the normal
# but Sequenza reports BAF relative to lower Reference_Allele
loh_snv_data <- matrix(nrow = length(vaf_loh_event), ncol = 7,
dimnames = list(c(), c("chr", "startpos", "endpos", "REF", "ALT", "AF_Tumor", "PN_B")))
loh_snv_data[, "chr"] <- as.numeric(sequenza_loh[, "chromosome"])
loh_snv_data[, "startpos"] <- as.numeric(sequenza_loh[, "start.pos"] + 1)
loh_snv_data[, "endpos"] <- as.numeric(sequenza_loh[, "start.pos"] + 1)
loh_snv_data[, "AF_Tumor"] <- as.numeric(vaf_loh_event)
loh_snv_data[, "REF"] <- as.numeric(rep(65, length(vaf_loh_event)))
loh_snv_data[, "ALT"] <- as.numeric(rep(45, length(vaf_loh_event)))
loh_snv_data[, "PN_B"] <- as.numeric(rep(1, length(vaf_loh_event)))
# loh_snv_data[,"PN_B"] = as.numeric(rep(0,length(vaf_loh_event)))
# don't set as LOH because it works better if you don't. Unsure why
}
keep_seg <- seg1[, "end.pos"] - seg1[, "start.pos"] > 1
keep_both <- keep_chrom & keep_seg
# Make input CNV matrix for EXPANDS
seg2 <- matrix(nrow = dim(seg1[keep_both, ][1]), ncol = 4)
colnames(seg2) <- c("chr", "startpos","endpos","CN_Estimate")
seg2[, "chr"] <- as.numeric(seg1[keep_both, "chromosome"])
seg2[, "startpos"] <- as.numeric(seg1[keep_both, "start.pos"])
seg2[, "endpos"] <- as.numeric(seg1[keep_both, "end.pos"])
if(cn_style == 1) {
seg2[, 4] <- as.numeric(seg1[keep_both, "CNt"])
} else if (cn_style == 2) {
logratio <- log2(as.numeric(seg1[keep_both, "depth.ratio"]))
seg2[, 4] <- 2*2^logratio
}
output <- list(seg2, loh_snv_data)
return(output)
}
网友评论