美文网首页药物预测
cellMiner药物敏感性分析

cellMiner药物敏感性分析

作者: 小洁忘了怎么分身 | 来源:发表于2023-07-21 16:38 被阅读0次

    cellMiner药物敏感性分析

    1.数据下载

    https://discover.nci.nih.gov/cellminer/home.do



    下载并解压,放在工作目录下

    2.药物数据整理

    rm(list=ls())
    library(stringr)
    library(rio)
    drug0 = rio::import("DTP_NCI60_ZSCORE.xlsx", skip = 8)
    drug0 = drug0[,-c(67,68)]
    k1 = drug0$`Drug name`!="-";table(k1)
    
    ## k1
    ## FALSE  TRUE 
    ##  3766 21087
    
    table(drug0$`FDA status`)
    
    ## 
    ##              - Clinical trial   FDA approved 
    ##          23993            546            314
    
    k2 = drug0$`FDA status`!="-";table(k2)
    
    ## k2
    ## FALSE  TRUE 
    ## 23993   860
    
    drug0 = drug0[k1&k2,]
    drug = apply(drug0[,-(1:6)],2,as.numeric)
    rownames(drug)= drug0$`Drug name`
    
    drug[1:4,1:4]
    
    ##                  BR:MCF7 BR:MDA-MB-231 BR:HS 578T BR:BT-549
    ## METHOTREXATE        0.70         -1.22      -1.89     -0.88
    ## 6-THIOGUANINE       0.48         -0.31      -1.09     -0.44
    ## 6-MERCAPTOPURINE    0.70         -0.44      -0.55     -1.44
    ## Colchicine          0.32            NA         NA        NA
    
    # 缺失值处理
    
    library(impute)
    library(limma)
    
    drug = impute.knn(drug)$data #报错
    
    ## Error in impute.knn(drug): a column has more than 80 % missing values!
    
    a = apply(drug, 2, function(x){sum(is.na(x))/length(x)})
    tail(sort(a),10) # 确实有超过80%NA 的列,要删掉
    
    ##    RE:A498  RE:CAKI-1 BR:HS 578T   LE:K-562  LC:HOP-92  BR:BT-549   BR:T-47D 
    ## 0.04651163 0.04883721 0.05348837 0.05465116 0.05697674 0.06046512 0.06860465 
    ##      LE:SR    LC:EKVX   ME:MDA-N 
    ## 0.07906977 0.19186047 0.84534884
    
    drug = drug[,-which.max(a)]
    drug = impute.knn(drug)$data 
    drug = avereps(drug)
    

    3.表达矩阵整理

    exp0 = rio::import("RNA__RNA_seq_composite_expression.xls",skip = 10)
    exp = as.matrix(exp0[,-(1:6)])
    rownames(exp) = exp0$`Gene name d`
    exp = avereps(exp)
    exp = exp[,-which.max(a)] #上面删掉了一列,这里也必须删掉
    identical(colnames(exp),colnames(drug))
    
    ## [1] TRUE
    

    5.药敏分析

    其实本质上就是基因表达量与药物IC50值的相关性分析

    tinyarray 里的cor.one函数支持一列与所有列的相关性分析

    g = "EGFR"
    dat = t(rbind(exp[g,],drug))
    colnames(dat)[1] = g
    dat[1:4,1:4]
    
    ##                EGFR METHOTREXATE 6-THIOGUANINE 6-MERCAPTOPURINE
    ## BR:MCF7       0.242         0.70          0.48             0.70
    ## BR:MDA-MB-231 3.913        -1.22         -0.31            -0.44
    ## BR:HS 578T    2.428        -1.89         -1.09            -0.55
    ## BR:BT-549     3.215        -0.88         -0.44            -1.44
    
    library(tinyarray)
    re = cor.one(dat,g)
    k = re$p.value<0.05 & abs(re$cor)>0.5;table(k)
    
    ## k
    ## FALSE  TRUE 
    ##   752    15
    
    remini = re[k,]
    remini
    
    ##                    p.value        cor obsnumber
    ## Noscapine     1.740620e-05 -0.5278020        59
    ## Tamoxifen     1.920620e-06 -0.5748512        59
    ## ciclosporin   2.433162e-05 -0.5199893        59
    ## auranofin     1.013795e-07 -0.6280054        59
    ## Staurosporine 6.910490e-06  0.5483959        59
    ## Dasatinib     5.932590e-07  0.5972467        59
    ## XAV-939       3.766265e-05  0.5095013        59
    ## Sapitinib     1.906967e-05  0.5256921        59
    ## Pipamperone   1.515924e-05 -0.5309705        59
    ## BMS-690514    3.847482e-06  0.5607794        59
    ## Bafetinib     3.823063e-05 -0.5091358        59
    ## spebrutinib   8.142489e-07  0.5913741        59
    ## S-63845       5.427220e-06 -0.5535653        59
    ## AZD-5991      5.042577e-05 -0.5022987        59
    ## BLU-667       5.958846e-07  0.5971656        59
    

    可以根据相关系数与p值筛选药物-基因对啦

    6.可视化

    相关性点图

    library(ggpubr)
    pdat = data.frame(dat[,c("Tamoxifen","EGFR")],
                      check.names = F)
    colnames(pdat)
    
    ## [1] "Tamoxifen" "EGFR"
    
    sp1 <- ggscatter(pdat, x = "Tamoxifen", y = "EGFR",
                     add = "reg.line",  # Add regressin line 
                     add.params = list(color = "blue", fill = "lightgray"), # Customize reg. line
                     conf.int = TRUE # Add confidence interval
    ) + stat_cor(method = "pearson")
    sp1
    

    箱线图

    pdat$group = ifelse(pdat$EGFR>median(pdat$EGFR),"high","low")
    
    ggboxplot(pdat,"group","Tamoxifen",fill = "group")+
      stat_compare_means(comparisons = list(c("high","low")),
                         aes(label = after_stat(p.signif)))+
      scale_fill_manual(values = c("#2874C5", "#f87669"))
    

    相关文章

      网友评论

        本文标题:cellMiner药物敏感性分析

        本文链接:https://www.haomeiwen.com/subject/tipkudtx.html