美文网首页
病案首页汇总数据_15Sep2020

病案首页汇总数据_15Sep2020

作者: liang_rujiang | 来源:发表于2020-09-15 17:26 被阅读0次

    项目开始于: 10Sep2020 最后更新于: 14Sep2020

    目的:

    为完成2020年“NCIS医疗质量控制数据收集系统”数据上传,计算“2019年三级、二级综合医院:医疗质量管理控制情况调查表”某些指标而建立

    前提条件

    您需要安装

    操作

    • 如果您的机器已设置环境变量,双击 “_run.bat”即可运行,等待黑框框自动消失,在"res.txt"查看结果,注意该文件生成的时间应当是当前系统时间

    • 如果您的机器未设置环境变量,双击 “quality_data_analysis_RJL_10Sep2020.Rproj”,运行do_part0_run.R后,等待运行结果,在"res.txt"查看结果,注意该文件生成的时间应当是当前系统时间

    文件夹和文件说明(文件夹树见在后面)

    • "res.txt"中查看结果
    • “变量含义对照表.txt”中记录了本次分析中所使用到的变量名及其意义,来自于“HQMS数据对接接口标准.pdf”文档
    • 因为提供的数据是分月的,在“data_source”文件夹中,进行和合并(添加行),“mydata_after_stacking.RData”为合并后的数据(您可以转化后用其他软件做其他的分析)
    • 以".R"结束的代码为真正的分析过程代码,应当很明了,分了不同的版块
    • “do_part0_run.R”中可以设置运行哪些版块,有时候我们并不需要计算所有指标
    • 我成功运行的平台,信息如下
    R version 4.0.2 (2020-06-22)
    Platform: x86_64-w64-mingw32/x64 (64-bit)
    Running under: Windows 7 x64 (build 7601) Service Pack 1
    
    Matrix products: default
    
    locale:
    [1] LC_COLLATE=Chinese (Simplified)_People's Republic of China.936 
    [2] LC_CTYPE=Chinese (Simplified)_People's Republic of China.936   
    [3] LC_MONETARY=Chinese (Simplified)_People's Republic of China.936
    [4] LC_NUMERIC=C                                                   
    [5] LC_TIME=Chinese (Simplified)_People's Republic of China.936    
    
    attached base packages:
    [1] stats     graphics  grDevices utils     datasets  methods  
    [7] base     
    
    other attached packages:
    [1] forcats_0.5.0   stringr_1.4.0   dplyr_1.0.1     purrr_0.3.4    
    [5] readr_1.3.1     tidyr_1.1.1     tibble_3.0.3    ggplot2_3.3.2  
    [9] tidyverse_1.3.0
    
    loaded via a namespace (and not attached):
     [1] Rcpp_1.0.5       cellranger_1.1.0 pillar_1.4.6     compiler_4.0.2  
     [5] dbplyr_1.4.4     tools_4.0.2      jsonlite_1.7.0   lubridate_1.7.9 
     [9] lifecycle_0.2.0  gtable_0.3.0     pkgconfig_2.0.3  rlang_0.4.7     
    [13] reprex_0.3.0     cli_2.0.2        DBI_1.1.0        rstudioapi_0.11 
    [17] haven_2.3.1      xfun_0.16        withr_2.2.0      xml2_1.3.2      
    [21] httr_1.4.2       fs_1.5.0         generics_0.0.2   vctrs_0.3.2     
    [25] hms_0.5.3        grid_4.0.2       tidyselect_1.1.0 glue_1.4.1      
    [29] R6_2.4.1         fansi_0.4.1      readxl_1.3.1     modelr_0.1.8    
    [33] blob_1.2.1       magrittr_1.5     backports_1.1.7  scales_1.1.1    
    [37] ellipsis_0.3.1   rvest_0.3.6      assertthat_0.2.1 colorspace_1.4-1
    [41] utf8_1.1.4       tinytex_0.25     stringi_1.4.6    munsell_0.5.0   
    [45] broom_0.7.0      crayon_1.3.4 
    

    文件夹结构

    quality_data_analysis_RJL_10Sep2020
     ├── data_source
     │   ├── HQMS_db_10_1718.csv
     │   ├── HQMS_db_11_1907.csv
     │   ├── HQMS_db_12_1891.csv
     │   ├── HQMS_db_1_1974.csv
     │   ├── HQMS_db_2 1521.csv
     │   ├── HQMS_db_3_2131.csv
     │   ├── HQMS_db_4_1903.csv
     │   ├── HQMS_db_5_1792.csv
     │   ├── HQMS_db_6_1863.csv
     │   ├── HQMS_db_7_1995.csv
     │   ├── HQMS_db_8_1919.csv
     │   └── HQMS_db_9_1726.csv
     ├── do_part0_functions.R
     ├── do_part0_run.R
     ├── do_part0_setupAndGetData.R
     ├── do_part1_20MainDiseases.R
     ├── do_part2_20MainOpreations.R
     ├── do_part3_16MainTumorsWithoutOpreatins.R
     ├── do_part4_14MainTumorsWithOpreatins.R
     ├── HQMS数据对接接口标准.pdf
     ├── mydata_after_stacking.RData
     ├── quality_data_analysis_RJL_10Sep2020.Rproj
     ├── README.txt
     ├── res.txt
     ├── _run.bat
     ├── 变量含义对照表.txt
     └── 需核对项目.rtf
    

    代码

    _run.bat

    Rscript do_part0_run.R
    exit
    

    do_part0_run.R

    source("do_part0_functions.R", echo = F)
    source("do_part0_setupAndGetData.R", echo = F)
    
    res2file <- T
    if (res2file) sink("res.txt")
    
    my_br(date())
    
    if (T) my_br(1); source("do_part1_20MainDiseases.R", echo = T)
    if (T) my_br(2); source("do_part2_20MainOpreations.R", echo = T)
    if (T) my_br(3); source("do_part3_16MainTumorsWithoutOpreatins.R", echo = T)
    if (T) my_br(4); source("do_part4_16MainTumorsWithOpreatins.R", echo = T)
    
    if (res2file) sink()
    
    

    do_part0_functions.R

    # functions ---------------------------------------------------------------
    
    list2df <- function(list) {
      as_tibble(do.call(rbind, list))
    }
    
    toreg <- function(x) str_c("^", str_replace(x, "\\.", "\\\\."))
    
    mul_detect <- function(mat, strs) {
      out <- list(length = length(strs))
      for (i in  seq_along(strs)) {
        out[[i]] <- str_detect(mat, strs[i])
      }
      reduce(out, `|`)
    }
    
    mymulfilter <- function(df, nm, cond_strs, keep = TRUE) {
      mat_data <- as.matrix(df[, nm])
      vec_logic <- mul_detect(mat_data, cond_strs)
      mat_logic <- matrix(vec_logic, nrow(mat_data), ncol(mat_data))
      ind <- apply(mat_logic,1, any, na.rm = TRUE)
      if (keep) df[ind, ] else df[!ind, ] 
    }
    
    mysum <- function(df) {
      summarise(df, 
                number_cases = n(),
                death_cases = sum(.data$P741 == "5"), 
                days_hopital = sum(P27), 
                cost = sum(P782, na.rm = T))
    }
    
    my_br <- function(number) {
      print("---------------------------------------------------------")
      print(str_c("*************************part ", number, "**************************"))
      print("---------------------------------------------------------")
    }
    

    do_part0_setupAndGetData.R

    
    # setup and prepare data --------------------------------------------------
    
    pri_diag <- "P321"
    diagnosis <- c("P321", "P324", "P327", "P3291", "P3294", 
                   "P3297", "P3281", "P3284", "P3287", "P3271", "P3274")
    op <- c("P490", "P4911", "P4922", "P4533", "P4544", 
            "P45002", "P45014", "P45026", "P45038", "P45050")
    
    
    library(tidyverse)
    file_nms <- dir("./data_source")
    
    out <- list(length = length(file_nms))
    for (i in seq_along(file_nms)) {
      out[[i]] <- read.csv(str_c("./data_source/", file_nms[i]))
    }
    
    mydata <- list2df(out)
    save(mydata, file = "mydata_after_stacking.RData")
    
    mydata2 <- mydata %>% select(P3, P4, P26, P27, P741, 
                                 P321, 
                                 P324, P327, P3291, P3294, P3297, P3281, P3284, P3287, P3271, P3274,
                                 P490, P4911, P4922, P4533, P4544, P45002, P45014, P45026, P45038, P45050,
                                 P782)
    names(mydata2) # please check the api document to confirm
    

    do_part1_20MainDiseases.R

    tempfil <- function(df) {
      mymulfilter(df, pri_diag, keep_diag, TRUE) %>% 
        mymulfilter(diagnosis, "Z37", FALSE) %>%
        mymulfilter(op, drop_op, FALSE)
    }
    
    # filter the interested rows and compute statistics------------------------
    
    keep_diag <- c(str_c("I21.", 0:3), "I21.4", "I21.9") %>% toreg
    drop_op <- c("74.0", "74.1", "74.2", "74.4", "74.99", "37.2") %>% toreg
    mydata2 %>% tempfil %>% mysum
    # ---
    keep_diag <- c("I105", "I106", "I107", "I108", "I109", "I11", "I12", "I13", "I20")
    drop_op <- c("74.0", "74.1", "74.2", "74.4", "74.99", "35", 
                 "36", "37", "38", "39") %>% toreg
    mydata2 %>%
      mymulfilter(diagnosis, keep_diag, TRUE) %>% 
      mymulfilter(diagnosis, "Z37", FALSE) %>%
      mymulfilter(op, drop_op, FALSE) %>%
      mysum
    # ---
    keep_diag <- c("I60", "I61", "I62", "I63")
    drop_op <- c("74.0", "74.1", "74.2", "74.4", "74.99") %>% toreg
    mydata2 %>% tempfil %>% mysum
    # ---
    keep_diag <- c("S06")
    mydata2 %>% tempfil %>% mysum
    # ---
    keep_diag <- c("K25.0", "K25.2", "K25.4", "K25.6",
                   "K26.0", "K26.2", "K26.4", "K26.6", 
                   "K27.0", "K27.2", "K27.4", "K27.6",
                   "K28.0", "K28.2", "K28.4", "K28.6",
                   "K29.0", "K29.2") %>% toreg()
    mydata2 %>% tempfil %>% mysum
    
    # ---
    (keep_diag <- str_c("T0", 1:7) %>% toreg())
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- c(str_c("J", 12:16), "J18") %>% toreg())
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- "J44" %>% toreg())
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- c(str_c("E1", 0:4, ".1"), str_c("E1", 0:4, ".0")) %>% toreg())
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- c(str_c("E10", ".", 2:8),
                    str_c("E11", ".", 2:8),
                    str_c("E12", ".", 2:8),
                    str_c("E13", ".", 2:8),
                    str_c("E14", ".", 2:8)
    ) %>% toreg())
    
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- "E04."  %>% toreg())
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- c("K35.0", "K35.1")  %>% toreg())
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- "N40"  %>% toreg())
    
    mydata2 %>% mymulfilter(pri_diag, keep_diag, TRUE) %>%  mysum
    # ---
    (keep_diag <- c("N17","N18", "N19")  %>% toreg())
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- c("A40","A41", "A22.7", "A26.7", "A28.001", "A32.7", "B37.7")  %>% toreg())
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- str_c("I1", 0:5)  %>% toreg())
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- "K85"  %>% toreg())
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- c("Z51.1", "Z51.2", "Z51.8")  %>% toreg())
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- c("S71", "S72", "S73", "S82", "S83")  %>% toreg())
    mydata2 %>% tempfil %>% mysum
    # ---
    (keep_diag <- c("J45", "J46")  %>% toreg)
    mydata2 %>% tempfil %>% mysum
    # ---
    
    # --- the last disease do not need to compute, Cheng gives it to me
    

    do_part2_20MainOpreations.R

    # operation we interested -------------------------------------------------
    
    (keep_op <- c(str_c("00.", 70:77), str_c("00.", 80:83), str_c("81.", 51:55)))
    mydata2 %>% mymulfilter(op, keep_op) %>% mysum
    
    # --- ok
    
    myfil_sum <- function(keep_op1) {
      mysum(mymulfilter(
        mydata2, 
        c("P490", "P4911", "P4922", "P4533", "P4544", "P45002", "P45014", "P45026", "P45038", "P45050"),
        keep_op1,
        TRUE))
    }
    
    c(str_c("03.0", 1:9), str_c("03.", 40:79), str_c("81.0", 1:9), 
      str_c("81.", 10:38), str_c("81.", 62:66), str_c("84.", 61:68)) %>% toreg %>% myfil_sum
    
    # --- ok
    c(str_c("79.", 31:39), str_c("79.8", 1:9)) %>% toreg %>% myfil_sum()
    
    # --- ok
    c(str_c("01.", 21:59), str_c("02.0", 1:9), str_c("02.", 10:99)) %>%
      toreg %>%
      myfil_sum 
    
    # ---
    
    rfs <- function(str) {
      print(str %>% toreg) 
      str %>% toreg %>% myfil_sum
    }
    
    str_c("00.6", 1:5) %>% rfs  #ok
    str_c("36.1", 0:7) %>% rfs  #ok
    c("00.66", "36.06", "36.07") %>% rfs #ok
    str_c("35.2", 1:8) %>% rfs #ok
    str_c("42.", 41:65) %>% rfs #ok
    str_c("32.", 20:60) %>% rfs #ok
    str_c("52.", 51:96) %>% rfs #ok
    str_c("43.", 50:99) %>% rfs #ok
    str_c("48.", 40:69) %>% rfs #ok
    seq(51.03, 51.99, .01) %>% sprintf("%.2f", .) %>% as.character %>% rfs #ok
    seq(85.21, 85.89, .01) %>% sprintf("%.2f", .) %>% as.character %>% rfs #ok
    c(seq(55.40, 55.69, .01), seq(60.21, 60.69, .01)) %>% sprintf("%.2f", .) %>% as.character %>% rfs #ok
    c(seq(38.02, 38.18, .01), seq(38.30, 38.89, .01), seq(39.00, 39.59, .01)) %>% sprintf("%.2f", .) %>% as.character %>% rfs #ok
    scrfs <- function(str_) str_ %>% sprintf("%.2f", .) %>% as.character %>% rfs
    seq(68.41, 68.90, .01) %>% scrfs()
    c("74.0", "74.1", "74.2", "74.4", "74.99") %>% rfs
    c(seq(72.0, 72.29, .01), seq(73.01, 73.21, .01), seq(73.40, 73.94, .01)) %>% 
      sprintf("%.2f", .) %>%
      toreg %>%
      mymulfilter(mydata2, op, ., T) %>%
      mymulfilter(., diagnosis, "Z37", T) %>%
      mysum() # the last disease do not need to compute, Cheng gives it to me
    

    do_part3_16MainTumorsWithoutOpreatins.R

    t_sum <- function(str) {
      print(str)
      mymulfilter(df = mydata2, nm = diagnosis, cond_strs = toreg(str)) %>%
        mymulfilter(nm = diagnosis, 
                    cond_strs = toreg(c("Z51.001", "Z51.002", "Z51.003", "Z51.101", 
                                        "Z51.102", "Z51.103", "Z51.202", "Z51.203", 
                                        "Z51.204", "Z51.205", "Z51.206", "Z51.207", 
                                        "Z51.502"))) %>%
        mysum
    }
    
    list(
      "C34",
      c("C18", "C19", "C29"),
      "C16",
      "C50",
      "C22",
      "C15",
      "C25",
      "C67",
      "C64",
      c("C54", "D06"),
      "C73",
      "C32",
      "C56",
      "C61",
      "C11",
      str_c("C", 81:85)
    ) %>%
      map(t_sum)
    

    do_part4_14MainTumorsWithOpreatins.R

    to_sum <- function(str_diseases, str_opreations) {
      print(
        c(
          str_diseases, 
          "op ---->", str_opreations)
        )
      
      mymulfilter(df = mydata2, nm = diagnosis, cond_strs = toreg(str_diseases), keep = T) %>%
      mymulfilter(nm = op, cond_strs = toreg(str_opreations), keep = T) %>%
      mysum
    }
    
    mymap2 <- function(myfun, x, y) map2(x, y, myfun)
    
    to_sum %>% 
      mymap2(
        list(
          "C34",
          c("C18", "C19", "C20"),
          "C16",
          "C50",
          "C22",
          "C15",
          "C25",
          "C67",
          "C64",
          c("C53", "D06"),
          "C73",
          "C32",
          "C56"
        ),
        
        list(
          c("32.4", "32.5", "32.6"),
          c("45.7", "48.4", "48.5", "48.6"),
          c("43.5", "43.6", "43.7", "43.9"),
          c("85.4", "85.21"),
          c("50.2", "50.3", "50.4", "50.5"),
          c("42.5", "42.6"),
          c("52.5", "52.7"),
          "57.7",
          c("55.3", "55.5"),
          c("40.59", "65.6", "67.2", "68.4"),
          str_c("06.", 2:5),
          c("30.3", "30.4"),
          c("65.6", "40.59")
        )
      ) # the last disease(13) do not need to compute, Cheng gives it to me, 4 cases
    
    mydata %>%
      dplyr::filter(P7 >= 18) %>%
      mymulfilter(nm = diagnosis, cond_strs = "C61" %>% toreg, keep = T) %>%
      mymulfilter(nm = op, cond_strs = "60.5" %>% toreg, keep = T) %>%
      mysum
    
    # item 13
    mydata2 %>%
      filter(P3 %in% c("00119459", "00116203", "00122887", "00114805")) # no such cases
    

    相关文章

      网友评论

          本文标题:病案首页汇总数据_15Sep2020

          本文链接:https://www.haomeiwen.com/subject/syvfyktx.html