数据准备:所有数据按cycle分模块保存,MORT_data为生存数据
image.png
数据提取函数
常规数据提取
#' Read every XPT file matching a glob pattern and stack them row-wise.
#'
#' @param names Glob pattern for the file name, e.g. "*rhq*.xpt". It is
#'   appended to "../data/*/*/" and expanded with Sys.glob().
#' @return One data frame holding the rows of all matched files, plus a
#'   `cycle` column taken from characters 9-17 of each matched path.
merge_files <- function(names) {
  dir_name <- Sys.glob(paste0("../data/*/*/", names))
  # An empty match used to reach read_xpt(NA) through 1:length() and fail
  # with an unrelated message; stop early with a clear one instead.
  if (length(dir_name) == 0) {
    stop(
      "No files matched pattern: ", paste(names, collapse = ", "),
      call. = FALSE
    )
  }
  tables <- lapply(dir_name, function(path) {
    one_file <- read_xpt(path)
    # Characters 9-17 are the first directory after "../data/"
    # (presumably the survey cycle such as "1999-2000" -- confirm layout).
    one_file$cycle <- substr(path, 9, 17)
    one_file
  })
  # Bind once at the end instead of re-copying the result on every file.
  dplyr::bind_rows(tables)
}
如提取人口模块数据使用:
# Stack the demographics ("demo") XPT files found by merge_files() into one table.
dir.demo <- merge_files(names = "*demo*.xpt")
备注:可先用下面的命令查看对应路径下所有匹配到的数据文件名称;无法用同一个通配符一次性匹配的文件,应单独进行提取
# List every file the "*demo*.xpt" pattern matches under ../data/*/*/.
Sys.glob(paste0("../data/*/*/", "*demo*.xpt"))
生存数据提取函数:
#' Read every fixed-width mortality file matching a glob pattern and stack them.
#'
#' @param names Glob pattern for the file name, e.g. "*MORT*.dat". It is
#'   appended to "../../data/*/" and expanded with Sys.glob().
#' @return One data frame with the integer columns SEQN, ELIGSTAT, MORTSTAT,
#'   UCOD_LEADING, DIABETES, HYPERTEN, PERMTH_INT and PERMTH_EXM from all
#'   matched files, plus a `cycle` column derived from each matched path.
mergeSurveys <- function(names) {
  dir_name <- Sys.glob(paste0("../../data/*/", names))
  # An empty match used to reach read_fwf(NA) through 1:length() and fail
  # with an unrelated message; stop early with a clear one instead.
  if (length(dir_name) == 0) {
    stop(
      "No files matched pattern: ", paste(names, collapse = ", "),
      call. = FALSE
    )
  }
  tables <- lapply(dir_name, function(path) {
    one_file <- read_fwf(
      file = path,
      col_positions = fwf_cols(
        SEQN = c(1, 6),
        ELIGSTAT = c(15, 15),
        MORTSTAT = c(16, 16),
        UCOD_LEADING = c(17, 19),
        DIABETES = c(20, 20),
        HYPERTEN = c(21, 21),
        PERMTH_INT = c(43, 45),
        PERMTH_EXM = c(46, 48)
      ),
      col_types = "iiiiiiii",
      # Blank fields and "." are both read as missing values.
      na = c("", ".")
    )
    # Characters 29-37 of the path are rewritten from "_" to "-"
    # (presumably "1999_2000" -> "1999-2000"; the offsets depend on the
    # directory and file naming -- confirm against the actual layout).
    one_file$cycle <- gsub("_", "-", substr(path, 29, 37))
    one_file
  })
  # Bind once at the end instead of re-copying the result on every file.
  dplyr::bind_rows(tables)
}
生存数据各项含义查看:
# Frequency tables for the mortality variables, each followed by its value
# labels. `dsn` is not defined in this snippet.
# NOTE(review): the columns are referenced in lowercase here, while
# mergeSurveys() above names them in uppercase (ELIGSTAT, MORTSTAT, ...) --
# confirm which table `dsn` is meant to be before running these lines.
# ELIGSTAT: eligibility status (value labels below)
table(dsn$eligstat)
# 1 = "Eligible"
# 2 = "Under age 18, not available for public release"
# 3 = "Ineligible"
# MORTSTAT: Final Mortality Status
table(dsn$mortstat, useNA="ifany")
# 0 = Assumed alive
# 1 = Assumed deceased
# <NA> = Ineligible or under age 18
# UCOD_LEADING: Underlying Cause of Death: Recode
table(dsn$ucod_leading, useNA="ifany")
# 1 = Diseases of heart (I00-I09, I11, I13, I20-I51)
# 2 = Malignant neoplasms (C00-C97)
# 3 = Chronic lower respiratory diseases (J40-J47)
# 4 = Accidents (unintentional injuries) (V01-X59, Y85-Y86)
# 5 = Cerebrovascular diseases (I60-I69)
# 6 = Alzheimer's disease (G30)
# 7 = Diabetes mellitus (E10-E14)
# 8 = Influenza and pneumonia (J09-J18)
# 9 = Nephritis, nephrotic syndrome and nephrosis (N00-N07, N17-N19, N25-N27)
# 10 = All other causes (residual)
# <NA> = Ineligible, under age 18, assumed alive, or no cause of death data available
# DIABETES: Diabetes Flag from Multiple Cause of Death (MCOD)
table(dsn$diabetes, useNA="ifany")
# 0 = No - Condition not listed as a multiple cause of death
# 1 = Yes - Condition listed as a multiple cause of death
# <NA> = Assumed alive, under age 18, ineligible for mortality follow-up, or MCOD not available
# HYPERTEN: Hypertension Flag from Multiple Cause of Death (MCOD)
table(dsn$hyperten, useNA="ifany")
# 0 = No - Condition not listed as a multiple cause of death
# 1 = Yes - Condition listed as a multiple cause of death
# <NA> = Assumed alive, under age 18, ineligible for mortality follow-up, or MCOD not available
网友评论