中国家庭追踪调查会采集父母对孩子的职业期望、孩子对自身的职业期望,同时存在某轮次调查没采集到职业期望的问题。可以从官网下载数据。
我们的分析对象是2010年至2020年父母对孩子的职业期望、孩子对自身的职业期望,变量如下:
(图:职业期望宽表形式的变量名展示)
我们的目标是,将这个13列的宽表数据集转换为长表:每条记录仅包含pid和对应某年份的职业期望文本。
方式一-利用data步将宽表数据转换为长表:
/* Give expect1-expect12 a uniform type, length and format before the wide table is read. */
data job_expectancy1;
    /* LENGTH has to be the first statement that mentions these variables:
       once another statement (such as FORMAT) has created a character
       variable, a later LENGTH can no longer change it and only adds a
       warning to the log. */
    length expect1-expect12 $80;
    format expect1-expect12 $80.;
    /* Declaring before SET keeps expect1-expect12 as the leading columns. */
    set job_expectancy;
run;
/* Copy the twelve wave-specific job-expectation columns into expect1-expect12. */
data job_expectancy1;
    set job_expectancy1;

    /* 2010 wave */
    expect1  = wd101_2010;
    expect2  = wm601_2010;
    /* 2012 wave */
    expect3  = ks801_2012;
    expect4  = wd101_2012;
    /* 2014 wave */
    expect5  = ks801_2014;
    expect6  = wd101_2014;
    /* 2016 wave */
    expect7  = ks801_2016;
    expect8  = wd101_2016;
    /* 2018 wave */
    expect9  = qs801_2018;
    expect10 = wd101_2018;
    /* 2020 wave */
    expect11 = qs801_2020;
    expect12 = wd101_2020;

    /* Only the person id and the twelve uniform columns are carried forward. */
    keep pid expect1-expect12;
run;
/* Wide to long: write one output row per non-blank expect1-expect12 value. */
data job_expectancy1;
    set job_expectancy1;
    array slots[12] expect1 - expect12;

    do idx = 1 to dim(slots);
        expect = slots[idx];
        /* Blank entries are skipped, so a person contributes between 0 and 12 rows. */
        if not missing(expect) then output;
    end;

    /* The loop index and the twelve wide columns are dropped by this KEEP. */
    keep pid expect;
run;
/* Sort the long table by person id so each person's rows are adjacent. */
proc sort data=job_expectancy1;
    by pid;
run;
方式二-利用proc步将宽表数据转换为长表:
/* Tag every input row with a sequential row number. */
data job_expectancy_a;
    /* Sum statement: row_num is retained across iterations and starts at 0,
       so the first row gets 1. The name must be row_num because the
       following PROC TRANSPOSE uses it as a BY variable. */
    row_num + 1;
    set job_expectancy;
run;
/* Wide to long with PROC TRANSPOSE: each listed column becomes its own row
   within every (row_num, pid) BY group. */
proc transpose
        data = job_expectancy_a
        out  = job_expectancy_b (rename = (_name_ = Location
                                           col1   = expect));
    /* Columns to stack; the original column name ends up in Location
       and its value in expect. */
    var wd101_2010 wm601_2010
        ks801_2012 wd101_2012
        ks801_2014 wd101_2014
        ks801_2016 wd101_2016
        qs801_2018 wd101_2018
        qs801_2020 wd101_2020;
    by row_num pid;
run;
/* Split Location (the name of the transposed source column, e.g. wd101_2010)
   into its two underscore-separated parts.
   scan(string, i, "chars") returns the i-th token of string, using the
   given characters as delimiters. */
data job_expectancy_b;
    set job_expectancy_b;
    var  = scan(location, 1, "_");   /* token before the underscore */
    year = scan(location, 2, "_");   /* token after the underscore  */
run;
/* Remove invalid values: rows whose expect is one of the codes -8, -2, -1
   (presumably survey missing-value codes; confirm against the codebook),
   a lone ".", or blank. */
data job_expectancy_b;
    set job_expectancy_b;

    /* NOTE(review): assumes expect is a character column; confirm.
       Comparing it directly with numbers makes SAS convert every value
       implicitly and log an invalid-data note for each real text answer.
       INPUT with the ?? modifier does the same numeric read silently and
       yields a missing value for non-numeric text. */
    _code = input(strip(expect), ?? 32.);
    if _code not in (-8, -2, -1);

    /* Drop rows that hold only a missing-value dot or nothing at all. */
    if strip(expect) not in (".", "");

    drop _code;
run;
通过方式二,最终得到的长表如下:
我们生成的长表(by 方式二)
网友评论