前面文章SAS编程:Compare结果输出方式介绍 中,介绍了Compare过程步的4种输出比较结果的方式。各家公司基于此,一般都有相关的Compare宏程序。这类宏程序的思路大体有两种:
第一种:结果输出到SAS日志
第二种:结果输出到SAS数据集
公司宏程序一般不会简单地输出对上或没对上的信息,而是基于自动宏变量SYSINFO的返回值,输出具体的Compare结果。下面简单介绍,这两种思路的实现思路。
1. 结果输出到SAS日志
上一篇文章已经介绍过将自动宏变量SYSINFO信息输出到SAS日志,稍微改动一下就可以作为宏程序。在没有完全对上时,输出信息前添加Error
字段,方便检查识别。
data base;
set sashelp.class;
run;
data comp;
set sashelp.class;
if _n_ = 1 then height =100;
label weight = "W";
run;
proc compare base = base comp=comp out=df outbase outcomp outnoequal outdif;
run;
%let rc=&sysinfo;
data _null_;
put '<<<< Proc Compare Results: ';
/* 0. No differences */
if &rc = '0'b then
put '<<<< No differences identified';
/* 1. Test for data set label */
if &rc = '1'b then
put '<<<< ' 'Err' 'or: ' 'Data sets have different labels';
/* 2. Test for data set types */
if &rc = '1.'b then
put '<<<< ' 'Err' 'or: ' 'Data set types differ';
/* 3. Test for variable informats */
if &rc = '1..'b then
put '<<<< ' 'Err' 'or: ' 'Variable has different informat';
/* 4. Test for variable formats */
if &rc = '1...'b then
put '<<<< ' 'Err' 'or: ' 'Variable has different format';
/* 5. Test for length */
if &rc = '1....'b then
put '<<<< ' 'Err' 'or: ' 'Variable has different lengths between the base data set
and the comparison data set';
/* 6. Test for label */
if &rc = '1.....'b then
put '<<<< ' 'Err' 'or: ' 'Variable has different label';
/* 7. Test for base observation */
if &rc = '1......'b then
put '<<<< ' 'Err' 'or: ' 'Base data set has observation not in comparison data set';
/* 8. Test for comparison observation */
if &rc = '1.......'b then
put '<<<< ' 'Err' 'or: ' 'Comparison data set has observation not in base';
/* 9. Test for base BY group */
if &rc = '1........'b then
put '<<<< ' 'Err' 'or: ' 'Base data set has BY group not in comparison';
/* 10. Test for comparison BY group */
if &rc = '1.........'b then
put '<<<< ' 'Err' 'or: ' 'Comparison data set has BY group not in base';
/* 11. Variable in base data set not in compare data set */
if &rc ='1..........'b then
put '<<<< ' 'Err' 'or: ' 'Variable in base data set not found in comparison data set';
/* 12. Comparison data set has variable not in base data set */
if &rc = '1...........'b then
put '<<<< ' 'Err' 'or: ' 'Comparison data set has variable not contained in the
base data set';
/* 13. Test for values */
if &rc = '1............'b then
put '<<<< ' 'Err' 'or: ' 'A value comparison was unequal';
/* 14. Conflicting variable types */
if &rc ='1.............'b then
put '<<<< ' 'Err' 'or: ' 'Conflicting variable types between the two data sets
being compared';
/* 15. Test for BY variables */
if &rc = '1..............'b then
put '<<<< ' 'Err' 'or: ' 'BY variables do not match';
/* 16. Fatal error*/
if &rc ='1...............'b then
put '<<<< ' 'Err' 'or: ' 'Fatal error: comparison not done';
run;
以上程序运行后,日志输出结果如下,两个数据集不一致信息都会输出到日志。同时,Error
字段标记,方便日志检查。
如果两个数据集是完全比对上的,日志输出结果如下:
Log2. 结果输出到SAS数据集
将Compare结果输出到SAS数据集,跟输出到Log有一些不同。
日志信息只是针对单个数据集的比对结果,即一个Log对应一个比对结果;而数据集记录可以不断追加补充,可以将文件夹中的所有比对结果都输出到数据集中。日志中是按行输出不一致的信息,为了方便查阅,日志信息最好输出到同一行记录中。多次比对时,日志文件会自动替换,数据集的记录需要考虑如何替换更新。
我这里提供一个实现思路。如果结果数据集不存在,新建一个数据集保存比对信息。对于所有可能的不一致信息,先保存在不同的变量中,最后有一个变量进行拼接汇总。对于多次比对的情况,结果数据集中保存程序运行时间较晚的那一条记录。
实现宏程序如下:
data base;
set sashelp.class;
run;
data comp;
set sashelp.class;
if _n_ = 1 then height =100;
label weight = "W";
run;
%macro Compare(base=, comp=, outres=, outdif=);
proc compare base=&base. comp=&comp. out=&outdif outbase outcomp outnoequal outdif;
run;
**Get compare reslult code;
%let rc=&sysinfo;
**Get libname and memename of dataset
*Base;
%if %index(&base., .) %then %do;
%let lib_base = %sysfunc(upcase( %scan(&base., 1, .) ));
%let mem_base = %sysfunc(lowcase(%scan(&base., 2, .) ));
%end;
%else %do;
%let lib_base = WORK;
%let mem_base = %sysfunc(lowcase(&base.));
%end;
*comp;
%if %index(&comp., .) %then %do;
%let lib_comp = %sysfunc(upcase(%scan(&comp., 1, .) ));
%let mem_comp = %sysfunc(lowcase(%scan(&comp., 2, .) ));
%end;
%else %do;
%let lib_comp = WORK;
%let mem_comp = %sysfunc(lowcase(&comp. ));
%end;
data _tmp1;
retain lib_base mem_base lib_comp mem_comp comp_dtm comp_code comp_res;
length lib_base lib_comp $8 mem_base mem_comp $64 com_res $500 c0-c16 $50;
lib_base = "&lib_base.";
lib_comp = "&lib_comp.";
mem_base = "&mem_base.";
mem_comp = "&mem_comp.";
/* 0. No differences */
if &rc = 0 then
c0 = 'No differences identified';
/* 1. Test for data set label */
if &rc = '1'b then
c1 = '01: Data sets have different labels';
/* 2. Test for data set types */
if &rc = '1.'b then
c2 = '02: Data set types differ';
/* 3. Test for variable informats */
if &rc = '1..'b then
c3 = '03: Variable has different informat';
/* 4. Test for variable formats */
if &rc = '1...'b then
c4 = '04: Variable has different format';
/* 5. Test for length */
if &rc = '1....'b then
c5 = '05: Variable has different lengths between the base data set
and the comparison data set';
/* 6. Test for label */
if &rc = '1.....'b then
c6 = '06: Variable has different label';
/* 7. Test for base observation */
if &rc = '1......'b then
c7 = '07: Base data set has observation not in comparison data set';
/* 8. Test for comparison observation */
if &rc = '1.......'b then
c8 = '08: Comparison data set has observation not in base';
/* 9. Test for base BY group */
if &rc = '1........'b then
c9 = '09: Base data set has BY group not in comparison';
/* 10. Test for comparison BY group */
if &rc = '1.........'b then
c10 = '10: Comparison data set has BY group not in base';
/* 11. Variable in base data set not in compare data set */
if &rc ='1..........'b then
c11 = '11: Variable in base data set not found in comparison data set';
/* 12. Comparison data set has variable not in base data set */
if &rc = '1...........'b then
c12 = '12: Comparison data set has variable not contained in the
base data set';
/* 13. Test for values */
if &rc = '1............'b then
c13 = '13: A value comparison was unequal';
/* 14. Conflicting variable types */
if &rc ='1.............'b then
c14 = '14: Conflicting variable types between the two data sets
being compared';
/* 15. Test for BY variables */
if &rc = '1..............'b then
c15 = '15: BY variables do not match';
/* 16. Fatal error*/
if &rc ='1...............'b then
c16 = '16: Fatal error: comparison not done';
format comp_dtm e8601dt.;
comp_dtm = datetime();
comp_code = &rc;
comp_res = catx("; ", of c0-c16);
keep lib_base mem_base lib_comp mem_comp comp_dtm comp_code comp_res;
run;
**Create compare result dataset;
%if %sysfunc(exist(&outres.)) %then %do;
data _tmp2;
set &outres. _tmp1;
run;
**Keep the latest record for one dataset;
proc sql noprint;
create table &outres. as
select *
from _tmp2
group by lib_base, mem_base
having comp_dtm = max(comp_dtm)
;
quit;
%end;
%else %do;
data &outres.;
set _tmp;
run;
%end;
%mend;
%compare(
base = base
,comp = comp
,outres = comp_res
,outdif = df
);
总结
就我个人而言,我认为最简洁的方式就是Compare过程步的Error
选项,简洁明了。知晓没对上之后,直接查看out=
选项输出的结果。不过,毕竟各家公司都有自己的SOP,需要按照自家公司流程进行。
希望以上内容能够帮助读者理解Compare流程。
感谢阅读, 欢迎关注!
若有疑问,欢迎评论交流!
网友评论