美文网首页
SAS编程-宏:Compare宏程序的两种思路

SAS编程-宏:Compare宏程序的两种思路

作者: 野藤_ | 来源:发表于2022-06-09 18:15 被阅读0次

前面文章SAS编程:Compare结果输出方式介绍 中,介绍了Compare过程步的4种输出比较结果的方式。各家公司基于此,一般都有相关的Compare宏程序。这类宏程序的思路大体有两种:

第一种:结果输出到SAS日志
第二种:结果输出到SAS数据集

公司宏程序一般不会简单地输出对上或没对上的信息,而是基于自动宏变量SYSINFO的返回值,输出具体的Compare结果。下面简单介绍,这两种思路的实现思路。

1. 结果输出到SAS日志

上一篇文章已经介绍过将自动宏变量SYSINFO信息输出到SAS日志,稍微改动一下就可以作为宏程序。在没有完全对上时,输出信息前添加Error字段,方便检查识别。

data base;
  set sashelp.class;
run;

data comp;
  set sashelp.class;

  if _n_ = 1 then height =100;
  label weight = "W";
run;

proc compare base = base comp=comp out=df outbase outcomp outnoequal outdif; 
run;

%let rc=&sysinfo;

data _null_;

put '<<<< Proc Compare Results: ';

/* 0. No differences */
 if &rc = '0'b then
      put '<<<< No differences identified';
/* 1. Test for data set label */
   if &rc = '1'b then
      put '<<<< ' 'Err' 'or: '  'Data sets have different labels';
/* 2. Test for data set types */
   if &rc = '1.'b then
      put '<<<< ' 'Err' 'or: '  'Data set types differ';
/* 3. Test for variable informats */
   if &rc = '1..'b then
      put '<<<< ' 'Err' 'or: '  'Variable has different informat';
/* 4. Test for variable formats */
   if &rc = '1...'b then
      put '<<<< ' 'Err' 'or: '  'Variable has different format';
/* 5. Test for length */
   if &rc = '1....'b then
      put '<<<< ' 'Err' 'or: '  'Variable has different lengths between the base data set 
      and the comparison data set';
/* 6. Test for label */
   if &rc = '1.....'b then
      put '<<<< ' 'Err' 'or: '  'Variable has different label';
/* 7. Test for base observation */
if &rc = '1......'b then
      put '<<<< ' 'Err' 'or: '  'Base data set has observation not in comparison data set';
/* 8. Test for comparison observation */
   if &rc = '1.......'b then
      put  '<<<< ' 'Err' 'or: '  'Comparison data set has observation not in base';
/* 9. Test for base BY group */
if &rc = '1........'b then
      put '<<<< ' 'Err' 'or: '  'Base data set has BY group not in comparison';
/* 10. Test for comparison BY group */
   if &rc = '1.........'b then
      put  '<<<< ' 'Err' 'or: '  'Comparison data set has BY group not in base';
/* 11. Variable in base data set not in compare data set */
   if &rc ='1..........'b then 
      put  '<<<< ' 'Err' 'or: '  'Variable in base data set not found in comparison data set';
/* 12. Comparison data set has variable not in base data set */
   if &rc = '1...........'b then
      put  '<<<< ' 'Err' 'or: '  'Comparison data set has variable not contained in the 
      base data set';
/* 13. Test for values */
   if &rc = '1............'b then
      put '<<<< ' 'Err' 'or: '  'A value comparison was unequal';
/* 14. Conflicting variable types */
   if &rc ='1.............'b then
      put  '<<<< ' 'Err' 'or: '  'Conflicting variable types between the two data sets 
      being compared';
/* 15. Test for BY variables */
   if &rc = '1..............'b then
      put '<<<< ' 'Err' 'or: '  'BY variables do not match';
/* 16. Fatal error*/
   if &rc ='1...............'b then
      put  '<<<< ' 'Err' 'or: '  'Fatal error: comparison not done';
run;

以上程序运行后,日志输出结果如下,两个数据集不一致信息都会输出到日志。同时,Error字段标记,方便日志检查。

Log

如果两个数据集是完全比对上的,日志输出结果如下:

Log

2. 结果输出到SAS数据集

将Compare结果输出到SAS数据集,跟输出到Log有一些不同。

日志信息只是针对单个数据集的比对结果,即一个Log对应一个比对结果;而数据集记录可以不断追加补充,可以将文件夹中的所有比对结果都输出到数据集中。日志中是按行输出不一致的信息,为了方便查阅,日志信息最好输出到同一行记录中。多次比对时,日志文件会自动替换,数据集的记录需要考虑如何替换更新。

我这里提供一个实现思路。如果结果数据集不存在,新建一个数据集保存比对信息。对于所有可能的不一致信息,先保存在不同的变量中,最后有一个变量进行拼接汇总。对于多次比对的情况,结果数据集中保存程序运行时间较晚的那一条记录。

实现宏程序如下:

data base;
  set sashelp.class;
run;

data comp;
  set sashelp.class;

  if _n_ = 1 then height =100;
  label weight = "W";
run;

%macro Compare(base=, comp=, outres=, outdif=);

proc compare base=&base. comp=&comp. out=&outdif outbase outcomp outnoequal outdif;
run;

**Get compare reslult code;
%let rc=&sysinfo;

**Get libname and memename of dataset

*Base;
%if %index(&base., .) %then %do;
  %let lib_base = %sysfunc(upcase( %scan(&base., 1, .)  ));
  %let mem_base = %sysfunc(lowcase(%scan(&base., 2, .) ));
%end;
%else %do;
  %let lib_base = WORK;
  %let mem_base = %sysfunc(lowcase(&base.));  
%end;

*comp;
%if %index(&comp., .) %then %do;
  %let lib_comp = %sysfunc(upcase(%scan(&comp., 1, .) ));
  %let mem_comp = %sysfunc(lowcase(%scan(&comp., 2, .) ));
%end;
%else %do;
  %let lib_comp = WORK;
  %let mem_comp = %sysfunc(lowcase(&comp. ));  
%end;

data _tmp1;

  retain lib_base  mem_base lib_comp mem_comp comp_dtm comp_code comp_res;
  length lib_base lib_comp $8 mem_base mem_comp $64 com_res $500 c0-c16 $50;

  lib_base = "&lib_base.";
  lib_comp = "&lib_comp.";

  mem_base = "&mem_base.";
  mem_comp = "&mem_comp.";

/* 0. No differences */
 if &rc = 0 then
      c0 = 'No differences identified';
/* 1. Test for data set label */
   if &rc = '1'b then
      c1 = '01: Data sets have different labels';
/* 2. Test for data set types */
   if &rc = '1.'b then
      c2 = '02: Data set types differ';
/* 3. Test for variable informats */
   if &rc = '1..'b then
      c3 = '03: Variable has different informat';
/* 4. Test for variable formats */
   if &rc = '1...'b then
      c4 = '04: Variable has different format';
/* 5. Test for length */
   if &rc = '1....'b then
      c5 = '05: Variable has different lengths between the base data set 
      and the comparison data set';
/* 6. Test for label */
   if &rc = '1.....'b then
      c6 = '06: Variable has different label';
/* 7. Test for base observation */
if &rc = '1......'b then
      c7 = '07: Base data set has observation not in comparison data set';
/* 8. Test for comparison observation */
   if &rc = '1.......'b then
      c8 = '08: Comparison data set has observation not in base';
/* 9. Test for base BY group */
if &rc = '1........'b then
      c9 = '09: Base data set has BY group not in comparison';
/* 10. Test for comparison BY group */
   if &rc = '1.........'b then
      c10 = '10: Comparison data set has BY group not in base';
/* 11. Variable in base data set not in compare data set */
   if &rc ='1..........'b then 
      c11 = '11: Variable in base data set not found in comparison data set';
/* 12. Comparison data set has variable not in base data set */
   if &rc = '1...........'b then
      c12 = '12: Comparison data set has variable not contained in the 
      base data set';
/* 13. Test for values */
   if &rc = '1............'b then
      c13 = '13: A value comparison was unequal';
/* 14. Conflicting variable types */
   if &rc ='1.............'b then
      c14 =   '14: Conflicting variable types between the two data sets 
      being compared';
/* 15. Test for BY variables */
   if &rc = '1..............'b then
      c15 = '15: BY variables do not match';
/* 16. Fatal error*/
   if &rc ='1...............'b then
      c16 = '16: Fatal error: comparison not done';
  
  format comp_dtm e8601dt.;
  comp_dtm = datetime();

  comp_code = &rc;
  comp_res = catx("; ", of c0-c16);

  keep lib_base  mem_base lib_comp mem_comp comp_dtm comp_code comp_res;
run;

**Create compare result dataset;
%if %sysfunc(exist(&outres.)) %then %do;
data _tmp2;
  set &outres. _tmp1;
run;

**Keep the latest record for one dataset;
proc sql noprint;
  create table &outres. as
    select * 
    from _tmp2
    group by lib_base, mem_base
    having comp_dtm = max(comp_dtm)
  ;
quit;

%end;
%else %do;
 data &outres.;
  set _tmp;
run;
%end;

%mend;

%compare(
  base = base
  ,comp = comp
  ,outres = comp_res
  ,outdif = df
);

总结

就我个人而言,我认为最简洁的方式就是Compare过程步的Error选项,简洁明了。知晓没对上之后,直接查看out=选项输出的结果。不过,毕竟各家公司都有自己的SOP,需要按照自家公司流程进行。

希望以上内容能够帮助读者理解Compare流程。

感谢阅读, 欢迎关注!
若有疑问,欢迎评论交流!

相关文章

网友评论

      本文标题:SAS编程-宏:Compare宏程序的两种思路

      本文链接:https://www.haomeiwen.com/subject/higpmrtx.html