美文网首页
SAS编程-宏:Compare宏程序的两种思路

SAS编程-宏:Compare宏程序的两种思路

作者: 野藤_ | 来源:发表于2022-06-09 18:15 被阅读0次

    前面文章SAS编程:Compare结果输出方式介绍 中,介绍了Compare过程步的4种输出比较结果的方式。各家公司基于此,一般都有相关的Compare宏程序。这类宏程序的思路大体有两种:

    第一种:结果输出到SAS日志
    第二种:结果输出到SAS数据集

    公司宏程序一般不会简单地输出对上或没对上的信息,而是基于自动宏变量SYSINFO的返回值,输出具体的Compare结果。下面简单介绍这两种思路的实现方法。

    1. 结果输出到SAS日志

    上一篇文章已经介绍过将自动宏变量SYSINFO信息输出到SAS日志,稍微改动一下就可以作为宏程序。在没有完全对上时,输出信息前添加Error字段,方便检查识别。

    /* Reference copy of SASHELP.CLASS used as the base side of the comparison */
    data base;
      set sashelp.class;
    run;

    /* Comparison copy with two deliberate differences:
       - WEIGHT gets a label
       - HEIGHT is overwritten on the first observation */
    data comp;
      attrib weight label = "W";
      set sashelp.class;
      if _n_ eq 1 then do;
        height = 100;
      end;
    run;

    /* Compare the two copies; OUT=DF receives only the observations that differ
       (OUTNOEQUAL), with the base row, the comparison row and the difference row
       for each of them (OUTBASE / OUTCOMP / OUTDIF) */
    proc compare
        base    = base
        compare = comp
        out     = df
        outnoequal
        outbase
        outcomp
        outdif
      ;
    run;
    
    /* Copy the PROC COMPARE return code right after the procedure: SYSINFO is
       overwritten by later steps, so it has to be saved before anything else runs. */
    %let rc=&sysinfo;

    /* Decode the return code and write one log line per condition that is set.
       The code is a sum of bit flags (1, 2, 4, ... 32768). Each bit-mask test
       below checks exactly one flag, so several messages can appear for one run.
       'Err' 'or: ' is written as two adjacent literals, presumably so that the
       echoed source code itself does not contain the keyword searched for in
       log checks; PUT writes adjacent literals without a separating blank. */
    data _null_;
      put '<<<< Proc Compare Results: ';

      /* 0. No differences: every flag is off, so test the whole value.
            (A mask such as '0'b would only test the lowest bit and would fire
            whenever the data set label flag is off.) */
      if &rc = 0 then
        put '<<<< No differences identified';

      /* 1. Test for data set label */
      if &rc = '1'b then
        put '<<<< ' 'Err' 'or: ' 'Data sets have different labels';

      /* 2. Test for data set types */
      if &rc = '1.'b then
        put '<<<< ' 'Err' 'or: ' 'Data set types differ';

      /* 3. Test for variable informats */
      if &rc = '1..'b then
        put '<<<< ' 'Err' 'or: ' 'Variable has different informat';

      /* 4. Test for variable formats */
      if &rc = '1...'b then
        put '<<<< ' 'Err' 'or: ' 'Variable has different format';

      /* 5. Test for length (message kept on one source line so that no
            indentation whitespace ends up inside the literal) */
      if &rc = '1....'b then
        put '<<<< ' 'Err' 'or: ' 'Variable has different lengths between the base data set and the comparison data set';

      /* 6. Test for label */
      if &rc = '1.....'b then
        put '<<<< ' 'Err' 'or: ' 'Variable has different label';

      /* 7. Test for base observation */
      if &rc = '1......'b then
        put '<<<< ' 'Err' 'or: ' 'Base data set has observation not in comparison data set';

      /* 8. Test for comparison observation */
      if &rc = '1.......'b then
        put '<<<< ' 'Err' 'or: ' 'Comparison data set has observation not in base';

      /* 9. Test for base BY group */
      if &rc = '1........'b then
        put '<<<< ' 'Err' 'or: ' 'Base data set has BY group not in comparison';

      /* 10. Test for comparison BY group */
      if &rc = '1.........'b then
        put '<<<< ' 'Err' 'or: ' 'Comparison data set has BY group not in base';

      /* 11. Variable in base data set not in compare data set */
      if &rc = '1..........'b then
        put '<<<< ' 'Err' 'or: ' 'Variable in base data set not found in comparison data set';

      /* 12. Comparison data set has variable not in base data set */
      if &rc = '1...........'b then
        put '<<<< ' 'Err' 'or: ' 'Comparison data set has variable not contained in the base data set';

      /* 13. Test for values */
      if &rc = '1............'b then
        put '<<<< ' 'Err' 'or: ' 'A value comparison was unequal';

      /* 14. Conflicting variable types */
      if &rc = '1.............'b then
        put '<<<< ' 'Err' 'or: ' 'Conflicting variable types between the two data sets being compared';

      /* 15. Test for BY variables */
      if &rc = '1..............'b then
        put '<<<< ' 'Err' 'or: ' 'BY variables do not match';

      /* 16. Fatal error */
      if &rc = '1...............'b then
        put '<<<< ' 'Err' 'or: ' 'Fatal error: comparison not done';
    run;
    

    以上程序运行后,日志输出结果如下,两个数据集不一致信息都会输出到日志。同时,Error字段标记,方便日志检查。

    Log

    如果两个数据集是完全比对上的,日志输出结果如下:

    Log

    2. 结果输出到SAS数据集

    将Compare结果输出到SAS数据集,跟输出到Log有一些不同。

    日志信息只是针对单个数据集的比对结果,即一个Log对应一个比对结果;而数据集记录可以不断追加补充,可以将文件夹中的所有比对结果都输出到数据集中。日志中是按行输出不一致的信息;而在数据集中,为了方便查阅,同一次比对的不一致信息最好保存在同一条记录中。多次比对时,日志文件会自动替换,数据集的记录则需要考虑如何替换更新。

    我这里提供一个实现思路。如果结果数据集不存在,就新建一个数据集保存比对信息。对于所有可能的不一致信息,先分别保存在不同的变量中,最后用一个变量进行拼接汇总。对于多次比对的情况,结果数据集中只保留程序运行时间最晚的那一条记录。

    实现宏程序如下:

    /* Base side: unchanged copy of SASHELP.CLASS */
    data base;
      set sashelp.class;
    run;

    /* Comparison side: same rows, but WEIGHT carries a label and HEIGHT is
       replaced on the first observation, so the comparison finds differences */
    data comp;
      attrib weight label = "W";
      set sashelp.class;
      if _n_ eq 1 then do;
        height = 100;
      end;
    run;
    
    /*
      Compare: run PROC COMPARE for one pair of data sets and store the decoded
      return code as a single row in a cumulative result data set.

      Parameters
        base=    base data set (one-level or two-level name)
        comp=    comparison data set (one-level or two-level name)
        outres=  result data set; created when it does not exist, otherwise the
                 new row is appended and only the latest row per
                 lib_base / mem_base is kept
        outdif=  data set that receives the PROC COMPARE OUT= observations

      Result columns
        lib_base mem_base lib_comp mem_comp  library / member of both inputs
        comp_dtm   datetime at which the result row was built
        comp_code  numeric return code taken from SYSINFO
        comp_res   all decoded messages joined with "; "

      Side effects: the temporary data sets _tmp1 and (when outres already
      exists) _tmp2 are left behind.
    */
    %macro Compare(base=, comp=, outres=, outdif=);

      /* Keep the working macro variables local so that same-named variables of
         the caller (for example a global RC) are not overwritten */
      %local rc lib_base mem_base lib_comp mem_comp;

      proc compare base=&base. comp=&comp. out=&outdif outbase outcomp outnoequal outdif;
      run;

      /* Get compare result code right away, before another step changes SYSINFO */
      %let rc=&sysinfo;

      /* Get libname and member name of the base data set;
         a one-level name is reported with library WORK */
      %if %index(&base., .) %then %do;
        %let lib_base = %sysfunc(upcase(%scan(&base., 1, .)));
        %let mem_base = %sysfunc(lowcase(%scan(&base., 2, .)));
      %end;
      %else %do;
        %let lib_base = WORK;
        %let mem_base = %sysfunc(lowcase(&base.));
      %end;

      /* Same split for the comparison data set */
      %if %index(&comp., .) %then %do;
        %let lib_comp = %sysfunc(upcase(%scan(&comp., 1, .)));
        %let mem_comp = %sysfunc(lowcase(%scan(&comp., 2, .)));
      %end;
      %else %do;
        %let lib_comp = WORK;
        %let mem_comp = %sysfunc(lowcase(&comp.));
      %end;

      /* Build the one-row result. The return code is a sum of bit flags; each
         bit-mask test fills one of c1-c16, c0 is filled only when the code is 0. */
      data _tmp1;

        /* RETAIN comes first only to fix the column order of the output */
        retain lib_base mem_base lib_comp mem_comp comp_dtm comp_code comp_res;

        /* comp_res must be declared here: without an explicit length the CATX
           result would default to 200 bytes. c0-c16 are $100 because the
           longest message has 88 characters. */
        length lib_base lib_comp $8 mem_base mem_comp $64 comp_res $500 c0-c16 $100;

        lib_base = "&lib_base.";
        lib_comp = "&lib_comp.";

        mem_base = "&mem_base.";
        mem_comp = "&mem_comp.";

        /* 0. No differences */
        if &rc = 0 then
          c0 = 'No differences identified';
        /* 1. Test for data set label */
        if &rc = '1'b then
          c1 = '01: Data sets have different labels';
        /* 2. Test for data set types */
        if &rc = '1.'b then
          c2 = '02: Data set types differ';
        /* 3. Test for variable informats */
        if &rc = '1..'b then
          c3 = '03: Variable has different informat';
        /* 4. Test for variable formats */
        if &rc = '1...'b then
          c4 = '04: Variable has different format';
        /* 5. Test for length (literal kept on one line so that no indentation
              whitespace is stored in the message) */
        if &rc = '1....'b then
          c5 = '05: Variable has different lengths between the base data set and the comparison data set';
        /* 6. Test for label */
        if &rc = '1.....'b then
          c6 = '06: Variable has different label';
        /* 7. Test for base observation */
        if &rc = '1......'b then
          c7 = '07: Base data set has observation not in comparison data set';
        /* 8. Test for comparison observation */
        if &rc = '1.......'b then
          c8 = '08: Comparison data set has observation not in base';
        /* 9. Test for base BY group */
        if &rc = '1........'b then
          c9 = '09: Base data set has BY group not in comparison';
        /* 10. Test for comparison BY group */
        if &rc = '1.........'b then
          c10 = '10: Comparison data set has BY group not in base';
        /* 11. Variable in base data set not in compare data set */
        if &rc = '1..........'b then
          c11 = '11: Variable in base data set not found in comparison data set';
        /* 12. Comparison data set has variable not in base data set */
        if &rc = '1...........'b then
          c12 = '12: Comparison data set has variable not contained in the base data set';
        /* 13. Test for values */
        if &rc = '1............'b then
          c13 = '13: A value comparison was unequal';
        /* 14. Conflicting variable types */
        if &rc = '1.............'b then
          c14 = '14: Conflicting variable types between the two data sets being compared';
        /* 15. Test for BY variables */
        if &rc = '1..............'b then
          c15 = '15: BY variables do not match';
        /* 16. Fatal error */
        if &rc = '1...............'b then
          c16 = '16: Fatal error: comparison not done';

        format comp_dtm e8601dt.;
        comp_dtm = datetime();

        comp_code = &rc;
        /* CATX skips the blank c-variables, so only the raised messages are joined */
        comp_res = catx("; ", of c0-c16);

        keep lib_base mem_base lib_comp mem_comp comp_dtm comp_code comp_res;
      run;

      /* Create or update the compare result data set */
      %if %sysfunc(exist(&outres.)) %then %do;
        data _tmp2;
          set &outres. _tmp1;
        run;

        /* Keep the latest record for one base data set */
        proc sql noprint;
          create table &outres. as
            select *
            from _tmp2
            group by lib_base, mem_base
            having comp_dtm = max(comp_dtm)
          ;
        quit;
      %end;
      %else %do;
        /* First run: the new row becomes the result data set */
        data &outres.;
          set _tmp1;
        run;
      %end;

    %mend Compare;
    
    /* Example call: compare BASE with COMP, record the outcome in COMP_RES and
       write the differing observations to DF */
    %Compare(base=base, comp=comp, outres=comp_res, outdif=df);
    

    总结

    就我个人而言,我认为最简洁的方式就是Compare过程步的Error选项,简洁明了。知晓没对上之后,直接查看out=选项输出的结果。不过,毕竟各家公司都有自己的SOP,需要按照自家公司流程进行。

    希望以上内容能够帮助读者理解Compare流程。

    感谢阅读, 欢迎关注!
    若有疑问,欢迎评论交流!

    相关文章

      网友评论

          本文标题:SAS编程-宏:Compare宏程序的两种思路

          本文链接:https://www.haomeiwen.com/subject/higpmrtx.html