SAS Replace Row if value is greater than what exists and add any new rows

84 Views Asked by At

I have monthly datasets that all have a column MIA_CURR I want to set each month on top of each other so that there is only one row per unique_id. Adding in any new unique_ids that aren't in the first dataset and replacing the row if the column VAR1 is <= 0 in the first dataset.

The below is what I've tried but I can't quite seem to get it to work, hoping someone might be able to help:

PROC SQL;
CREATE TABLE DATA3 AS 
SELECT 
*
FROM 
DATA1
UNION
SELECT 
*
FROM 
DATA2 AS B
WHERE 
    NOT EXISTS
    ( SELECT *
    FROM DATA1 AS A
    WHERE A.UNIQUE_ID= B.UNIQUE_ID or a.VAR1>0
    )
    
;
QUIT;

EDIT: Here's an example:
Data1:

> Unique_ID  VAR1   VAR2    VAR3    VAR4    VAR5
  00001       0       3      w       1       0
  00002     -2.22     2      e       0       9
  00003       3       2      f       0       8
  00004       1       2      v       0       9

Data2:
Unique_ID    VAR1   VAR2    VAR3    VAR4    VAR5
  00001       1      3       w       1      0
  00005    -1.012    2       t       0      7
  00006       0      2       t       1      6
  00002       1      2       f       0      9

Data3:
Unique_ID    VAR1   VAR2    VAR3    VAR4    VAR5
  00001        1      3      w        1      0
  00002        1      2      f        0      9
  00003        3      2      f        0      8
  00004        1      2      v        0      9
  00005      -1.012   2      t        0      7
  00006        0      2      t        1      6

 
1

There are 1 best solutions below

1
Richard On BEST ANSWER

You can use the MODIFY statement to replace, append or delete records in an existing data set.

Example:

Each month is processed in turn against some master result data set.

data _2023_01;
input id var1-var5;
format id z5.;
datalines;
  00001       0       3      w       1       0
  00002     -2.22     2      e       0       9
  00003       3       2      f       0       8
  00004       1       2      v       0       9
data _2023_02;
input id var1-var5;
format id z5.;
datalines;
  00001       1      3       w       1      0
  00005    -1.012    2       t       0      7
  00006       0      2       t       1      6
  00002       1      2       f       0      9
;
data _2023_03;
input id var1-var5;
format id z5.;
datalines;
  00001        1      3      w        1      0
  00002        1      2      f        0      9
  00003        3      2      f        0      8
  00004        1      2      v        0      9
  00005      -1.012   2      t        0      7
  00006        0      2      t        1      6
;

* establish main data set;
data master (index=(id));
  set _2023_01;
run;

%macro modify_with(transaction_data);
  * change data according to rules;
  data master;
    modify 
      master 
      &transaction_data (rename=var1=new_var1);
    ;
    by id;

    select (_iorc_);
      when (%sysrc(_sok)) do; /* existing id */
        if var1 < 0 then do;
          putlog 'replacing ' id= var1= new_var1=;
          var1 = new_var1;
          replace;
        end;
      end;
      when (%sysrc(_dsenmr)) do; /* new id */
        putlog 'adding new' id=;
        var1 = new_var1;
        output;
        _error_ = 0;
      end;
      otherwise do;
        msg = sysmsg();
        putlog msg;
      end;
    end;
  run;
%mend;

%modify_with(_2023_02)
%modify_with(_2023_03)