Question

I have one data set (contents) that holds the description of each school content (subject):

num       description
content1  math
content2  spanish
content3  geography
content4  chemistry
content5  history

In another data set (students) I have the variables content1-content5, which I use as flags to indicate which contents each student has.

students

name age content1 content2 content3 content4 content5 
BOB  15     1        1        1                 1   
BRYA 16
CARL 15              1                          1  
SUE  17                       1        1        1
LOU  15                                         1 

if i use a code like this:

    /* Asker's attempt: build, for each student, a comma-separated list of */
    /* the flagged content variables in a single character column.         */
    data students1;
    set students;
    /* no variable list is given, so the array elements are content1-content5 */
    array content[5];
    /* declares allcontents as a character variable displayed with $100. */
    format allcontents $100.;
    do i=1 to dim(content);
    /* a flag value of 1 marks a content the student has */
    if content[i]=1 then do;
    /* vname() yields the variable name (content1..content5), not the      */
    /* description stored in the lookup table.                             */
    /* NOTE(review): another answer on this page reports that cat "does    */
    /* not seem to work" here and uses cats instead - confirm the output.  */
    allcontents=cat(vname(content[i]),',',allcontents); 
    end;
    end;
    run;

the result is:

name age content1 content2 content3 content4 content5 allcontents 
BOB  15     1        1        1                 1     content1,content2,content3,content5,
BRYA 16
CARL 15              1                          1     content2,content5,
SUE  17                       1        1        1     content3,content4,content5,
LOU  15                                         1     content5

1) In the variable allcontents I want to use the description from the lookup table (data set contents) instead of the array variable names content1-content5. How can I do that?

2) Later, I want the result listed by content description rather than by student, like this:

description  name age
math         BOB  15
spanish      BOB  15
geography    BOB  15
history      BOB  15
spanish      CARL 15
history      CARL 15
spanish      SUE  17
chemistry    SUE  17 
history      SUE  17
history      LOU  15 

is it possible?

thanks.

Was it helpful?

Solution

First, grab the %create_hash() macro from this post.

Use the hash table to look up the values.

/* Emit one row per (student, flagged content) pair, replacing the      */
/* content variable name with its description from the lookup table.    */
/* Output columns: description, name, age.                              */
data students1;
set students;
array content[5];
format num $32. description $16.;
if _n_ = 1 then do;
   /* %create_hash is defined outside this snippet; presumably it declares */
   /* hash object cnt keyed on num, with description as data, loaded from  */
   /* the contents data set - confirm against the macro definition.        */
   %create_hash(cnt,num,description,"contents");
end;
do i=1 to dim(content);
   if content[i]=1 then do;
      /* the lookup key is the variable name, e.g. content1 */
      num = vname(content[i]);
      rc = cnt.find();
      /* on a failed lookup, do not carry over the description found */
      /* for a previous content of the same student                  */
      if rc ne 0 then call missing(description);
      output;
   end;
end;
keep description name age;
run;

OTHER TIPS

I find proc transpose suitable. Transposing once is enough for question 2), and transposing twice renames the variables content1-content5 (hence question 1). The key is the ID statement in proc transpose, which automatically names the transposed variables after the values of the ID variable.

The code below should give you the desired answers (although the names are ordered alphabetically, which may not match your original ordering).

/* original data sets */
/* Lookup table: content variable name (num) -> description. */
data names;
    /* List input with a bare $ defaults to length 8, which would truncate */
    /* the 9-character values "geography" and "chemistry"; declare the     */
    /* lengths explicitly before the INPUT statement.                      */
    length num $ 32 description $ 16;
    input num $ description $;
    cards;
content1 math
content2 spanish
content3 geography
content4 chemistry
content5 history
;run;

/* Sample student data: one row per student, with numeric flags        */
/* content1-content5 (1 = the student has that content, . = missing).  */
data students;
    input name $ age content1-content5;
    datalines;
BOB  15     1        1        1        .        1
BRYA 16     .        .        .        .        .
CARL 15     .        1        .        .        1
SUE  17     .        .        1        1        1
LOU  15     .        .        .        .        1
;
run;

/* Reshape wide -> long: one row per (student, content variable).       */
/* BY processing needs sorted input, so sort a copy by name and age.    */
proc sort data=students
          out=tmp_sorted;
    by name age;
run;

/* With no VAR statement, every numeric variable that is not a BY        */
/* variable is transposed. The source variable name goes to _NAME_ and   */
/* its value to COL1 (the procedure defaults, written out explicitly).   */
proc transpose data=tmp_sorted
               out=tmp_transposed
               name=_NAME_
               prefix=COL;
    by name age;
run;

/* Merge the descriptions of content1-content5 onto the long table.      */
/* If you want to preserve the ordering content1-content5 instead of     */
/* alphabetical ordering of the "description" column, do not drop the    */
/* "num" column so that it can be used later.                            */
proc sql;
    create table tmp_merged as
    select B.description, A.name, A.age, B.num, A.COL1
    from tmp_transposed as A
    left join names as B
        on A._NAME_=B.num
    /* age is part of the sort key because the next step processes this  */
    /* table with BY name age; ordering by name alone breaks that step   */
    /* when two students share a name but differ in age.                 */
    order by A.name, A.age, B.num;
quit;

/* Reshape long -> wide again, this time naming the flag columns after   */
/* the content descriptions instead of content1-content5.                */
proc transpose data=tmp_merged(drop=num)
               out=tmp_renamed(drop=_name_);
    by name age;
    var COL1;          /* the only numeric variable outside the BY list */
    id description;    /* each description value becomes a column name  */
run;

/* answer (1) */
/* For each student, build allcontents: a comma-separated list of the    */
/* description-named flag columns whose value is 1.                      */
data ans1;
    set tmp_renamed;
    /* positional range: every column from math through history; the     */
    /* array size is taken from the range instead of being hard-coded    */
    array content[*] math--history;
    length allcontents $100;
    do i=1 to dim(content);
        /* catx strips blanks, skips empty arguments and inserts the     */
        /* delimiter only between items, so no leading comma has to be   */
        /* removed afterwards                                            */
        if content[i]=1 then allcontents=catx(',',allcontents,vname(content[i]));
    end;
run;

/* answer (2) */
/* One row per (description, student): keep only the flagged rows of the */
/* merged long table and remove the helper columns.                      */
data ans2;
    set tmp_merged(where=(col1=1));
    drop num col1;
run;

/* cleanup: remove the intermediate tmp_* data sets from WORK */
proc datasets library=work nolist;
    delete tmp_: ;
quit;
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top