100% found this document useful (1 vote)
251 views

SAS Programs For Making SDTM DM and EX Datasets: Yupeng Wang, PH.D., Data Scientist

This document provides SAS programs for creating SDTM DM and EX datasets from raw clinical trial data. The programs clean and transform raw demographic, dosing, and other data; derive SDTM variables; create empty SDTM datasets based on metadata; and output final DM, SUPPDM, and EX datasets to a target library. The programs perform tasks like merging datasets, calculating derived dates, applying formats, and sorting by specified variables.

Uploaded by

Bhavana
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
100% found this document useful (1 vote)
251 views

SAS Programs For Making SDTM DM and EX Datasets: Yupeng Wang, PH.D., Data Scientist

This document provides SAS programs for creating SDTM DM and EX datasets from raw clinical trial data. The programs clean and transform raw demographic, dosing, and other data; derive SDTM variables; create empty SDTM datasets based on metadata; and output final DM, SUPPDM, and EX datasets to a target library. The programs perform tasks like merging datasets, calculating derived dates, applying formats, and sorting by specified variables.

Uploaded by

Bhavana
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 10

Clinical Trials – Make SDTM DM and EX datasets

SAS programs for making SDTM DM and EX datasets


Yupeng Wang, Ph.D., Data Scientist

Raw data

1. Metadata

1
Clinical Trials – Make SDTM DM and EX datasets

2. DM raw data

3. Dosing raw data

2
Clinical Trials – Make SDTM DM and EX datasets

Programs

Program 1: make_empty_dataset.sas

/*make_empty_dataset.sas creates a zero record dataset based on a dataset metadata


spreadsheet. The dataset created is called EMPTY_ where "" is the name of the
dataset. This macro also creates a global macro variable called KEEPSTRING that
holds the dataset variables desired and listed in the order they should appear.
[The variable order is dictated by VARNUM in the metadata spreadsheet.]

MACRO PARAMETERS:
metadatafile = the MS Excel file containing the dataset metadata
dataset = the dataset or domain name you want to extract*/

%macro make_empty_dataset(metadatafile=,dataset=);

proc import
datafile="&metadatafile"
out=_temp
dbms=xlsx
replace;
sheet="VARIABLE_METADATA";
run;

** sort the dataset by expected specified variable order;


proc sort
data=_temp;
where domain = "&dataset";
by varnum;
run;

** create keepstring macro variable and load metadata


** information into macro variables;
%global &dataset.KEEPSTRING;
data _null_;
set _temp nobs=nobs end=eof;

if _n_=1 then
call symput("vars", compress(put(nobs,3.)));

call symputx('var' || compress(put(_n_, 3.)), variable);


call symputx('label' || compress(put(_n_, 3.)), label);
call symputx('length' || compress(put(_n_, 3.)), put(length, 3.));

** valid ODM types include TEXT, INTEGER, FLOAT, DATETIME,


** DATE, TIME and map to SAS numeric or character;
if upcase(type) in ("INTEGER", "FLOAT") then
call symputx('type' || compress(put(_n_, 3.)), "");
else if upcase(type) in ("TEXT", "DATE", "DATETIME",
"DATE", "TIME") then
call symputx('type' || compress(put(_n_, 3.)), "$");
else
put "ERR" "OR: not using a valid ODM type. " type=;

** create **KEEPSTRING macro variable;


length keepstring $ 32767;

3
Clinical Trials – Make SDTM DM and EX datasets

retain keepstring;
keepstring = compress(keepstring) || "|" || left(variable);
if eof then
call symputx(upcase(compress("&dataset" || 'KEEPSTRING')),
left(trim(translate(keepstring," ","|"))));
run;

** create a 0-observation template data set used for assigning


** variable attributes to the actual data sets;
data EMPTY_&dataset;
%do i=1 %to &vars;
attrib &&var&i label="&&label&i" length=&&type&i.&&length&i...;
%if &&type&i=$ %then
retain &&var&i '';
%else
retain &&var&i .;
;
%end;
if 0;
run;

%mend make_empty_dataset;

Program 2: dm.sas

/*Process the raw demographic data from a CSV file*/

%include '/folders/myfolders/test1/common.sas';
%common;

filename infl '/folders/myfolders/test1/dm.csv';

proc format;
value trt
1 = "Active"
0 = "Placebo";
value gender
1 = "M"
2 = "F"
3 = "U";
value race
1 = "White"
2 = "Black"
3 = "Other";
run;

data source.demographic;
infile infl dlm='2C0D'x dsd missover;
length dob1 $10
randdt1 $10;
input subject trt gender race orace $ dob1 $ randdt1 $;
dob=input(dob1,mmddyy10.);
randdt=input(randdt1,mmddyy10.);
format dob randdt mmddyy10.;
uniqueid = 'UNI' || put(subject,3.);
gender1=put(gender,gender.);

4
Clinical Trials – Make SDTM DM and EX datasets

race1=put(race,race.);
trt1=put(trt,trt.);
label subject = "Subject Number"
trt = "Treatment"
gender = "Gender"
race = "Race"
orace = "Oher Race Specify"
dob = "Date of Birth"
uniqueid = "Company Wide Subject ID"
randdt = "Randomization Date";
drop dob1 randdt1 gender race;
rename gender1=gender race1=race;
run;

Program 3: ds.sas

/*Process the raw dose data from an Excel file.


Key points: 1) make up missing dates; 2) convert Excel dates to SAS dates*/

%include '/folders/myfolders/test1/common.sas';
%common;

libname ds xlsx "/folders/myfolders/test1/ds.xlsx";

data source.dosing;
set ds.Sheet1;
if find(startdt,'/') then
do;
array var1(3) $4.;
do i=1 to 3;
var1(i)=scan(startdt,i,"/");
if var1(i)=' ' then
var1(i)='1';
end;
startdt1=mdy(input(var1(1),$4.), input(var1(2),$4.) , input(var1(3),$4.));
end;
else
startdt1=input(startdt,$10.)-21916;
if find(enddt,'/') then
do;
array var2(3) $4.;
do i=1 to 3;
var2(i)=scan(enddt,i,"/");
if var2(i)=' ' then
var2(i)='1';
end;
enddt1=mdy(input(var2(1),$4.), input(var2(2),$4.) , input(var2(3),$4.));
end;
else
enddt1=input(enddt,$10.)-21916;
uniqueid = 'UNI' || put(subject,3.);
format startdt1 enddt1 mmddyy10.;
drop i startdt enddt var11-var13 var21-var23;
rename startdt1=startdt enddt1=enddt;
run;

5
Clinical Trials – Make SDTM DM and EX datasets

Program 4: make_sort_order.sas

/* make_sort_order.sas creates a global macro variable called SORTSTRING where ** is the name of the dataset that
contains the metadata specified sort order for a given dataset.

MACRO PARAMETERS:
metadatafile = the file containing the dataset metadata
dataset = the dataset or domain name*/

%macro make_sort_order(metadatafile=,dataset=);

proc import
datafile="&metadatafile"
out=_temp
dbms=xlsx
replace;
sheet="TOC_METADATA";
run;

** create **SORTSTRING macro variable;


%global &dataset.SORTSTRING;
data _null_;
set _temp;

where name = "&dataset";

call symputx(compress("&dataset" || "SORTSTRING"),


translate(domainkeys," ",","));
run;

%mend make_sort_order;

Program 5: STDM_DM.sas

/* STDM_DM.sas creates the SDTM DM and SUPPDM datasets and saves them as permanent SAS datasets to the target
libref */

%include '/folders/myfolders/test1/common.sas';
%common;

**** CREATE EMPTY DM DATASET CALLED EMPTY_DM;


%include '/folders/myfolders/test1/make_empty_dataset.sas';
%make_empty_dataset(metadatafile=/folders/myfolders/test1/SDTM_METADATA.xlsx,dataset=DM);

**** GET FIRST AND LAST DOSE DATE FOR RFSTDTC AND RFENDTC;
proc sort
data=source.dosing(keep=subject startdt enddt)
out=dosing;
by subject startdt;
run;

**** FIRSTDOSE=FIRST DOSING AND LASTDOSE=LAST DOSING;


data dosing;
set dosing;
by subject;

6
Clinical Trials – Make SDTM DM and EX datasets

format firstdose lastdose mmddyy10.;


retain firstdose lastdose;

if first.subject then
do;
firstdose = .;
lastdose = .;
end;

firstdose = min(firstdose,startdt,enddt);
lastdose = max(lastdose,startdt,enddt);
drop startdt enddt;
if last.subject;
run;

**** GET DEMOGRAPHICS DATA;


proc sort
data=source.demographic
out=demographic;
by subject;
run;

data demog_dose;
merge demographic
dosing;
by subject;
run;

**** DERIVE THE MAJORITY OF SDTM DM VARIABLES;


data dm;
set EMPTY_DM
demog_dose;
studyid = 'XYZ123';
domain = 'DM';
usubjid = left(uniqueid);
subjid = put(subject,3.);
rfstdtc = put(firstdose,yymmdd10.);
rfendtc = put(lastdose,yymmdd10.);
siteid = substr(subjid,1,1) || "00";
brthdtc = put(dob,yymmdd10.);
age = floor ((intck('month',dob,firstdose) -
(day(firstdose) < day(dob))) / 12);
if age ne . then
ageu = 'YEARS';
country = "USA";
sex=gender;
arm=trt1;
armcd=put(trt,3.);
drop gender trt trt1;
run;

%include '/folders/myfolders/test1/make_sort_order.sas';
%make_sort_order(metadatafile=/folders/myfolders/test1/SDTM_METADATA.xlsx,dataset=DM);

proc sort
data=dm(keep = &DMKEEPSTRING)

7
Clinical Trials – Make SDTM DM and EX datasets

out=target.dm;
by &DMSORTSTRING;
run;

**** CREATE EMPTY SUPPDM DATASET CALLED EMPTY_DM;


%make_empty_dataset(metadatafile=/folders/myfolders/test1/SDTM_METADATA.xlsx,dataset=SUPPDM);

data suppdm;
set EMPTY_SUPPDM
dm;

keep &SUPPDMKEEPSTRING;

**** OUTPUT OTHER RACE AS A SUPPDM VALUE;


if orace ne '' then
do;
rdomain = 'DM';
qnam = 'RACEOTH';
qlabel = 'Race, Other';
qval = left(orace);
qorig = 'CRF';
output;
end;

**** OUTPUT RANDOMIZATION DATE AS SUPPDM VALUE;


if randdt ne . then
do;
rdomain = 'DM';
qnam = 'RANDDTC';
qlabel = 'Randomization Date';
qval = left(put(randdt,yymmdd10.));
qorig = 'CRF';
output;
end;
run;

%make_sort_order(metadatafile=/folders/myfolders/test1/SDTM_METADATA.xlsx,dataset=SUPPDM);

proc sort
data=suppdm
out=target.suppdm;
by &SUPPDMSORTSTRING;
run;

Program 6: STDM_EX.sas
/* STDM_EX.sas creates the SDTM EX dataset and saves it as a permanent SAS dataset to the target libref */

%include '/folders/myfolders/test1/common.sas';
%common;

**** CREATE EMPTY DM DATASET CALLED EMPTY_DM;


%include '/folders/myfolders/test1/make_empty_dataset.sas';

8
Clinical Trials – Make SDTM DM and EX datasets

%make_empty_dataset(metadatafile=/folders/myfolders/test1/SDTM_METADATA.xlsx,datase
t=EX);
%include '/folders/myfolders/test1/make_sdtm_dy2.sas';
%include '/folders/myfolders/test1/make_sort_order.sas';

**** DERIVE THE MAJORITY OF SDTM EX VARIABLES;


data ex;
set EMPTY_EX
source.dosing;

studyid = 'XYZ123';
domain = 'EX';
usubjid = left(uniqueid);
exdose = dailydose;
exdostot = dailydose;
exdosu = 'mg';
exdosfrm = 'TABLET, COATED';
exstdtc=put(startdt,yymmdd10.);
exendtc=put(enddt,yymmdd10.);
run;

proc sort
data=ex;
by usubjid;
run;

**** CREATE SDTM STUDYDAY VARIABLES AND INSERT EXTRT;


data ex;
merge ex(in=inex) target.dm(keep=usubjid rfstdtc arm);
by usubjid;

if inex;

%make_sdtm_dy(refdate=rfstdtc,date=exstdtc);
%make_sdtm_dy(refdate=rfstdtc,date=exendtc);

**** in this simplistic case all subjects received the treatment they were
randomized to;
extrt = arm;
run;

**** CREATE SEQ VARIABLE;


proc sort
data=ex;
by studyid usubjid extrt exstdtc;
run;

OPTIONS MISSING = ' ';


data ex;
retain STUDYID DOMAIN USUBJID EXSEQ EXTRT EXDOSE EXDOSU EXDOSFRM EXDOSTOT
EXSTDTC EXENDTC EXSTDY EXENDY;
set ex(drop=exseq);
by studyid usubjid extrt exstdtc;

if not (first.exstdtc and last.exstdtc) then


put "WARN" "ING: key variables do not define an unique record. " usubjid=;

9
Clinical Trials – Make SDTM DM and EX datasets

retain exseq;
if first.usubjid then
exseq = 1;
else
exseq = exseq + 1;

label exseq = "Sequence Number";


run;

**** SORT EX ACCORDING TO METADATA AND SAVE PERMANENT DATASET;


%make_sort_order(metadatafile=/folders/myfolders/test1/SDTM_METADATA.xlsx,dataset=E
X);

proc sort
data=ex(keep = &EXKEEPSTRING)
out=target.ex;
by &EXSORTSTRING;
run;

10

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy