# REDCapAPI_ToDDI.R - Generate a DDI 3.1 file from the metadata for a longitudinal project # exported from REDCap via the API facility # Larry Hoyle, Institute for Policy and Social Research, University of Kansas # This work is licensed under the Creative Commons Attribution 3.0 United States License. # To view a copy of this license, visit http://creativecommons.org/licenses/by/3.0/us/ # or send a letter to Creative Commons, 444 Castro Street, Suite 900, Mountain View, California, 94041, USA. # note that each time this runs it generates a new set of random UUIDs. # If replication is needed use the set.seed() function to initialize a specific sequence REDCapAPI_ToDDI <- function(secret_token ="", # the token for the user&project DDIinstanceFile="", # the output DDI file EchoDDI=FALSE, # if TRUE, print the generated DDI to the console IncludeData=TRUE, # if TRUE, include PhysicalRecordSegment and output StudyTitle="Study Title goes here", # the title of the study agency="example.org", # the agency responsible for the data organizationSchemeNameText="Organization scheme", # the name for the Organization Scheme organizationNameText="Organization name", # the organization name CurrentVersion="1.0.0", # the default current version number EmbargoEndDateValue="2112-01-01T01:01:01" # a dateTime at which the embargo on Personally Identifiable data will expire ){ if (secret_token=="" || DDIinstanceFile==""){ if (secret_token=="")print("REDCapAPI_ToDDI requires a value for the aqrgument CSVfile") if (DDIinstanceFile=="")print("REDCapAPI_ToDDI requires a value for the aqrgument DDIinstanceFile") return() } else { # Load needed libraries # --> NOTE: RCurl is dependent on bitops library(bitops) library(RCurl) library(Hmisc) library(xtable) library(XML) library(hash) # Set secret token specific to your REDCap project secret_token = '325347F271F375292924CF3EFA4CBAC4' # Set the url to the api (ex. https://YOUR_REDCAP_INSTALLATION/api/) api_url = 'https://redcap.ittc.ku.edu/api/' ## If in R for Windows ## --> NOTE: need to do this because the Curl C libraries are not "default programs" used (unlike on a Mac) curl_handle = getCurlHandle() curlSetOpt(ssl.verifypeer = FALSE, curl = curl_handle) # ---------------------------------------------------------------------------------- # Read all of the metadata tables available (except for files attached to data rows) # ---------------------------------------------------------------------------------- ## Read formEventMapping from REDCap formEventMapping_Response <- postForm(api_url, token = secret_token, content = 'formEventMapping', format = 'csv', curl = curl_handle) formEventMapping <- read.table(file = textConnection(formEventMapping_Response), header = TRUE, sep = ",", na.strings = "NA", stringsAsFactors = FALSE, colClasses="character") ## Read Event from REDCap Events_Response <- postForm(api_url, token = secret_token, content = 'event', format = 'csv', curl = curl_handle) Events <- read.table(file = textConnection(Events_Response), header = TRUE, sep = ",", na.strings = "NA", stringsAsFactors = FALSE, colClasses="character") ## Read Records from REDCap - single value per cell Records_Response_eav <- postForm(api_url, token = secret_token, content = 'record', format = 'csv', type = 'eav', curl = curl_handle) Records_eav <- read.table(file = textConnection(Records_Response_eav), header = TRUE, sep = ",", na.strings = "NA", stringsAsFactors = FALSE, colClasses="character") ## Read Records from REDCap - flat file Records_Response_flat <- postForm(api_url, token = secret_token, content = 'record', format = 'csv', type = 'flat', curl = curl_handle) Records_flat <- read.table(file = textConnection(Records_Response_flat), header = TRUE, sep = ",", na.strings = "NA", stringsAsFactors = FALSE, colClasses="character") ## Read Metadata from REDCap Metadata_Response <- postForm(api_url, token = secret_token, content = 'metadata', format = 'csv', curl = curl_handle) DataDictionaryRaw <- read.table(file = textConnection(Metadata_Response), header = TRUE, sep = ",", na.strings = "NA", stringsAsFactors = FALSE, colClasses="character") # Simplify comparisons by converting # field_type # text_validation_type_or_show_slider_number # identifier # required_field # to lower case DataDictionary <- DataDictionaryRaw DataDictionary$field_type <- tolower(DataDictionaryRaw$field_type) DataDictionary$text_validation_type_or_show_slider_number <- tolower(DataDictionaryRaw$text_validation_type_or_show_slider_number) DataDictionary$identifier <- tolower(DataDictionaryRaw$identifier) DataDictionary$required_field <- tolower(DataDictionaryRaw$required_field) ## Read Users from REDCap Users_Response <- postForm(api_url, token = secret_token, content = 'user', format = 'csv', curl = curl_handle) Users <- read.table(file = textConnection(Users_Response), header = TRUE, sep = ",", na.strings = "NA", stringsAsFactors = FALSE, colClasses="character") ## Read Arms from REDCap Arms_Response <- postForm(api_url, token = secret_token, content = 'arm', format = 'csv', curl = curl_handle) Arms <- read.table(file = textConnection(Arms_Response), header = TRUE, sep = ",", na.strings = "NA", stringsAsFactors = FALSE, colClasses="character") rm(secret_token, formEventMapping_Response, Records_Response_eav, Records_Response_flat, Metadata_Response, Users_Response, Arms_Response, Events_Response) # formEventMapping # Events # Records_eav # Records_flat # DataDictionaryRaw # DataDictionary # Users # Arms # setwd("C:/DDrive/projects/various/DDI-Naddi/REDCAP_HoyleAnd_VanRoekel/R/outputs") # write.table(formEventMapping, file="formEventMapping.txt", quote=FALSE, sep="\t", na=" ", row.names=FALSE, col.names=TRUE) # write.table(Events, file="Events.txt", quote=FALSE, sep="\t", na=" ", row.names=FALSE, col.names=TRUE) # write.table(Records_eav, file="Records_eav.txt", quote=FALSE, sep="\t", na=" ", row.names=FALSE, col.names=TRUE) # write.table(Records_flat, file="Records_flat.txt", quote=FALSE, sep="\t", na=" ", row.names=FALSE, col.names=TRUE) # write.table(DataDictionary, file="Metadata.txt", quote=FALSE, sep="\t", na=" ", row.names=FALSE, col.names=TRUE) # write.table(Users, file="Users.txt", quote=FALSE, sep="\t", na=" ", row.names=FALSE, col.names=TRUE) # write.table(Users, file="Users.txt", quote=FALSE, sep="\t", na=" ", row.names=FALSE, col.names=TRUE) # ---------------------------------------------------------------------------------- # Create a DDI instance containing all of the metadata # ---------------------------------------------------------------------------------- # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv rUUID <- function(...){ # Generates a version 4 UUID as described in http://en.wikipedia.org/wiki/Universally_unique_identifier # "Version 4 UUIDs use a scheme relying only on random numbers. # This algorithm sets the version number as well as two reserved bits. # All other bits are set using a random or pseudorandom data source. # Version 4 UUIDs have the form xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx # where x is any hexadecimal digit # and y is one of 8, 9, A, or B. e.g. f47ac10b-58cc-4372-a567-0e02b2c3d479." # Parameters p1 <- paste(sprintf("%04X",ceiling(runif(1,0,(2^16)-1))), sprintf("%04X",ceiling(runif(1,0,(2^16)-1))) , sep = "" ) p2 <- sprintf("%04X",ceiling(runif(1,0,(2^16)-1))) p3 <- paste("4", sprintf("%03X",ceiling(runif(1,0,(2^12)-1))) , sep = "") yChoices <- c("8","9","A","B") y <- yChoices[ceiling(runif(1,0,4))] p4 <- paste(y, sprintf("%03X",ceiling(runif(1,0,(2^12)-1))) , sep = "") p5 <- paste(sprintf("%04X",ceiling(runif(1,0,(2^16)-1))), sprintf("%04X",ceiling(runif(1,0,(2^16)-1))) , sprintf("%04X",ceiling(runif(1,0,(2^16)-1))) , sep = "" ) UUID <- paste(p1, p2, p3, p4, p5, sep="-") return(UUID) } # ends function definition rUUID # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv addTextNode <- function(NodeName, NodeText, ParentNode, ContainingXmlHashTree ){ TextNode <- addNode(xmlNode(NodeName), ParentNode, ContainingXmlHashTree) TextNodeText <- addNode(xmlTextNode(NodeText), TextNode, ContainingXmlHashTree) return(TextNode) } # ends function addTextNode # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv addSchemeReference <- function(SchemeID, agency, CurrentVersion, ParentNode, ContainingXmlHashTree ){ # adds the elements needed for a Scheme reference to the node "ParentNode" in the xmlHashTree "ContainingXmlHashTree" # The parent could be a Scheme element in a reference to an identifiable element or # a direct reference to a Scheme as with a CodeSchemeReference # SchemeID - the ID of the Scheme referenced # agency - the agency of the Scheme referenced # CuddentVersion - the version of the Scheme referenced SchemeReferenceID <- addTextNode("r:ID", SchemeID, ParentNode, ContainingXmlHashTree ) SchemeReferenceIdentifyingAgency <- addTextNode("r:IdentifyingAgency", agency, ParentNode, ContainingXmlHashTree ) SchemeReferenceVersion <- addTextNode("r:Version",CurrentVersion, ParentNode, ContainingXmlHashTree ) ParentNode } # ends function definition addSchemeReference # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv addIdentifiableReference <- function(SchemeID, agency, CurrentVersion, IdentifiableID, ParentNode, ContainingXmlHashTree ){ # adds the elements needed for a reference to an identifiable element. # the node "ParentNode" in the xmlHashTree "ContainingXmlHashTree" is the reference # The parent couuld be, for example, a QuestionReference # SchemeID - the ID of the Scheme referenced # agency - the agency of the Scheme referenced # CuddentVersion - the version of the Scheme referenced # IdentifiableID - the ID of the identifiable element SchemeElement <- addNode(xmlNode("r:Scheme"), ParentNode, ContainingXmlHashTree) LastSchemeReferenceReturned <- addSchemeReference(SchemeID, agency, CurrentVersion, SchemeElement, DDI ) IdentifiableReferenceID <- addTextNode("r:ID", IdentifiableID, ParentNode, ContainingXmlHashTree ) IdentifiableReferenceIdentifyingAgency <- addTextNode("r:IdentifyingAgency",agency, ParentNode, ContainingXmlHashTree ) IdentifiableReferenceVersion <- addTextNode("r:Version", CurrentVersion, ParentNode, ContainingXmlHashTree ) ParentNode } # ends function definition addIdentifiableReference # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # ----------------------------------------------------------- # First: Make a shell of the basic required DDI3.1 elements. # ------------------------------------------------------------ DDI = xmlHashTree() DDIInstance <- addNode(xmlNode("ddi:DDIInstance", attrs=c(id = rUUID(), version = CurrentVersion, versionDate = format(Sys.time(), "%Y-%m-%d"), agency = agency, isMaintainable = "true" ), namespaceDefinitions = c(ddi="ddi:instance:3_1", r="ddi:reusable:3_1" , g="ddi:group:3_1", dc="ddi:dcelements:3_1", dc2="http://purl.org/dc/elements/1.1/", s="ddi:studyunit:3_1", c="ddi:conceptualcomponent:3_1", d="ddi:datacollection:3_1", l="ddi:logicalproduct:3_1", p="ddi:physicaldataproduct:3_1", pi="ddi:physicalinstance:3_1", ds="ddi:dataset:3_1", a="ddi:archive:3_1", cm="ddi:comparative:3_1", xsi="http://www.w3.org/2001/XMLSchema-instance", xhtml="http://www.w3.org/1999/xhtml" ) ), character(), DDI) # ----------------------------------------------------------- #ResourcePackage # ----------------------------------------------------------- # Much of the structure of this survey will go in this ResourcePackage and be used by reference from the StudyUnit ResourcePackageID <- rUUID() ResourcePackage <- addNode(xmlNode("g:ResourcePackage", attrs=c(id = ResourcePackageID, version = CurrentVersion, agency = agency) ), DDIInstance, DDI ) ResourcePackageCitation <- addNode(xmlNode("r:Citation"),ResourcePackage,DDI) ResourcePackageTitle <- addTextNode("r:Title", "Resource Package Containing Organization", ResourcePackageCitation, DDI ) ResourcePackagePurpose <- addNode(xmlNode("g:Purpose",attrs=c(id = rUUID())), ResourcePackage,DDI) ResourcePackagePurposeContent <- addTextNode("r:Content", "Resource package needed for an organization referenced by Embargo", ResourcePackagePurpose,DDI) GroupDataCollection <- addNode(xmlNode("g:DataCollection" ), ResourcePackage, DDI ) DataCollectionID <- rUUID() DataCollection <- addNode(xmlNode("d:DataCollection", attrs=c(id = DataCollectionID, version = CurrentVersion, agency = agency) ), GroupDataCollection, DDI ) #-------------- ConrolConstructScheme(s) -------------------------- # this ControlConstructScheme references Questions and includes statements preceding and following them # Statements include Section Header, Field Note, and Descriptive Field Labels ControlConstructSchemeObjectsID <-rUUID() ControlConstructSchemeObjects <- addNode(xmlNode("d:ControlConstructScheme", attrs=c(id = ControlConstructSchemeObjectsID, version = CurrentVersion, agency = agency) ), DataCollection, DDI ) ControlConstructSchemeObjectsName <- addTextNode("d:ControlConstructSchemeName", "QuestionConstructs and associated StatementConstructs", ControlConstructSchemeObjects, DDI ) ControlConstructSchemeObjectsLabel <- addTextNode("r:Label", "This ControlConstructScheme contains a sequential list of REDCap Section Headers, Descriptive Fields, Questions, and Field Notes. These will be referenced in the FormSequence", ControlConstructSchemeObjects, DDI) # Check to see if there are any sub-sequences, if so create a ControlConstructScheme to contain them if(!identical(DataDictionary$branching_logic,rep("",length(DataDictionary$branching_logic)))){ # this ControlConstructScheme contains sub-Sequences # this may include a Section Header, a question, and a Field Note # This also contains IfThenElse constructs pointing to those Sequences ControlConstructSchemeSubSequencesID <-rUUID() ControlConstructSchemeSubSequences <- addNode(xmlNode("d:ControlConstructScheme", attrs=c(id = ControlConstructSchemeSubSequencesID, version = CurrentVersion, agency = agency) ), DataCollection, DDI ) ControlConstructSchemeSubSequencesName <- addTextNode("d:ControlConstructSchemeName", "Sub-Sequences: QuestionConstructs and associated StatementConstructs", ControlConstructSchemeSubSequences, DDI ) ControlConstructSchemeSubSequencesLabel <- addTextNode("r:Label", "This ControlConstructScheme contains sub sequences where a question has associated branching logic. When the question's l;ogical expression is false the question and surrounding statements are not displayed ", ControlConstructSchemeSubSequences, DDI) } # this ControlConstructScheme contains a sequence for each form # (named in FormNames with the hash FormNamesHash allowing lookup of a form number) # Each sequence is a series of references to other ControlConstructs # if there is no branching logic these will be individual objects in ControlConstructSchemeObjects # If there is branching logic this will be a reference to an IfThenElse that may consist of a Section Header, a question and a Field Note # the sequence for each form will be referenced in an Instrument for each form ControlConstructSchemeSequencesID <-rUUID() ControlConstructSchemeSequences <- addNode(xmlNode("d:ControlConstructScheme", attrs=c(id = ControlConstructSchemeSequencesID, version = CurrentVersion, agency = agency) ), DataCollection, DDI ) ControlConstructSchemeSequencesName <- addTextNode("d:ControlConstructSchemeName", "ControlConstructSchemeForFormss", ControlConstructSchemeSequences, DDI ) ControlConstructSchemeSequencesLabel <- addTextNode("r:Label", "This ControlConstructScheme contains a sequence for each form for the REDCap survey", ControlConstructSchemeSequences, DDI) # The column DataDictionary$form_name contains the name of the form to which each variable belongs # There will be one Instrument for each form FormNames <- as.character(unique(DataDictionary$form_name)) # Create a hash to associate a form number with the FormNames FormNamesHash <- hash() .set(FormNamesHash, FormNames, 1:length(FormNames) ) # test extraction # FormNamesHash[[ as.character(DataDictionary$form_name[2]) ]] # One FormSequence for each Form Name, create an array for the IDs and for the nodes # first create the arrays the right length FormSequenceID <- rep("foo",length(FormNames)) FormSequence <- rep(xmlNode("DUMMY"), length(FormNames) ) # One Instrument for each form - create arrays for the IDs and nodes InstrumentID <- rep("foo",length(FormNames)) Instrument <- rep(xmlNode("DUMMY"), length(FormNames) ) for (ixForm in 1:length(FormNames)) { FormSequenceID[ixForm] <- rUUID() FormSequence[[ixForm]] <- addNode(xmlNode("d:Sequence", attrs=c(id = FormSequenceID[ixForm], version = CurrentVersion) ), ControlConstructSchemeSequences, DDI ) LastFormSequenceName <- addTextNode("d:ConstructName", paste("Sequence for form ",FormNames[ixForm]), FormSequence[[ixForm]], DDI ) LastFormSequenceLabel <- addTextNode("r:Label", paste("Form ",FormNames[ixForm],". This Sequence orders REDCap Section Headers, Questions, and Field Notes. It includes conditional branches. In redcap a question may be hidden if a logical expression is false"), FormSequence[[ixForm]], DDI) InstrumentID[ixForm] <- rUUID() Instrument[[ixForm]] <- addNode(xmlNode("d:Instrument", attrs=c(id = InstrumentID[ixForm], version = CurrentVersion, agency = agency) ), DataCollection, DDI ) InstrumentName <- addTextNode("d:InstrumentName" , paste("Instrument for form ",FormNames[ixForm]), Instrument[[ixForm]], DDI ) InstrumentSoftware <- addNode(xmlNode("r:Software", attrs=c(id = rUUID()) ), Instrument[[ixForm]], DDI ) InstrumentSoftwareName <- addTextNode("r:Name" , "REDCap Survey Questionnaire", InstrumentSoftware, DDI ) InstrumentSoftwareVersion <- addTextNode("r:Name" , "????", InstrumentSoftware, DDI ) InstrumentSoftwareDescription <- addTextNode("r:Name" , "REDCap Online Survey Questionnaire designer", InstrumentSoftware, DDI ) InstrumentControlConstructReference <- addNode(xmlNode("d:ControlConstructReference" ), Instrument[[ixForm]], DDI ) InstrumentControlConstructReferenceReturned <- addIdentifiableReference(ControlConstructSchemeSequencesID, agency, CurrentVersion, FormSequenceID[ixForm], InstrumentControlConstructReference, DDI ) } # ends for (ixForm in 1:length(FormNames)) ProcessingEventID <- rUUID() CodingID <- rUUID() ProcessingEvent <- addNode(xmlNode("d:ProcessingEvent", attrs=c(id = ProcessingEventID) ), DataCollection, DDI ) OrganizationSchemeID <- rUUID() OrganizationScheme <- addNode(xmlNode("a:OrganizationScheme", attrs=c(id = OrganizationSchemeID, version = CurrentVersion, agency = agency) ), ResourcePackage, DDI) OrganizationSchemeName <- addTextNode("a:OrganizationSchemeName", organizationSchemeNameText, OrganizationScheme,DDI) OrganizationID <- rUUID() Organization <- addNode(xmlNode("a:Organization", attrs=c(id = OrganizationID, version = CurrentVersion) ), OrganizationScheme, DDI ) OrganizationName <- addTextNode("a:OrganizationName", organizationNameText, Organization,DDI) UniverseSchemeID <- rUUID() UniverseScheme <- addNode(xmlNode("c:UniverseScheme", attrs=c(id = UniverseSchemeID, version = CurrentVersion, agency = agency) ), ResourcePackage, DDI ) UniverseSchemeName <- addTextNode("c:UniverseSchemeName" , "Universes for this study", UniverseScheme, DDI ) UniverseID <- rUUID() Universe <- addNode(xmlNode("c:Universe", attrs=c(id = UniverseID, version = CurrentVersion) ),UniverseScheme, DDI ) UniverseName <- addTextNode("c:UniverseName" , "DefaultUniverse", Universe, DDI ) UniverseHumanReadable <- addTextNode("c:HumanReadable", "This is a default universe, included because a UniverseReference is required", Universe, DDI ) QuestionSchemeID <- rUUID() QuestionScheme <- addNode(xmlNode("d:QuestionScheme", attrs=c(id = QuestionSchemeID, version = CurrentVersion, agency = agency) ), ResourcePackage, DDI ) # ----------------------------------------------------------- # StudyUnit # ----------------------------------------------------------- StudyUnitID <- rUUID() StudyUnit <- addNode(xmlNode("s:StudyUnit", attrs=c(id = StudyUnitID, version = CurrentVersion, agency = agency) ), DDIInstance, DDI ) Citation <- addNode(xmlNode("r:Citation"),StudyUnit,DDI) Title <- addTextNode("r:Title", StudyTitle, Citation, DDI ) Abstract <- addNode(xmlNode("s:Abstract",attrs=c(id = rUUID() )), StudyUnit,DDI) AbstractContent <- addNode(xmlNode("r:Content"),Abstract,DDI) UniverseReference <- addNode(xmlNode("r:UniverseReference"),StudyUnit,DDI) UniverseReferenceReturned <- addIdentifiableReference(UniverseSchemeID, agency, CurrentVersion, UniverseID, UniverseReference, DDI ) StudyUnitPurpose <- addNode(xmlNode("s:Purpose",attrs=c(id = rUUID())), StudyUnit,DDI) StudyUnitPurposeContent <- addNode(xmlNode("r:Content"),StudyUnitPurpose,DDI) KindOfData <- addTextNode("s:KindOfData", "survey data", StudyUnit,DDI) EmbargoID <- rUUID() Embargo <- addNode("r:Embargo",attrs=c(id = EmbargoID) ,StudyUnit,DDI) EmbargoName <- addTextNode("r:EmbargoName", "Personally Identifiable Data", Embargo, DDI) EmbargoLabel <- addTextNode("r:Label", "Variable flagged in REDCap to be suppressed for containing personally identifiable data", Embargo, DDI) EmbargoDate <- addNode("r:Date", Embargo, DDI) EmbargoDateStartDate <- addTextNode("r:StartDate", Sys.Date(), EmbargoDate, DDI) EmbargoDateEndDate <- addTextNode("r:EndDate", EmbargoEndDateValue, EmbargoDate, DDI) EmbargoRationale <- addTextNode("r:Rationale", "Persnally identifiable data are not generally open.", Embargo, DDI) EmbargoAgencyOrganizationReference <- addNode(xmlNode("r:AgencyOrganizationReference"), Embargo, DDI ) EmbargoAgencyOrganizationReferenceReturned <- addIdentifiableReference(OrganizationSchemeID, agency, CurrentVersion, OrganizationID, EmbargoAgencyOrganizationReference, DDI ) ConceptualComponent <- addNode(xmlNode("c:ConceptualComponent", attrs=c(id = rUUID(), version = CurrentVersion, agency = agency) ), StudyUnit, DDI ) # -- LogicalProduct -- LogicalProductID <- rUUID() LogicalProduct <- addNode(xmlNode("l:LogicalProduct", attrs=c(id = LogicalProductID, version = CurrentVersion, agency = agency) ), StudyUnit, DDI ) LogicalProductName <- addTextNode("l:LogicalProductName", paste("Logical Product for ",StudyTitle), LogicalProduct, DDI) LogicalProductLabel <- addTextNode("r:Label", paste("Logical Product label for ",StudyTitle), LogicalProduct, DDI) LogicalProductDescription <- addTextNode("r:Description", paste("Logical Product label for ",StudyTitle, " contains references to the Categories, Codes, and Variables described in the ResourcePackage in this DDIInstance"),LogicalProduct, DDI) DataRelationshipID <- rUUID() DataRelationship <- addNode(xmlNode("l:DataRelationship", attrs=c(id = DataRelationshipID, version = CurrentVersion) ), LogicalProduct, DDI ) LogicalRecordID <- rUUID() LogicalRecord <- addNode(xmlNode("l:LogicalRecord", attrs=c(hasLocator="false", id = LogicalRecordID) ), DataRelationship, DDI ) VariablesInRecord <- addNode(xmlNode("l:VariablesInRecord", attrs=c(allVariablesInLogicalProduct="true") ), LogicalRecord, DDI ) # Note references to Code, Category and Variable Scheme references will be added as they are created # ---------------------------------------------------------------------------------------------------------------------------------------------------- # ----------------------------------------------------------- # Optional - Show the empty shell. # ------------------------------------------------------------ # DDI # categories and codes are in DataDictionary$select_choices_or_calculations # REDCap is like SPSS in that questions (variables) do not use a set of codes and categories by reference but # instead it repeats the definition for each question # for the DDI instance, schemes will be used be reference from the variables and questions # For calculation questions (field_type=="calc") there is a formula in select_choices_or_calculations # create a vector with the calculation formula in the appropriate element CalcCol <- as.character(DataDictionary$select_choices_or_calculations) CalcCol[which(DataDictionary$field_type!="calc")] <- "" # The CodeAndCategoriesHash will be used to look up the index number used to # reference the UUID arrays and the split pairs of codes and categories CodeAndCategoriesHash <- hash() # collect the unique code and category definitions. There are some predefined categories: # NOTE: these need to be added to CodeAndCategories first so that their order is known # the first will be the default for yes/no data types # the second definition will be the default for true/false data types # the third will be for check boxes, used for multiple choice and stop action data types # the fourth will be for the completion categories # values for the DataDictionary$select_choices_or_calculations are implied for the four default data types above # adding them explicitly here simplifies the code below by giving them coded types # checkbox fields must be handled differently - the values in the select_choices_or_calculations for them are # essentially variable labels and thier value labels are "0, Unchecked | 1, Checked" # the "0, Incomplete | 1, Unverified | 2, Complete" labels are for the automatic variable "survey_complete" which REDCap adds # to the end of the dataset LabelCol <- as.character(DataDictionary$select_choices_or_calculations) LabelCol[which(DataDictionary$field_type=="yesno")] <- "1, Yes | 0, No" # yesno fileds are automatically labelled this way LabelCol[which(DataDictionary$field_type=="truefalse")] <- "1, True | 0, False" # truefalse fields are automatically labelled this way # add values to the Slider label. Slider runs from 0 to 100 # so 50 is the midpoint LabelCol[which(DataDictionary$field_type=="slider")] <- sub("^([^|]*\\|)([^|]*\\|)", "0, \\1 50, \\2 100, ", LabelCol[which(DataDictionary$field_type=="slider")], perl=TRUE) LabelCol[which(DataDictionary$field_type=="calc")] <- "" # delete the formulas from the calc column categories and labels DataDictionary$select_choices_or_calculations <- factor(LabelCol) # put the updated column categories and labels back in the DataDictionarydata frame # optionally show it # DataDictionary$select_choices_or_calculations # make a unique set with the two default categories taht don't associate with a field_type in the first two positions # so that they can be found if necessary # NOTE: the generation of completion variables depends on # "0, Incomplete | 1, Unverified | 2, Complete" being the second entry CodeAndCategories <- c( "0, Unchecked | 1, Checked", "0, Incomplete | 1, Unverified | 2, Complete") CodeAndCategories <- c(CodeAndCategories, as.character(unique(DataDictionary$select_choices_or_calculations) ) ) CodeAndCategories <- CodeAndCategories[CodeAndCategories != ''] CodeAndCategories <- CodeAndCategories[!is.na(CodeAndCategories)] CodeSchemeID <- sapply(1:length(CodeAndCategories),rUUID) # generate IDs for each of the CodeSchemes in advance CategorySchemeID <- sapply(1:length(CodeAndCategories),rUUID) # generate IDs for each of the CategorySchemes in advance CodingID <- sapply(1:length(DataDictionary$field_type),rUUID) # generate IDs for each of the possible Codings in advance # ----------------------------- # Category Schemes # ----------------------------- # Create one Category Scheme XML Node for each CodeAndCategories pair and add them to the DDI tree # first, create the CategoryScheme list, these elements will be replaced CategoryScheme <- rep(xmlNode("l:CategoryScheme", attrs=c(id = CategorySchemeID[1], version = CurrentVersion, agency = agency) ), length(CodeAndCategories) ) # then for each add it to the tree for (ixCat in 1:length(CodeAndCategories) ) { CategoryScheme[[ixCat]] <- addNode( xmlNode("l:CategoryScheme", attrs=c(id = CategorySchemeID[ixCat], version = CurrentVersion, agency = agency) ), ResourcePackage, DDI ) LastCategorySchemeName <- addTextNode("l:CategorySchemeName", paste("CatScheme",ixCat, sep=''), CategoryScheme[[ixCat]], DDI ) # the LogicalProduct needs a reference to this CategoryScheme LastLPCategorySchemeRef <- addNode(xmlNode("l:CategorySchemeReference"),LogicalProduct,DDI) LastLPCategorySchemeRefReturned <- addSchemeReference(CategorySchemeID[ixCat], agency, CurrentVersion, LastLPCategorySchemeRef, DDI ) } # ends (ixCat in 1:length(CodeAndCategories) ) # ----------------------------- # Code Schemes # ----------------------------- # Create one Code Scheme XML Node for each CodeAndCategories pair and add them to the DDI tree # first, create the CodeScheme vector, this first element will be replaced CodeScheme <- rep(xmlNode("l:CodeScheme", attrs=c(id = CodeSchemeID[1], version = CurrentVersion, agency = agency) ), length(CodeAndCategories) ) # also, create the CodeSchemeReference vector CodeSchemeReference <- rep(xmlNode("r:CodeSchemeReference"), length(CodeAndCategories) ) # then for each add it to the DDI tree # >>>>>>>> if the appropriate field type <<<<<<< for (ixCode in 1:length(CodeAndCategories) ){ CodeScheme[[ixCode]] <- addNode( xmlNode("l:CodeScheme", attrs=c(id = CodeSchemeID[ixCode], version = CurrentVersion, agency = agency) ), ResourcePackage, DDI ) LastCodeSchemeName <- addTextNode("l:CodeSchemeName", paste("CodeScheme",ixCode, sep=''), CodeScheme[[ixCode]], DDI ) # the CodeScheme needs a reference to a CategoryScheme LastCategorySchemeRef <- addNode(xmlNode("l:CategorySchemeReference"),CodeScheme[[ixCode]],DDI) LastCategorySchemeRefReturned <- addSchemeReference(CategorySchemeID[ixCode], agency, CurrentVersion, LastCategorySchemeRef, DDI ) # the LogicalProduct needs a reference to this CodeScheme LastLPCodeSchemeRef <- addNode(xmlNode("l:CodeSchemeReference"),LogicalProduct,DDI) LastLPCodeSchemeRefReturned <- addSchemeReference(CodeSchemeID[ixCode], agency, CurrentVersion, LastLPCodeSchemeRef, DDI ) } # ends for (ixCode in 1:length(CodeAndCategories) ) # ----------------------------- # Variable Scheme # ----------------------------- VariableSchemeID <- rUUID() VariableScheme <- addNode(xmlNode("l:VariableScheme", attrs=c(id = VariableSchemeID, version = CurrentVersion, agency = agency) ), ResourcePackage, DDI ) # the LogicalProduct needs a reference to this VariableScheme LastLPVariableSchemeRef <- addNode(xmlNode("l:VariableSchemeReference"),LogicalProduct,DDI) LastLPVariableSchemeRefReturned <- addSchemeReference(VariableSchemeID, agency, CurrentVersion, LastLPVariableSchemeRef, DDI ) # this hash will allow lookup of variable IDs by the names of variables # to be used when listing the order of variables in the output dataset VariableIDhash <- hash() # ------------------------------------------------------------------------------------ # Hash to find code and category schemes from Column 6 of the exported data dictionary # aka DataDictionary$select_choices_or_calculations # ------------------------------------------------------------------------------------- # this hash allows the lookup of the index number used to reference the ID arrays and the split apirs # by the REDCap select_choices_or_calculations value .set(CodeAndCategoriesHash, CodeAndCategories, 1:length(CodeAndCategories) ) # optionally display the hash # CodeAndCategoriesHash # test extraction # CodeAndCategoriesHash[[ as.character(DataDictionary$select_choices_or_calculations[2]) ]] # ------------------------------------------------------------------- # produce a list of code and category pairs separated by a comma. # Each list element corresponds to a set of code and category schemes # ------------------------------------------------------------------- # CodeAndCategories is a character vector with each element looking like: # "1, Choice One | 2, Choice Two | 3, Choice Three | 4, Choice Four" # or # "1, bad, or worse | 2, good, or better" # Vertical bars separate code/category pairs and the first comma separates # the code from the category label. Note that there category label can # contain a comma. CodeCatPairs<-strsplit(CodeAndCategories,"|", fixed=TRUE) # produce a list of lists. # Each sublist corresponds to a code and a category scheme, # It is a list of code category character vectors # The first elemet of the vector is a code # The second element is the associated category !!! NOTE !!! REDCap allows commas in the category label # Note since labels (categories) can contain commas first change the first comma in CodeCatPairs into a # vertical bar and split on that. for (ixCCP in 1:length(CodeCatPairs) ) { for (ixCCPStr in 1:length(CodeCatPairs[[ixCCP]]) ){ CodeCatPairs[[ixCCP]][ixCCPStr] <- sub("^([^,]*), *", "\\1\\|", CodeCatPairs[[ixCCP]][ixCCPStr], perl=TRUE ) } } # ends (ixCCP in 1:length(CodeCatPairs) ) CodeCatSchemes <- lapply(CodeCatPairs, strsplit, '|', fixed=TRUE) # if the second element of the pair is NA then there were no commas to split on and the list contains # categories in the first element instead of the second # switch them if the first pair has no second element for (ixCCS in 1:length(CodeCatSchemes)){ if(is.na(CodeCatSchemes[[ixCCS]][[1]][2])) for (ixPair in 1:length(CodeCatSchemes[[ixCCS]])){ CodeCatSchemes[[ixCCS]][[ixPair]][2] <- CodeCatSchemes[[ixCCS]][[ixPair]][1] CodeCatSchemes[[ixCCS]][[ixPair]][1] <- NA } } # Examples: # CodeCatSchemes[[1]][[1]][1] # first scheme, first pair, code # CodeCatSchemes[[1]][[1]][2] # first scheme, first pair, category # test- second question, third code # CodeCatSchemes[[CodeAndCategoriesHash[[ as.character(DataDictionary$select_choices_or_calculations[2]) ]] ]] [[3]][1] # CodeSchemeID[CodeAndCategoriesHash[[ as.character(DataDictionary$select_choices_or_calculations[2]) ]] ] # test- second question, third category # CodeCatSchemes[[CodeAndCategoriesHash[[ as.character(DataDictionary$select_choices_or_calculations[2]) ]] ]] [[3]][2] # CategorySchemeID[CodeAndCategoriesHash[[ as.character(DataDictionary$select_choices_or_calculations[2]) ]] ] # ---------------------------------------------------------------------------------------- # Loop through each item in the CodeCatSchemes and fill out the Code and Category Schemes # ---------------------------------------------------------------------------------------- # initialize the CategoryElements List of lists for (ixCCS in 1:length(CodeCatSchemes)){ for (ixPair in 1:length(CodeCatSchemes[[ixCCS]])){ CategoryID <- rUUID() # Category LastCategoryElement <- addNode(xmlNode("l:Category",attrs=c(id = CategoryID, version = CurrentVersion) ), CategoryScheme[[ixCCS]], DDI ) LastCategoryLabel <- addTextNode("r:Label", CodeCatSchemes[[ ixCCS ]] [[ixPair]] [2], LastCategoryElement, DDI ) # Code LastCodeElement <- addNode(xmlNode("l:Code" ), CodeScheme[[ixCCS]], DDI ) LastCodeElementCategoryReference <- addNode(xmlNode("l:CategoryReference" ), LastCodeElement, DDI ) LastCodeElementCategoryReferenceReturned <- addIdentifiableReference(CategorySchemeID[ixCCS], agency, CurrentVersion, CategoryID, LastCodeElementCategoryReference, DDI ) LastCodeValue <- addTextNode("l:Value", CodeCatSchemes[[ ixCCS ]] [[ixPair]] [1], LastCodeElement, DDI ) } # ends ixPair in 1:length(CodeCatSchemes[[ixCCS]]) } # ends ixCCS in 1:length(CodeCatSchemes) # ------------------------------------------------- # Loop through each element in the data dictionary # generate variables and questions # ------------------------------------------------- # initialize some vectors QuestionItemID <- rep("foo", length(DataDictionary[,1])) QuestionItem <- rep(xmlNode("DUMMY"), length(DataDictionary[,1])) QuestionItemName <- rep(xmlNode("DUMMY"), length(DataDictionary[,1])) QuestionItemQuestionText <- rep(xmlNode("DUMMY"), length(DataDictionary[,1])) # QuestionItem contains QuestionText element QuestionItemQuestionTextLiteralText <- rep(xmlNode("DUMMY"), length(DataDictionary[,1])) # QuestionText contains LiteralText element QuestionItemQuestionTextLiteralTextText <- rep(xmlNode("DUMMY"), length(DataDictionary[,1])) # LiteralText contatins Text element QuestionItemResponseDomain <- rep(xmlNode("DUMMY"), length(DataDictionary[,1])) # There may be multiple variables per row of the DataDictionary data Frame VariableID <- rep("foo", length(DataDictionary[,1])) # a dummy array of variable IDs VariableIDList <- as.list(VariableID) # a list of VariableIDs - will contain character vectors of length 1 except for checkboxes SubUniverseID <- rep("NA", length(DataDictionary[,1])) for (ixDD in 1:length(DataDictionary[,1])){ # begin loop through DataDictionary FormNumber <- FormNamesHash[[ as.character(DataDictionary$form_name[ixDD]) ]] # this row is for form FormNumber # ____________________________ # Questions # ---------------------------- QuestionItemID[ixDD] <- rUUID() # generate a Question ID for each row of the data dictionary # do not generate an XML element for "descriptive" rows if (DataDictionary$field_type[ixDD]!="descriptive"){ # ResponseDomain Descriptions all begin with the REDCap Field Type ResponseDomainDescriptionText <- paste("REDCap Field Type: ",DataDictionary$field_type[ixDD], " .") QuestionItem[[ixDD]] <- addNode(xmlNode("d:QuestionItem", attrs=c(id = QuestionItemID[ixDD], version = CurrentVersion) ), QuestionScheme, DDI ) # QuestionItem Name is found in column 1 of the data dictionary "field_name" (see VariableName) QuestionItemName[[ixDD]] <- addTextNode("d:QuestionItemName", DataDictionary[ixDD,1], QuestionItem[[ixDD]], DDI ) # Question Text is found in column 5 of the data dictionary field_label # The column question_number mays also contain a question number for display QuestionItemQuestionText[[ixDD]] <- addNode(xmlNode("d:QuestionText"), QuestionItem[[ixDD]], DDI ) QuestionItemQuestionTextLiteralText[[ixDD]] <- addNode(xmlNode("d:LiteralText"), QuestionItemQuestionText[[ixDD]], DDI ) LastQuestionText <- as.character(DataDictionary[ixDD,"field_label"]) if (!is.na(DataDictionary$question_number[ixDD]) && DataDictionary$question_number[ixDD] != "" ) LastQuestionText <- paste("(", as.character(DataDictionary$question_number[ixDD]), ") ", LastQuestionText) QuestionItemQuestionTextLiteralTextText[[ixDD]] <- addTextNode("d:Text", LastQuestionText, QuestionItemQuestionTextLiteralText[[ixDD]], DDI ) # ------------ Response Domains for Text Fields ------------------- # DataDictionary Column 8 "text_validation_type_or_show_slider_number" contains the data type for date and time fields if ( (DataDictionary$field_type[ixDD]=="text") || (DataDictionary$field_type[ixDD]=="notes") ){ #Date if (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=5)=="date_" ){ dateFormat <- substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"], start=6, stop=8) QuestionItemResponseDomain[[ixDD]] <- addNode(xmlNode("d:DateTimeDomain", attrs=c(type="Date", format=dateFormat) ), QuestionItem[[ixDD]], DDI ) if(DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ){ ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, " NOTE: This question has the following range restrictions. ") if (DataDictionary$text_validation_min[ixDD]!="" ) ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, "It has a minimum value of: ", DataDictionary$text_validation_min[ixDD] , " ." ) if (DataDictionary$text_validation_max[ixDD]!="" ) ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, "It has a maximum value of: ", DataDictionary$text_validation_max[ixDD] , " ." ) } # ends (DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ) } # ends (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=5)=="date_" #DateTime if (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=9)=="datetime_" ){ dateFormat <- substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"], start=10, stop=12) QuestionItemResponseDomain[[ixDD]] <- addNode(xmlNode("d:DateTimeDomain", attrs=c(type="DateTime", format=dateFormat) ), QuestionItem[[ixDD]], DDI ) if(DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ){ ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, " NOTE: This question has the following range restrictions. ") if (DataDictionary$text_validation_min[ixDD]!="" ) ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, "It has a minimum value of: ", DataDictionary$text_validation_min[ixDD] , " ." ) if (DataDictionary$text_validation_max[ixDD]!="" ) ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, "It has a maximum value of: ", DataDictionary$text_validation_max[ixDD] , " ." ) } # ends (DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ) } # ends (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=9)=="datetime_" ) #Time if (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=4)=="time" ){ QuestionItemResponseDomain[[ixDD]] <- addNode(xmlNode("d:DateTimeDomain", attrs=c(type="Time") ), QuestionItem[[ixDD]], DDI ) if(DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ){ ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, " NOTE: This question has the following range restrictions. ") if (DataDictionary$text_validation_min[ixDD]!="" ) ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, "It has a minimum value of: ", DataDictionary$text_validation_min[ixDD] , " ." ) if (DataDictionary$text_validation_max[ixDD]!="" ) ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, "It has a maximum value of: ", DataDictionary$text_validation_max[ixDD] , " ." ) } # ends (DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ) } # ends (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=4)=="time" ) #Number entered in a text field if (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=6)=="number" ){ QuestionItemResponseDomain[[ixDD]] <- addNode(xmlNode("d:NumericDomain", attrs=c(type="Double") ), QuestionItem[[ixDD]], DDI ) if(DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ){ LastNumberRange <- addNode(xmlNode("r:NumberRange"), QuestionItemResponseDomain[[ixDD]] , DDI ) if (DataDictionary$text_validation_min[ixDD]!="" ) LastNumberRangeLow <- addTextNode("r:Low", DataDictionary$text_validation_min[ixDD], LastNumberRange, DDI ) if (DataDictionary$text_validation_max[ixDD]!="" ) LastNumberRangeHigh <- addTextNode("r:High", DataDictionary$text_validation_max[ixDD], LastNumberRange, DDI ) } # ends (DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ) } #ends (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=6)=="number" ) #Integer entered in a text field if (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=7)=="integer" ){ QuestionItemResponseDomain[[ixDD]] <- addNode(xmlNode("d:NumericDomain", attrs=c(type="Integer") ), QuestionItem[[ixDD]], DDI ) if(DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ){ LastNumberRange <- addNode(xmlNode("r:NumberRange"), QuestionItemResponseDomain[[ixDD]] , DDI ) if (DataDictionary$text_validation_min[ixDD]!="" ) LastNumberRangeLow <- addTextNode("r:Low", DataDictionary$text_validation_min[ixDD], LastNumberRange, DDI ) if (DataDictionary$text_validation_max[ixDD]!="" ) LastNumberRangeHigh <- addTextNode("r:High", DataDictionary$text_validation_max[ixDD], LastNumberRange, DDI ) } # ends (DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ) } # ends (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=7)=="integer" ) #email text field (DDI has no corresponding ResponseDomain - make a note) if (DataDictionary[ixDD,"text_validation_type_or_show_slider_number"]=="email" ){ QuestionItemResponseDomain[[ixDD]] <- addNode(xmlNode("d:TextDomain" ), QuestionItem[[ixDD]], DDI ) ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, " NOTE: REDCap validates this field as an email address. ") } #phone text field (DDI has no corresponding ResponseDomain - make a note) if (DataDictionary[ixDD,"text_validation_type_or_show_slider_number"]=="phone" ){ QuestionItemResponseDomain[[ixDD]] <- addNode(xmlNode("d:TextDomain" ), QuestionItem[[ixDD]], DDI ) ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, " NOTE: REDCap validates this field as a telephone number. ") } #just text entered in a text field (no validation) if (DataDictionary[ixDD,"text_validation_type_or_show_slider_number"]=="" ){ QuestionItemResponseDomain[[ixDD]] <- addNode(xmlNode("d:TextDomain" ), QuestionItem[[ixDD]], DDI ) } } # ends DataDictionary[ixDD,"field_type"=="text" || (DataDictionary$field_type[ixDD]=="notes" # QuestionItemResponseDomain if ( (DataDictionary$field_type[ixDD]=="radio") || (DataDictionary$field_type[ixDD]=="checkbox") || (DataDictionary$field_type[ixDD]=="dropdown") || (DataDictionary$field_type[ixDD]=="yesno") || (DataDictionary$field_type[ixDD]=="truefalse") ){ # a coded question - checkbox will have multiple variables QuestionItemResponseDomain[[ixDD]] <- addNode(xmlNode("d:CodeDomain", attrs=c( classificationLevel="Nominal") ), QuestionItem[[ixDD]], DDI ) # CodeSchemeReference ixCodeScheme <- CodeAndCategoriesHash[[ as.character(DataDictionary$select_choices_or_calculations[ixDD]) ]] LastCodeSchemeReference <- addNode( xmlNode("r:CodeSchemeReference" ), QuestionItemResponseDomain[[ixDD]], DDI ) LastCodeSchemeReferenceReturned <- addSchemeReference(CodeSchemeID[ixCodeScheme], agency, CurrentVersion, LastCodeSchemeReference, DDI ) if(DataDictionary$field_type[ixDD]=="checkbox") ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, " NOTE:This question is defined as a REDCap checkbox.", " Each of the code/category levels in this code scheme will actually generate a separate variable.", " Those variables will be coded with the CodeScheme with the ID: ", CodeSchemeID[ixCodeScheme], " 0=Unchecked, 1=Checked" ) } # ends radio, checkbox, dropdown, yesno, truefalse, checkbox # File data type if (DataDictionary$field_type[ixDD]=="file") { QuestionItemResponseDomain[[ixDD]] <- addNode(xmlNode("d:TextDomain" ), QuestionItem[[ixDD]], DDI ) ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, " NOTE: This question allows for upload of a file. The content of the file is presumed to be text. ") } # ends DataDictionary$field_type[ixDD]=="file" # Calc fields are numeric and should have a Coding element placed in ProcesingEvent #Number entered in a text field if ( DataDictionary$field_type[ixDD]=="calc" ){ QuestionItemResponseDomain[[ixDD]] <- addNode(xmlNode("d:NumericDomain", attrs=c(type="Double") ), QuestionItem[[ixDD]], DDI ) } # ends ( DataDictionary$field_type[ixDD]=="calc" ) # ------------ Response Domains for Slider Fields (numeric) ------------------- #slider field - a slider is an odd duck, returning a number, but having ther end and mid points labeled if ( DataDictionary$field_type[ixDD]=="slider" ){ QuestionItemResponseDomain[[ixDD]] <- addNode(xmlNode("d:NumericDomain", attrs=c(type="Integer") ), QuestionItem[[ixDD]], DDI ) LastNumberRange <- addNode(xmlNode("r:NumberRange"), QuestionItemResponseDomain[[ixDD]] , DDI ) LastNumberRangeLow <- addTextNode("r:Low","0",LastNumberRange, DDI ) LastNumberRangeHigh <- addTextNode("r:High","100",LastNumberRange, DDI ) ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, " NOTE: This question has a minimum of 0 and a maximum of 100. The end and middle points were labeled as: ", DataDictionary$select_choices_or_calculations[ixDD] ) if (DataDictionary$text_validation_type_or_show_slider_number[ixDD]=="number")ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, " This slider question displays the selected number on the survey instrument as the slider is moved.") } # ends ( (DataDictionary$field_type[ixDD]=="slider") ) ) # every question gets at least the field type as a ResponseDomain Description # other notes may be included if(DataDictionary$branching_logic[ixDD] != "")ResponseDomainDescriptionText <- paste(ResponseDomainDescriptionText, "UNIVERSE NOTE: This question was only presented to respondents satisfying the following logical expression: ", DataDictionary$branching_logic[ixDD] ) lastResponseDomainDescription <- addTextNode("r:Description", ResponseDomainDescriptionText, QuestionItemResponseDomain[[ixDD]], DDI) # --------- ControlConstructs for questions and statements -------------------------- # ----------------------------------------------------------------------------------- # branching logic specified - # make a sub-Sequence subSequenceSpecified <- DataDictionary$branching_logic[ixDD] != "" if(subSequenceSpecified){ LastSubSequenceID <- rUUID() LastSubSequence <- addNode(xmlNode("d:Sequence", attrs=c(id=LastSubSequenceID, version=CurrentVersion ) ), ControlConstructSchemeSubSequences, DDI ) LastSubSequenceName <- addTextNode("d:ConstructName", paste("subSequenceForQuestion", ixDD, sep=""), LastSubSequence, DDI ) LastSubSequenceLabel <- addTextNode("r:Label", paste("Sub-Sequence for question: ", ixDD, ", ", DataDictionary[ixDD,1] ), LastSubSequence, DDI ) # make an ifThenElse construct for that sequence LastIfThenElseID <- rUUID() LastIfThenElse <- addNode(xmlNode("d:IfThenElse", attrs=c(id=LastIfThenElseID, version=CurrentVersion ) ), ControlConstructSchemeSubSequences, DDI ) LastIfThenElseName <- addTextNode("d:ConstructName", paste("IFsubSequenceForQuestion", ixDD, sep=""), LastIfThenElse, DDI ) LastIfThenElseLabel <- addTextNode("r:Label", paste("IfThenElse for Sub-Sequence for question: ", ixDD, ", ", DataDictionary[ixDD,1] ), LastIfThenElse, DDI ) LastIfCondition <- addNode(xmlNode("d:IfCondition" ), LastIfThenElse, DDI ) LastIfConditionCode <- addNode(xmlNode("r:Code", attrs=c(programmingLanguage="REDCap")), LastIfCondition, DDI) LastIfConditionCodeText <- addNode(xmlTextNode(DataDictionary$branching_logic[ixDD]), LastIfConditionCode, DDI) # >>>>>>>>>>>>> NOTE: for futire addition - parse the expression and add a SourceQuestionReference for each field referenced <<<<<<<<<< LastIfConditionDescription <- addTextNode("r:Description", "REDCap logical expression. Field Names are in square brackets", LastIfCondition, DDI ) LastThenConstructReference <- addNode(xmlNode("d:ThenConstructReference" ), LastIfThenElse, DDI ) LastThenConstructReferenceReturned <- addIdentifiableReference(ControlConstructSchemeSubSequencesID, agency, CurrentVersion,LastSubSequenceID, LastThenConstructReference, DDI ) } # ends subSequenceSpecified # Section Header if(DataDictionary$section_header[ixDD] != ""){ LastStatementItemID <- rUUID() LastStatementItem <- addNode(xmlNode("d:StatementItem", attrs=c(id=LastStatementItemID, version=CurrentVersion ) ), ControlConstructSchemeObjects, DDI ) LastStatementItemName <- addTextNode("d:ConstructName", paste("SectionHeaderForQuestion", ixDD, sep=""), LastStatementItem, DDI ) LastStatementItemLabel <- addTextNode("r:Label", paste("Section Header preceding question: ", ixDD, ", ", DataDictionary[ixDD,1] ), LastStatementItem, DDI ) LastStatementItemDisplayText <- addNode(xmlNode("d:DisplayText" ), LastStatementItem, DDI ) LastStatementItemDisplayTextLiteralText <- addNode(xmlNode("d:LiteralText" ), LastStatementItemDisplayText, DDI ) LastStatementItemDisplayTextLiteralTextText <- addTextNode("d:Text", as.character(DataDictionary$section_header[ixDD]), LastStatementItemDisplayTextLiteralText, DDI ) # add this either to it's sub-Sequence or the main Sequence if(subSequenceSpecified) LastControlConstructReference <- addNode(xmlNode("d:ControlConstructReference" ), LastSubSequence, DDI ) else LastControlConstructReference <- addNode(xmlNode("d:ControlConstructReference" ), FormSequence[[FormNumber]], DDI ) LastControlConstructReferenceReturned <- addIdentifiableReference(ControlConstructSchemeObjectsID, agency, CurrentVersion, LastStatementItemID, LastControlConstructReference, DDI ) } # ends (DataDictionary$section_header != "") # Question LastQuestionConstructID <- rUUID() LastQuestionConstruct <- addNode(xmlNode("d:QuestionConstruct", attrs=c(id=LastQuestionConstructID, version=CurrentVersion ) ), ControlConstructSchemeObjects, DDI ) LastQuestionConstructName <- addTextNode("d:ConstructName", paste("ControlConstructForQuestion", ixDD, sep=""), LastQuestionConstruct, DDI ) LastQuestionConstructLabel <- addTextNode("r:Label", paste("Question Construct for question: ", ixDD, ", ", DataDictionary[ixDD,1] ), LastQuestionConstruct, DDI ) LastQuestionReference <- addNode(xmlNode("d:QuestionReference" ), LastQuestionConstruct, DDI ) LastQuestionReferenceReturned <- addIdentifiableReference(QuestionSchemeID, agency, CurrentVersion, QuestionItemID[ixDD], LastQuestionReference, DDI ) # add this either to it's sub-Sequence or the main Sequence if(subSequenceSpecified) LastControlConstructReference <- addNode(xmlNode("d:ControlConstructReference" ), LastSubSequence, DDI ) else LastControlConstructReference <- addNode(xmlNode("d:ControlConstructReference" ), FormSequence[[FormNumber]], DDI ) LastControlConstructReferenceReturned <- addIdentifiableReference(ControlConstructSchemeObjectsID, agency, CurrentVersion, LastQuestionConstructID, LastControlConstructReference, DDI ) # Field Note (follows question on the questionnaire if(DataDictionary$field_note[ixDD] != ""){ LastStatementItemID <- rUUID() LastStatementItem <- addNode(xmlNode("d:StatementItem", attrs=c(id=LastStatementItemID, version=CurrentVersion ) ), ControlConstructSchemeObjects, DDI ) LastStatementItemName <- addTextNode("d:ConstructName", paste("FieldNoteForQuestion", ixDD, sep=""), LastStatementItem, DDI ) LastStatementItemLabel <- addTextNode("r:Label", paste("Field Note following question: ", ixDD, ", ", DataDictionary[ixDD,1] ), LastStatementItem, DDI ) LastStatementItemDisplayText <- addNode(xmlNode("d:DisplayText" ), LastStatementItem, DDI ) LastStatementItemDisplayTextLiteralText <- addNode(xmlNode("d:LiteralText" ), LastStatementItemDisplayText, DDI ) LastStatementItemDisplayTextLiteralTextText <- addTextNode("d:Text", as.character(DataDictionary$field_note[ixDD]), LastStatementItemDisplayTextLiteralText, DDI ) # add this either to it's sub-Sequence or the main Sequence if(subSequenceSpecified) LastControlConstructReference <- addNode(xmlNode("d:ControlConstructReference" ), LastSubSequence, DDI ) else LastControlConstructReference <- addNode(xmlNode("d:ControlConstructReference" ), FormSequence[[FormNumber]], DDI ) LastControlConstructReferenceReturned <- addIdentifiableReference(ControlConstructSchemeObjectsID, agency, CurrentVersion, LastStatementItemID, LastControlConstructReference, DDI ) } # ends (DataDictionary$field_note != "") # If a new Sub-Sequence was built add its IfThenElse to the main sequence if(subSequenceSpecified){ LastIfThenElseReference <- addNode(xmlNode("d:ControlConstructReference" ), FormSequence[[FormNumber]], DDI ) LastIfThenElseReferenceReturned <- addIdentifiableReference(ControlConstructSchemeSequencesID, agency, CurrentVersion, LastIfThenElseID, LastIfThenElseReference , DDI ) } # ends subSequenceSpecified } # ends DataDictionary$field_type[ixDD]!="descriptive" else{ # a Descriptive Field generates a StatementItem LastStatementItemID <- rUUID() LastStatementItem <- addNode(xmlNode("d:StatementItem", attrs=c(id=LastStatementItemID, version=CurrentVersion ) ), ControlConstructSchemeObjects, DDI ) LastStatementItemName <- addTextNode("d:ConstructName", paste("REDCapField", ixDD, "Descriptive", sep=""), LastStatementItem, DDI ) LastStatementItemLabel <- addTextNode("r:Label", paste("Descriptive Field in position: ", ixDD, ", ", DataDictionary[ixDD,1] ), LastStatementItem, DDI ) LastStatementItemDisplayText <- addNode(xmlNode("d:DisplayText" ), LastStatementItem, DDI ) LastStatementItemDisplayTextLiteralText <- addNode(xmlNode("d:LiteralText" ), LastStatementItemDisplayText, DDI ) LastStatementItemDisplayTextLiteralTextText <- addTextNode("d:Text", as.character(DataDictionary$field_label[ixDD]), LastStatementItemDisplayTextLiteralText, DDI ) LastControlConstructReference <- addNode(xmlNode("d:ControlConstructReference" ), FormSequence[[FormNumber]], DDI ) LastControlConstructReferenceReturned <- addIdentifiableReference(ControlConstructSchemeObjectsID, agency, CurrentVersion, LastStatementItemID, LastControlConstructReference, DDI ) } # ends else for DataDictionary$field_type[ixDD]!="descriptive" # ____________________________ # Variables # ---------------------------- VariableIDList[[ixDD]] <- rUUID() # generate a variable ID for each row of the data dictionary (the default is one ID per row) # do not generate an XML element for "descriptive" rows if (DataDictionary$field_type[ixDD]!="descriptive" && DataDictionary$field_type[ixDD]!="checkbox" ){ # print(ixDD) LastVariableElement <- addNode(xmlNode("l:Variable", attrs=c(id = VariableIDList[[ixDD]][1], version = CurrentVersion) ), VariableScheme, DDI ) # Variable Name is found in column 1 of the data dictionary "field_name" (see QuestionItemName) LastVariableName <- addTextNode("l:VariableName", DataDictionary[ixDD,1], LastVariableElement, DDI ) # store the entry in the VariableIDhash VariableIDhash[[ DataDictionary[ixDD,1] ]] <- VariableIDList[[ixDD]][1] # Question Text is found in column 5 of the data dictionary field_label (used here as the variable label LastVariableLabel <- addTextNode("r:Label", DataDictionary[ixDD,"field_label"], LastVariableElement, DDI ) # Variable Description - put a note here about the field type that generated this variable, elaborate about slider types VariableDescriptionText <- paste("This variable was collected from a REDCap ", DataDictionary$field_type[ixDD], " question.") # extrema of the variable if any if (DataDictionary$text_validation_min[ixDD]!="" ) VariableDescriptionText <- paste(VariableDescriptionText, "It has a minimum value of: ", DataDictionary$text_validation_min[ixDD] , " ." ) if (DataDictionary$text_validation_max[ixDD]!="" ) VariableDescriptionText <- paste(VariableDescriptionText, "It has a maximum value of: ", DataDictionary$text_validation_max[ixDD] , " ." ) # slider properties if(DataDictionary$field_type[ixDD]=="slider"){ VariableDescriptionText <- paste(VariableDescriptionText, " A slider field returns a numeric value between 0 and 100.") if(DataDictionary$select_choices_or_calculations[ixDD]!=""){ VariableDescriptionText <- paste(VariableDescriptionText, " The following labels were assigned to the end amd mid points of this scale: ", DataDictionary$select_choices_or_calculations[ixDD]) } # ends (DataDictionary$select_choices_or_calculations[ixDD]!="") } # ends (DataDictionary$field_type[ixDD]=="slider") # calc foumula if ( (DataDictionary$field_type[ixDD]=="calc") ){ VariableDescriptionText <- paste(VariableDescriptionText, " This variable is automatically calculated by formula in REDCap.", " The formula can be found in the r:Command element referenced by l:CodingInstructionsReference: ", CalcCol[ixDD]) } # ends if ( (DataDictionary$field_type[ixDD]=="calc") ) if(DataDictionary$required_field[ixDD]=="y") VariableDescriptionText <- paste(VariableDescriptionText, " REQUIRED FIELD NOTE: The question for this variable was flagged as required.") if(DataDictionary$identifier[ixDD]=="y") VariableDescriptionText <- paste(VariableDescriptionText, " IDENTIFIABLE NOTE: This variable has been flagged as containing personally identifiable information") if(DataDictionary$branching_logic[ixDD] != "")VariableDescriptionText <- paste(VariableDescriptionText, "UNIVERSE NOTE: This question was only presented to respondents satisfying the following logical expression: ", DataDictionary$branching_logic[ixDD] ) # add the Variable Description element LastVariableDescription <- addTextNode("r:Description", VariableDescriptionText, LastVariableElement, DDI ) # Variable Sub Universe # "Hidden" variables are only seen by a subset of the overall universe based on the display logic if(DataDictionary$branching_logic[ixDD] != ""){ # create a Universe element SubUniverseID[ixDD] <- rUUID() LastSubUniverse <- addNode(xmlNode("c:SubUniverse", attrs=c(id = SubUniverseID[ixDD], version = CurrentVersion) ),Universe, DDI ) SubUniverseName <- addTextNode("c:UniverseName" , paste("Universe for Question ",ixDD), LastSubUniverse, DDI ) SubUniverseHumanReadable <- addTextNode("c:HumanReadable", paste("Question ", ixDD, ", ", DataDictionary$field_name[ixDD], ", was only presented to respondents satisfying the following logical expression: ", DataDictionary$branching_logic[ixDD] ), LastSubUniverse, DDI ) # point the variable to it LastSubUniverseReference <- addNode(xmlNode("r:UniverseReference" ), LastVariableElement, DDI ) LastSubUniverseReferenceReturned <- addIdentifiableReference(UniverseSchemeID, agency, CurrentVersion, SubUniverseID[ixDD], LastSubUniverseReference, DDI ) } # ends (DataDictionary$branching_logic != "") # QuestionReference LastQuestionReference <- addNode(xmlNode("l:QuestionReference"), LastVariableElement, DDI ) QuestionReferenceReturned <- addIdentifiableReference(QuestionSchemeID, agency, CurrentVersion, QuestionItemID[[ixDD]], LastQuestionReference, DDI ) # add an embargo reference for variables marked as an Identifier if(DataDictionary$identifier[ixDD]=="y"){ EmbargoReference <- addNode(xmlNode("l:EmbargoReference"), LastVariableElement, DDI ) EmbargoReferenceReturned <- addIdentifiableReference(StudyUnitID, agency, CurrentVersion, EmbargoID, EmbargoReference, DDI ) } # ends (DataDictionary$identifier[ixDD]=="y") # vvvvvvvvvvvvvv Variable Representaton vvvvvvvvvvvvvv if ( (DataDictionary$field_type[ixDD]=="radio") || (DataDictionary$field_type[ixDD]=="checkbox") || (DataDictionary$field_type[ixDD]=="dropdown") || (DataDictionary$field_type[ixDD]=="yesno") || (DataDictionary$field_type[ixDD]=="truefalse") ){ # a coded question - checkbox will have multiple variables LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastCodeRepresentation <- addNode( xmlNode("l:CodeRepresentation" ), LastVariableRepresentation, DDI ) LastCodeSchemeReference <- addNode( xmlNode("r:CodeSchemeReference" ), LastCodeRepresentation, DDI ) ixCodeScheme <- CodeAndCategoriesHash[[ as.character(DataDictionary$select_choices_or_calculations[ixDD]) ]] LastCodeSchemeReferenceReturned <- addSchemeReference(CodeSchemeID[ixCodeScheme], agency, CurrentVersion, LastCodeSchemeReference, DDI ) } # ends radio, checkbox, dropdown, yesno, trueFalse, slider if ( (DataDictionary$field_type[ixDD]=="file") ){ LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastTextRepresentation <- addNode( xmlNode("l:TextRepresentation" ), LastVariableRepresentation, DDI ) } # ends (DataDictionary$field_type[ixDD]=="file") # ------------ Representations for Text Fields ------------------- # DataDictionary Column 8 "text_validation_type_or_show_slider_number" contains the data type for date and time fields if ( (DataDictionary$field_type[ixDD]=="text") || (DataDictionary$field_type[ixDD]=="notes") ){ #Date if (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=5)=="date_" ){ dateFormat <- substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"], start=6, stop=8) LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastDateTimeRepresentation <- addNode( xmlNode("l:DateTimeRepresentation", attrs=c(type="Date") ), LastVariableRepresentation, DDI ) LastGenericOutputFormat <- addTextNode("r:GenericOutputFormat", dateFormat, LastDateTimeRepresentation, DDI) } # ends (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=5)=="date_" #DateTime if (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=9)=="datetime_" ){ dateFormat <- substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"], start=10, stop=12) LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastDateTimeRepresentation <- addNode( xmlNode("l:DateTimeRepresentation", attrs=c(type="DateTime") ), LastVariableRepresentation, DDI ) LastGenericOutputFormat <- addTextNode("r:GenericOutputFormat", dateFormat, LastDateTimeRepresentation, DDI) } # ends (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=9)=="datetime_" ) #Time if (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=4)=="time" ){ LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastDateTimeRepresentation <- addNode( xmlNode("l:DateTimeRepresentation", attrs=c(type="Time") ), LastVariableRepresentation, DDI ) } # ends (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=4)=="time" ) #Number entered in a text field if (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=6)=="number" ){ LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastNumericRepresentation <- addNode(xmlNode("l:NumericRepresentation", attrs=c(type="Double") ), LastVariableRepresentation, DDI ) if(DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ){ LastNumberRange <- addNode(xmlNode("r:NumberRange"), LastNumericRepresentation , DDI ) if (DataDictionary$text_validation_min[ixDD]!="" ) LastNumberRangeLow <- addTextNode("r:Low", DataDictionary$text_validation_min[ixDD], LastNumberRange, DDI ) if (DataDictionary$text_validation_max[ixDD]!="" ) LastNumberRangeHigh <- addTextNode("r:High", DataDictionary$text_validation_max[ixDD], LastNumberRange, DDI ) } # ends (DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ) } #ends (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=6)=="number" ) #Integer entered in a text field if (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=7)=="integer" ){ LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastNumericRepresentation <- addNode(xmlNode("l:NumericRepresentation", attrs=c(type="Integer") ), LastVariableRepresentation, DDI ) if(DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ){ LastNumberRange <- addNode(xmlNode("r:NumberRange"), LastNumericRepresentation , DDI ) if (DataDictionary$text_validation_min[ixDD]!="" ) LastNumberRangeLow <- addTextNode("r:Low", DataDictionary$text_validation_min[ixDD], LastNumberRange, DDI ) if (DataDictionary$text_validation_max[ixDD]!="" ) LastNumberRangeHigh <- addTextNode("r:High", DataDictionary$text_validation_max[ixDD], LastNumberRange, DDI ) } # ends (DataDictionary$text_validation_min[ixDD]!="" || DataDictionary$text_validation_max[ixDD]!="" ) } # ends (substr(DataDictionary[ixDD,"text_validation_type_or_show_slider_number"],start=1, stop=7)=="integer" ) #REDCap email validation type is represented as text if (DataDictionary[ixDD,"text_validation_type_or_show_slider_number"]=="email" ){ LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastTextRepresentation <- addNode( xmlNode("l:TextRepresentation" ), LastVariableRepresentation, DDI ) } #REDCap telephone number validation type is represented as text if (DataDictionary[ixDD,"text_validation_type_or_show_slider_number"]=="phone" ){ LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastTextRepresentation <- addNode( xmlNode("l:TextRepresentation" ), LastVariableRepresentation, DDI ) } #just text entered in a text field (no validation) if (DataDictionary[ixDD,"text_validation_type_or_show_slider_number"]=="" ){ LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastTextRepresentation <- addNode( xmlNode("l:TextRepresentation" ), LastVariableRepresentation, DDI ) } } # ends DataDictionary[ixDD,"field_type"=="text" || (DataDictionary$field_type[ixDD]=="notes" #slider field - a slider is an odd duck, returning a number, but having ther end and mid points labeled if ( DataDictionary$field_type[ixDD]=="slider" ){ LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastNumericRepresentation <- addNode(xmlNode("l:NumericRepresentation", attrs=c(type="Integer") ), LastVariableRepresentation, DDI ) LastNumberRange <- addNode(xmlNode("r:NumberRange"), LastNumericRepresentation , DDI ) LastNumberRangeLow <- addTextNode("r:Low", "0", LastNumberRange, DDI ) LastNumberRangeHigh <- addTextNode("r:High", "0", LastNumberRange, DDI ) } # ends ( (DataDictionary$field_type[ixDD]=="slider") ) ) if ( (DataDictionary$field_type[ixDD]=="calc") ){ # a calculated question - add a coding element to ProcessingEvent and reference it # add the Coding element LastCoding <- addNode(xmlNode("d:Coding", attrs=c(id = CodingID[ixDD]) ), ProcessingEvent, DDI ) LastGenerationInstruction <- addNode(xmlNode("d:GenerationInstruction" ), LastCoding, DDI ) LastGenerationInstructionDescription <-addTextNode("r:Description", "NOTE: This variable is automatically calculated by formula in REDCap. The formula can be found in the r:Command element referenced by l:CodingInstructionsReference", LastGenerationInstruction, DDI ) LastGenerationInstructionCommand <- addNode(xmlNode("r:Command" ), LastGenerationInstruction, DDI ) LastGenerationInstructionCommandText <-addTextNode("r:CommandText", CalcCol[ixDD], LastGenerationInstructionCommand, DDI ) # >>>>>> NOTE: for a future improvement parse the formula and add d:SourceVariable elements here # In REDCap variable names are enclosed in square brackets # reference it LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastCodingInstructionsReference <- addNode(xmlNode("l:CodingInstructionsReference" ), LastVariableRepresentation, DDI ) LastCodingInstructionsReferenceReturned <- addIdentifiableReference(DataCollectionID, agency, CurrentVersion, CodingID[ixDD], LastCodingInstructionsReference, DDI ) LastNumericRepresentation <- addNode(xmlNode("l:NumericRepresentation", attrs=c(type="Double") ), LastVariableRepresentation, DDI ) } # ends (DataDictionary$field_type[ixDD]=="calc") } # ends DataDictionary$field_type[ixDD]!="descriptive" && DataDictionary$field_type[ixDD]!="checkbox" # ------ checkboxes ----- # checkbox questions generate one variable for each item in the questions associated code and category scheme # each variable has a code/category scheme of 0=Unchecked, 1=Checked if (DataDictionary$field_type[ixDD]=="checkbox" ){ ixCheckboxCodeScheme <- CodeAndCategoriesHash[[ as.character(DataDictionary$select_choices_or_calculations[ixDD]) ]] nCheckboxVariables <- length(CodeCatPairs[[ixCheckboxCodeScheme]] ) # generate a list of IDs for this set of variable VariableIDList[[ixDD]] <- replicate(nCheckboxVariables, rUUID()) for(ixVariable in 1:nCheckboxVariables){ LastVariableElement <- addNode(xmlNode("l:Variable", attrs=c(id = VariableIDList[[ixDD]][ixVariable], version = CurrentVersion) ), VariableScheme, DDI ) # Variable Name is found in column 1 of the data dictionary "field_name" (see QuestionItemName) LastVariableNameText <- paste(DataDictionary[ixDD,1],"___", ixVariable, sep="" ) LastVariableName <- addTextNode("l:VariableName", LastVariableNameText, LastVariableElement, DDI ) # store the entry in the VariableIDhash VariableIDhash[[LastVariableNameText]] <- VariableIDList[[ixDD]][ixVariable] # Question Text is found in column 5 of the data dictionary field_label (used here as the variable label) # append the category of the choice to the label LastVariableLabel <- addTextNode("r:Label", paste(DataDictionary[ixDD,"field_label"], "__choice__", CodeCatSchemes[[ixCheckboxCodeScheme]][[ixVariable]][2] ), LastVariableElement, DDI ) # Variable Description - put a note here about the field type that generated this variable, elaborate about slider types VariableDescriptionText <- paste("This variable was collected from a REDCap ", DataDictionary$field_type[ixDD], " question. It will have a value of 1 when the option ", CodeCatSchemes[[ixCheckboxCodeScheme]][[ixVariable]][2], "was selected." ) if(DataDictionary$identifier[ixDD]=="y") VariableDescriptionText <- paste(VariableDescriptionText, "IDENTIFIABLE NOTE: This variable has been flagged as containing personally identifiable information") # add the Variable Description element LastVariableDescription <- addTextNode("r:Description", VariableDescriptionText, LastVariableElement, DDI ) # QuestionReference LastQuestionReference <- addNode(xmlNode("l:QuestionReference"), LastVariableElement, DDI ) QuestionReferenceReturned <- addIdentifiableReference(QuestionSchemeID, agency, CurrentVersion, QuestionItemID[[ixDD]], LastQuestionReference, DDI ) LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastCodeRepresentation <- addNode( xmlNode("l:CodeRepresentation" ), LastVariableRepresentation, DDI ) LastCodeSchemeReference <- addNode( xmlNode("r:CodeSchemeReference" ), LastCodeRepresentation, DDI ) # all of the checkbox variables use the first code scheme (0=unchecked 1=checked) LastCodeSchemeReferenceReturned <- addSchemeReference(CodeSchemeID[1], agency, CurrentVersion, LastCodeSchemeReference, DDI ) } # ends for(ixVariable in 1:nCheckboxVariables) } # ends (DataDictionary$field_type[ixDD]=="checkbox" ) } # end of loop through DataDictionary # ========================= Paradata Variables ===================================================== # REDCap adds several variables which are not part of the instruments, An event name variable and # variables tracking completion or each form, these shoudl always be described # =================================================================================================== # ========================= NOTE =================================================== # there will be one variable at the end of the list of variables for each form with a # name of the form _complete # this is a coded scheme with the CodeScheme with the ID of CodeSchemeID[2} # (which was put in manually when the ode scheme list was built # # use paste(unique(formEventMapping$form_name),"_complete", sep='') to get a list of the # completion variables (paradata). These are all coded using the code scheme # with id CodeSchemeID[2] # add these variables to the VariableScheme and note them as paradata # add the id of these variables to the VariableIDhash hash. # add IDs of the other variables to this hash as they are added to the variable scheme # ==================================================================================== # --------------------------------------------------- # Add the variable "redcap_event_name" # --------------------------------------------------- Var_redcap_event_nameID <- rUUID() LastVariableElement <- addNode(xmlNode("l:Variable", attrs=c(id = Var_redcap_event_nameID, version = CurrentVersion) ), VariableScheme, DDI ) # Variable Name is the form name followed by _complete LastVariableName <- addTextNode("l:VariableName", "redcap_event_name", LastVariableElement, DDI ) # store the entry in the VariableIDhash VariableIDhash[["redcap_event_name"]] <- Var_redcap_event_nameID # Variable Label LastVariableLabel <- addTextNode("r:Label", "Event Name for this observaton", LastVariableElement, DDI ) # Variable Description - VariableDescriptionText <- "This variable contains the name of the event in which this observation was collected" # add the Variable Description element LastVariableDescription <- addTextNode("r:Description", VariableDescriptionText, LastVariableElement, DDI ) # this is a text variable LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastTextRepresentation <- addNode( xmlNode("l:TextRepresentation" ), LastVariableRepresentation, DDI ) # --------------------------------------------------- # Add variables for the completion tracking (paradata) # --------------------------------------------------- # First, create a vector containing the names FormName <- unique(formEventMapping$form_name) CompletionVariableName <- paste(FormName,"_complete", sep='') for (ixCVN in 1:length(CompletionVariableName)){ LastVariableElementID <- rUUID() LastVariableElement <- addNode(xmlNode("l:Variable", attrs=c(id = LastVariableElementID, version = CurrentVersion) ), VariableScheme, DDI ) # Variable Name is the form name followed by _complete LastVariableName <- addTextNode("l:VariableName", CompletionVariableName[ixCVN], LastVariableElement, DDI ) # store the entry in the VariableIDhash VariableIDhash[[CompletionVariableName[ixCVN]]] <- LastVariableElementID # Variable Label LastVariableLabel <- addTextNode("r:Label", paste("completion variable for form: ", FormName[ixCVN]), LastVariableElement, DDI ) # Variable Description - VariableDescriptionText <- paste("This variable indicates the completion status of the form: ", FormName[ixCVN], " with values of 0, Incomplete | 1, Unverified | 2, Complete" ) # add the Variable Description element LastVariableDescription <- addTextNode("r:Description", VariableDescriptionText, LastVariableElement, DDI ) # these variables are all coded with code scheme number 2 (added explicitly in buildign the list of CodeSchemes) LastVariableRepresentation <- addNode(xmlNode("l:Representation" ), LastVariableElement, DDI ) LastCodeRepresentation <- addNode( xmlNode("l:CodeRepresentation" ), LastVariableRepresentation, DDI ) LastCodeSchemeReference <- addNode( xmlNode("r:CodeSchemeReference" ), LastCodeRepresentation, DDI ) LastCodeSchemeReferenceReturned <- addSchemeReference(CodeSchemeID[2], agency, CurrentVersion, LastCodeSchemeReference, DDI ) } # ends for (ixCVN in 1:length(CompletionVariableName)) # ========================= PhysicalDataProduct ===================================================== # The will be optional, in the initial version of this program it will contain # a element. Future version could offer a separate csv file # =================================================================================================== if (IncludeData) { print ("including data") # -- PhysicalDataProduct -- PhysicalDataProductID <- rUUID() PhysicalDataProduct <- addNode(xmlNode("p:PhysicalDataProduct", attrs=c(id = PhysicalDataProductID, version = CurrentVersion, agency = agency) ), StudyUnit, DDI ) PhysicalStructureSchemeID <- rUUID() PhysicalStructureScheme <- addNode(xmlNode("p:PhysicalStructureScheme", attrs=c(id = PhysicalStructureSchemeID, version = CurrentVersion, agency = agency) ), PhysicalDataProduct, DDI ) PhysicalStructureID <- rUUID() PhysicalStructure <- addNode(xmlNode("p:PhysicalStructure", attrs=c(id = rUUID(), version = CurrentVersion) ), PhysicalStructureScheme, DDI ) LogicalProductRef <- addNode(xmlNode("p:LogicalProductReference"),PhysicalStructure,DDI) LogicalProductRefReturned <- addSchemeReference(LogicalProductID, agency, CurrentVersion, LogicalProductRef, DDI ) GrossRecordStructure <- addNode(xmlNode("p:GrossRecordStructure", attrs=c(id = rUUID()) ), PhysicalStructure, DDI ) LogicalRecordReference <- addNode(xmlNode("p:LogicalRecordReference" ), GrossRecordStructure, DDI ) LogicalRecordReferenceReturned <- addIdentifiableReference(LogicalProductID, agency, CurrentVersion, LogicalRecordID, LogicalRecordReference, DDI ) PhysicalRecordSegmentID <- rUUID() PhysicalRecordSegment <- addNode(xmlNode("p:PhysicalRecordSegment", attrs=c(id = PhysicalDataProductID) ), GrossRecordStructure, DDI ) PhysicalRecordSegmentUserID <- addNode(xmlNode("r:UserID", attrs=c(type="REDCapExport") ), PhysicalRecordSegment, DDI ) UserIDText <- addNode(xmlTextNode("Segment1"), PhysicalRecordSegmentUserID, DDI) RecordLayoutScheme <- addNode(xmlNode("p:RecordLayoutScheme", attrs=c(id = PhysicalRecordSegmentID, version = CurrentVersion, agency = agency) ), PhysicalDataProduct, DDI ) DataSet <- addNode(xmlNode("ds:DataSet", attrs=c(id = rUUID(), version = CurrentVersion) ), RecordLayoutScheme, DDI ) PhysicalStructureReference <- addNode(xmlNode("p:PhysicalStructureReference" ), DataSet, DDI ) PhysicalStructureReferenceReturned <- addIdentifiableReference(PhysicalStructureSchemeID, agency, CurrentVersion, PhysicalStructureID, PhysicalStructureReference, DDI ) PhysicalRecordSegmentUsed <- addTextNode("p:PhysicalRecordSegmentUsed", "Segment1", PhysicalStructureReference, DDI) NameOfDataSet <- addTextNode("ds:Name", paste("Exported Dataset for study ", StudyTitle), DataSet, DDI) RecordSet <- addNode(xmlNode("ds:RecordSet" ), DataSet, DDI ) VariableOrder <- addNode(xmlNode("ds:VariableOrder" ), RecordSet, DDI ) # NOTE This will contain a sequence of ds:VariableReference elements # the ds:VariableOrder element determines the order of the ds:Value elements in each ds:Record element # ========================= NOTE =================================================== # use names(Records_flat) to get the sequence of variables in Records_flat and # put that sequence into the element in the # ==================================================================================== # loop through names(Records_flat) and add a reference to the VariableOrder VarName <- names(Records_flat) for (ixVname in 1:length(VarName)){ # -- Add a VariableReference for this variable to the VariableOrder LastVariableReference <- addNode(xmlNode("ds:VariableReference" ), VariableOrder, DDI ) LastVariableReferenceReturned <- addIdentifiableReference(VariableSchemeID, agency, CurrentVersion, VariableIDhash[[VarName[ixVname] ]], LastVariableReference, DDI ) } # ends for (ixVname in 1:length(VarNames)) # Add the data in Records_flat as a sequence of elements in the RecordSet # each element contains a sequence of elements for (ixRow in 1:length(Records_flat[,1]) ){ LastRecord <- addNode(xmlNode("ds:Record"), RecordSet, DDI ) for (ixValue in 1:length(VarName)){ LastValue <- addTextNode("ds:Value", Records_flat[ixRow,ixValue], LastRecord, DDI ) } # ends for (ixValue in 1:length(VarName)) } # ends for (ixR in 1:length(Records_flat[,1]) ) } # ends if (IncludeData) # ----------------------------- # Output # ----------------------------- # DDI # saveXML does not seem to work for xmlHASH!! #saveXML(DDI,file=DDIinstanceFile, prefix = '\n') # working alternative, write the XML to a file sink(DDIinstanceFile) write('',stdout()) print(DDIInstance) sink() # at end clear(CodeAndCategoriesHash) # and rm(CodeAndCategoriesHash) if (EchoDDI) print(DDI) return(DDIInstance) } # ends else of (secret_token=="" || DDIinstanceFile=="") } # ends functiondefinintion REDCapToDDI DDIInstance <- REDCapAPI_ToDDI(secret_token = '325347F271F375292924CF3EFA4CBAC4', DDIinstanceFile="C:\\DDrive\\projects\\various\\DDI-Naddi\\REDCAP_HoyleAnd_VanRoekel\\R\\outputs\\DDIfromREDCap_NADDIexample.xml", EchoDDI=FALSE, IncludeData=TRUE ) # DDIInstance