<?xml version="1.0" encoding="UTF-8"?>
<codeBook version="1.2.2" ID="AHRI.PANGEA1.Data.2019.v1" xml-lang="en" xmlns="http://www.icpsr.umich.edu/DDI" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.icpsr.umich.edu/DDI http://www.icpsr.umich.edu/DDI/Version1-2-2.xsd">
<docDscr>
  <citation>
    <titlStmt>
      <IDNo>DDI.AHRI.PANGEA1.Data.2019.v1</IDNo>
    </titlStmt>
    <prodStmt>
      <producer abbr="AHRI" affiliation="" role="">Africa Health Research Institute</producer>
      <prodDate date="">
        <_value></_value>
      </prodDate>
      <software version="v5">NADA</software>
    </prodStmt>
    <verStmt>
      <version></version>
    </verStmt>
  </citation>
</docDscr>
<stdyDscr>
  <citation>
    <titlStmt>
      <titl>PANGEA1 Demographic and Clinical Data.2019.v1</titl>
      <subTitl/>
      <altTitl/>
      <parTitl/>
      <IDNo>AHRI.PANGEA1.Data.2019.v1</IDNo>
    </titlStmt>
    <rspStmt>
      <AuthEnty affiliation="Africa Health Research Institute">Pillay, Deenan</AuthEnty>
      <AuthEnty affiliation=""/>
      <AuthEnty affiliation=""/>
      <othId role="Cleaned, aligned and helped analyse the sequence data." affiliation="Africa Health Research Institute" email="">
        <p>Derache, Anne</p>
      </othId>
      <othId role="" affiliation="" email="">
        <p/>
      </othId>
      <othId role="" affiliation="" email="">
        <p/>
      </othId>
      <othId role="" affiliation="" email="">
        <p/>
      </othId>
      <othId role="" affiliation="" email="">
        <p/>
      </othId>
    </rspStmt>
    <prodStmt>
      <producer abbr="" affiliation="" role="">Africa Health Research Institute</producer>
      <copyright/>
      <software version="5.0" date="2023-03-12">NADA</software>
      <fundAg abbr="BMGF" role="Genotyping funding source">PANGEA consortium, funded by the Bill &amp; Melinda Gates Foundation</fundAg>
      <grantNo/>
    </prodStmt>
    <distStmt>
      <depDate date=""/>
      <distDate date=""/>
    </distStmt>
    <serStmt>
      <serName/>
      <serInfo><![CDATA[]]></serInfo>
    </serStmt>
    <verStmt>
      <version date="">V1.0.0</version>
      <verResp/>
      <notes><![CDATA[]]></notes>
    </verStmt>
    <biblCit format=""><![CDATA[]]></biblCit>
    <notes><![CDATA[]]></notes>
  </citation>
  <stdyInfo>
    <studyBudget><![CDATA[]]></studyBudget>
    <subject>
      <keyword vocab="Africa Health Research Institute" vocabURI="AHRI">HIV full-length sequences; date of sampling; phylodynamics, phylogeny, HIV-1, ART history</keyword>
      <topcClas vocab="Africa Health Research Institute" vocabURI="AHRI">HIV-1; Incidence; Phylogeny;  Epidemics;  Population Surveillance;  Rural Population; HIV Infections; Africa</topcClas>
    </subject>
    <abstract><![CDATA[PANGEA is an international consortium of researchers based in Africa, the US and the UK studying transmission dynamics in HIV epidemics in sub-Saharan Africa. The goal of the consortium is to analyse HIV phylogenetic and demographic data to identify individual and population-level factors that drive the epidemic, analyse the dynamics of the epidemic and translate these findings into information that can be used to more effectively target interventions. PANGEA encompasses four analysis themes: (1) molecular epidemiology and mathematical modelling, (2) phylodynamics, (3) mobility and migration, and (4) clinical science, drug resistance and ethics. Major variables within the dataset to address this goal are: (1) the genotype; (2) the participant's geolocation from which the genotype was derived; (3) the date of sampling; (4) all relevant clinical data; and (5) all relevant demographic data.]]></abstract>
    <sumDscr>
      <collDate date="2010-01-01" event="start" cycle=""/>
      <collDate date="2016-12-04" event="end" cycle=""/>
      <nation abbr="ZA">South Africa</nation>
      <geogCover>South Africa</geogCover>
      <geogUnit/>
      <anlyUnit><![CDATA[Each sequence is derived from a single specimen, but some sequences may have been repeated on the same specimen, or at another time point, depending on the quality of the sequence.]]></anlyUnit>
      <universe><![CDATA[HIV genome extracted and sequenced from participants infected with HIV]]></universe>
      <dataKind>Clinical and Demographic data</dataKind>
    </sumDscr>
    <qualityStatement>
      <standardsCompliance>
        <standard>
          <standardName/>
          <producer/>
        </standard>
        <complianceDescription/>
      </standardsCompliance>
      <otherQualityStatement/>
    </qualityStatement>
    <notes><![CDATA[]]></notes>
    <exPostEvaluation completionDate="" type="">
      <evaluationProcess/>
      <outcomes/>
    </exPostEvaluation>
  </stdyInfo>
  <method>
    <dataColl>
      <timeMeth/>
      <frequenc/>
      <sampProc><![CDATA[HIV positive individuals within the demographic surveillance area of the Africa Health Research Institute from 2010 to 2016. Full-length HIV deep sequencing was attempted on samples that:
A) Had a blood sample taken (not DBS)
B) Had a viral load >1000 copies/ml
C) The laboratory achieved an approximately 60% success rate in sequencing them; they should have the list of samples that they attempted to sequence and the ones that were successful.]]></sampProc>
      <sampleFrame>
        <sampleFrameName/>
        <custodian/>
        <universe/>
        <frameUnit isPrimary="">
          <unitType numberOfUnits=""/>
        </frameUnit>
        <updateProcedure/>
      </sampleFrame>
      <deviat/>
      <resInstru><![CDATA[]]></resInstru>
      <instrumentDevelopment type=""/>
      <collSitu><![CDATA[]]></collSitu>
      <actMin><![CDATA[]]></actMin>
      <ConOps><![CDATA[]]></ConOps>
      <weight><![CDATA[]]></weight>
      <cleanOps><![CDATA[Sequences were generated by the Durban-based laboratory of AHRI. Sequences were aligned with one another in ClustalW and are presented in the standard fasta file format.]]></cleanOps>
    </dataColl>
    <notes><![CDATA[]]></notes>
    <anlyInfo>
      <respRate><![CDATA[]]></respRate>
      <EstSmpErr><![CDATA[]]></EstSmpErr>
      <dataAppr><![CDATA[]]></dataAppr>
    </anlyInfo>
    <stdyClas><![CDATA[]]></stdyClas>
    <dataProcessing type=""/>
    <codingInstructions relatedProcesses="" type="">
      <txt/>
      <command formalLanguage=""/>
    </codingInstructions>
  </method>
  <dataAccs>
    <setAvail>
      <accsPlac URI=""/>
      <origArch/>
      <avlStatus/>
      <collSize/>
      <complete/>
      <fileQnty/>
      <notes><![CDATA[]]></notes>
    </setAvail>
    <useStmt>
      <restrctn/>
      <citReq><![CDATA[]]></citReq>
      <deposReq><![CDATA[]]></deposReq>
      <conditions><![CDATA[1. The representative of the Receiving Organization agrees to comply with the following conditions:
2. Access to the restricted data will be limited to the Lead Researcher and other members of the research team listed in this request.
3. Copies of the restricted data or any data created on the basis of the original data will not be copied or made available to anyone other than those mentioned in this Data Access Agreement, unless formally authorized by the Data Archive.
4. The data will only be processed for the stated statistical and research purpose. They will be used solely for reporting of aggregated information, and not for investigation of specific individuals or organizations. Data will not in any way be used for any administrative, proprietary or law enforcement purposes.
5. The Lead Researcher must state if it is their intention to match the restricted microdata with any other micro-dataset. If any matching is to take place, details must be provided of the datasets to be matched and of the reasons for the matching. Any datasets created as a result of matching will be considered to be restricted and must comply with the terms of this Data Access Agreement.
6. The Lead Researcher undertakes that no attempt will be made to identify any individual person, family, business, enterprise or organization. If such a unique disclosure is made inadvertently, no use will be made of the identity of any person or establishment discovered and full details will be reported to the Data Archive. The identification will not be revealed to any other person not included in the Data Access Agreement.
7. The Lead Researcher will implement security measures to prevent unauthorized access to licensed microdata acquired from the Data Archive. The microdata must be destroyed upon the completion of this research, unless the Data Archive obtains satisfactory guarantee that the data can be secured and provides written authorization to the Receiving Organization to retain them. Destruction of the microdata will be confirmed in writing by the Lead Researcher to the Data Archive.
8. Any books, articles, conference papers, theses, dissertations, reports, or other publications that employ data obtained from the Data Archive will cite the source of data in accordance with the citation requirement provided with the dataset.
9. An electronic copy of all reports and publications based on the requested data will be sent to the Data Archive.
10. The original collector of the data, the Data Archive, and the relevant funding agencies bear no responsibility for use of the data or for interpretations or inferences based upon such uses.
11. This agreement will come into force on the date that approval is given for access to the restricted dataset and remain in force until the completion date of the project or an earlier date if the project is completed ahead of time.
If there are any changes to the project specification, security arrangements, personnel or organization detailed in this application form, it is the responsibility of the Lead Researcher to seek the agreement of the Data Archive to these changes. Where there is a change to the employer organization of the Lead Researcher this will involve a new application being made and termination of the original project.
12. Breaches of the agreement will be taken seriously and the Data Archive will take action against those responsible for the lapse if willful or accidental. Failure to comply with the directions of the Data Archive will be deemed to be a major breach of the agreement and may involve recourse to legal proceedings. The Data Archive will maintain and share with partner data archives a register of those individuals and organizations which are responsible for breaching the terms of the Data Access Agreement and will impose sanctions on release of future data to these parties.]]></conditions>
      <disclaimer><![CDATA[]]></disclaimer>
    </useStmt>
    <notes><![CDATA[]]></notes>
  </dataAccs>
  <notes><![CDATA[]]></notes>
</stdyDscr>
<fileDscr ID="F6">
  <fileTxt>
    <fileName>AHRI.PANGEA1.ART.2019.v1</fileName>
    <fileCont></fileCont>
    <dimensns>
      <caseQnty>71421</caseQnty>
      <varQnty>7</varQnty>
    </dimensns>
    <dataChck></dataChck>
    <dataMsng></dataMsng>
    <verStmt>
      <version></version>
    </verStmt>
  </fileTxt>
  <notes></notes>
</fileDscr>
<fileDscr ID="F7">
  <fileTxt>
    <fileName>AHRI.PANGEA1.INDIVIDUALS.2019.v1</fileName>
    <fileCont></fileCont>
    <dimensns>
      <caseQnty>3890</caseQnty>
      <varQnty>15</varQnty>
    </dimensns>
    <dataChck></dataChck>
    <dataMsng></dataMsng>
    <verStmt>
      <version></version>
    </verStmt>
  </fileTxt>
  <notes></notes>
</fileDscr>
<fileDscr ID="F8">
  <fileTxt>
    <fileName>AHRI.PANGEA1.LAB.2019.v1</fileName>
    <fileCont></fileCont>
    <dimensns>
      <caseQnty>19197</caseQnty>
      <varQnty>6</varQnty>
    </dimensns>
    <dataChck></dataChck>
    <dataMsng></dataMsng>
    <verStmt>
      <version></version>
    </verStmt>
  </fileTxt>
  <notes></notes>
</fileDscr>
<dataDscr>
<var ID="V116" name="PangeaId" files="F6" intrvl="discrete">
  <varFormat type="character"/>
  <location width="20"/>
  <labl>Pangea unique Identifier of Individual</labl>
  <sumStat type="vald">71421</sumStat>
  <sumStat type="invd"/>
</var>
<var ID="V117" name="ARTStartDate" files="F6" intrvl="discrete">
  <varFormat type="character" formatname="Nesstar.date"/>
  <location width="11"/>
  <labl>Date when antiretroviral therapy was initiated</labl>
  <sumStat type="vald">70901</sumStat>
  <sumStat type="min">1928-04-24</sumStat>
  <sumStat type="max">2017-05-29</sumStat>
</var>
<var ID="V118" name="ARTEncounterDate" files="F6" intrvl="discrete">
  <varFormat type="character" formatname="Nesstar.date"/>
  <location width="11"/>
  <labl>ART encounter or visit date</labl>
  <sumStat type="vald">70901</sumStat>
  <sumStat type="min">1928-04-24</sumStat>
  <sumStat type="max">2018-02-14</sumStat>
</var>
<var ID="V119" name="ARTRegimen" files="F6" intrvl="discrete">
  <varFormat type="character"/>
  <location width="244"/>
  <labl>ART regimen</labl>
  <sumStat type="vald">63949</sumStat>
</var>
<var ID="V120" name="ARTEndEventDate" files="F6" intrvl="discrete">
  <varFormat type="character" formatname="Nesstar.date"/>
  <location width="11"/>
  <labl>Date of ART end event</labl>
  <sumStat type="vald">6440</sumStat>
  <sumStat type="min">2006-01-05</sumStat>
  <sumStat type="max">2018-08-07</sumStat>
</var>
<var ID="V121" name="ARTEndEventReasons" files="F6" intrvl="discrete">
  <varFormat type="character"/>
  <location width="244"/>
  <labl>Reasons for ART end event occurring</labl>
  <sumStat type="vald">93</sumStat>
  <catgry>
    <catValu>Entry Required</catValu>
    <catStat type="vald"/>
  </catgry>
  <catgry>
    <catValu>Lipodystrophy</catValu>
  </catgry>
  <catgry>
    <catValu>Other</catValu>
  </catgry>
  <catgry>
    <catValu>Policy change</catValu>
  </catgry>
  <catgry>
    <catValu>Poor adherence</catValu>
  </catgry>
  <catgry>
    <catValu>Renal impairment</catValu>
  </catgry>
  <catgry>
    <catValu>TasP trial switch to Atripla</catValu>
  </catgry>
  <catgry>
    <catValu>Virological failure</catValu>
  </catgry>
</var>
<var ID="V122" name="ARTEndEventType" files="F6" intrvl="discrete">
  <varFormat type="numeric"/>
  <location width="12"/>
  <labl>Type of ART end event</labl>
  <sumStat type="vald">5874</sumStat>
  <sumStat type="invd">65547</sumStat>
  <catgry>
    <catValu>1</catValu>
    <labl>Care changed</labl>
    <catStat type="vald"/>
  </catgry>
  <catgry>
    <catValu>2</catValu>
    <labl>Care interrupted</labl>
    <catStat type="invd"/>
  </catgry>
  <catgry>
    <catValu>3</catValu>
    <labl>Death</labl>
  </catgry>
  <catgry>
    <catValu>4</catValu>
    <labl>Transfer out</labl>
  </catgry>
  <catgry>
    <catValu>5</catValu>
    <labl>Lost to follow up</labl>
  </catgry>
  <catgry>
    <catValu>Sysmiss</catValu>
  </catgry>
</var>
<var ID="V123" name="PangeaId" files="F7" intrvl="discrete">
  <varFormat type="character"/>
  <location width="244"/>
  <labl>Pangea unique Identifier of Individual</labl>
  <sumStat type="vald">3890</sumStat>
</var>
<var ID="V124" name="SampleId" files="F7" intrvl="discrete">
  <varFormat type="character"/>
  <location width="244"/>
  <labl>Sample identifier</labl>
  <sumStat type="vald">3890</sumStat>
</var>
<var ID="V125" name="SampleSource" files="F7" intrvl="discrete">
  <varFormat type="numeric"/>
  <location width="12"/>
  <labl>Study from which the sample was taken</labl>
  <sumStat type="vald">3890</sumStat>
  <sumStat type="invd"/>
  <catgry>
    <catValu>1</catValu>
    <catStat type="vald"/>
  </catgry>
  <catgry>
    <catValu>2</catValu>
    <catStat type="invd"/>
  </catgry>
  <catgry>
    <catValu>3</catValu>
  </catgry>
  <catgry>
    <catValu>4</catValu>
  </catgry>
</var>
<var ID="V126" name="Source" files="F7" intrvl="discrete">
  <varFormat type="character"/>
  <location width="244"/>
  <sumStat type="vald">3890</sumStat>
  <catgry>
    <catValu>ACDIS</catValu>
    <catStat type="vald"/>
  </catgry>
  <catgry>
    <catValu>CC</catValu>
  </catgry>
  <catgry>
    <catValu>RES</catValu>
  </catgry>
  <catgry>
    <catValu>TasP</catValu>
  </catgry>
</var>
<var ID="V127" name="DoB" files="F7" intrvl="discrete">
  <varFormat type="character" formatname="Nesstar.date"/>
  <location width="11"/>
  <labl>Individual's Date of Birth</labl>
  <sumStat type="vald">3890</sumStat>
  <sumStat type="min">1901-01-01</sumStat>
  <sumStat type="max">1999-11-16</sumStat>
</var>
<var ID="V128" name="Age" files="F7" intrvl="contin">
  <varFormat type="numeric"/>
  <location width="12"/>
  <labl>Individual's age</labl>
  <sumStat type="vald">3890</sumStat>
  <sumStat type="invd"/>
  <sumStat type="min">14</sumStat>
  <sumStat type="max">114</sumStat>
  <sumStat type="mean">36.354</sumStat>
  <sumStat type="stdev">12.299</sumStat>
</var>
<var ID="V129" name="Sex" files="F7" intrvl="discrete">
  <varFormat type="numeric"/>
  <location width="12"/>
  <labl>Gender</labl>
  <sumStat type="vald">3889</sumStat>
  <sumStat type="invd">1</sumStat>
  <catgry>
    <catValu>1</catValu>
    <labl>Male</labl>
    <catStat type="vald"/>
  </catgry>
  <catgry>
    <catValu>2</catValu>
    <labl>Female</labl>
    <catStat type="invd"/>
  </catgry>
  <catgry>
    <catValu>9</catValu>
    <labl>Unknown</labl>
  </catgry>
  <catgry>
    <catValu>Sysmiss</catValu>
  </catgry>
</var>
<var ID="V130" name="SampleDate" files="F7" intrvl="discrete">
  <varFormat type="character" formatname="Nesstar.date"/>
  <location width="11"/>
  <labl>Date when sample was taken</labl>
  <sumStat type="vald">3890</sumStat>
  <sumStat type="min">2010-12-14</sumStat>
  <sumStat type="max">2016-09-14</sumStat>
</var>
<var ID="V131" name="ACDIS_Id" files="F7" intrvl="contin">
  <varFormat type="numeric"/>
  <location width="12"/>
  <labl>Surveillance unique Identifier of Individual</labl>
  <sumStat type="vald">2736</sumStat>
  <sumStat type="invd">1154</sumStat>
  <sumStat type="min">84</sumStat>
  <sumStat type="max">218561</sumStat>
</var>
<var ID="V132" name="ARTEMIS_Id" files="F7" intrvl="contin">
  <varFormat type="numeric"/>
  <location width="12"/>
  <labl>ARTemis unique Identifier of Individual</labl>
  <sumStat type="vald">1343</sumStat>
  <sumStat type="invd">2547</sumStat>
  <sumStat type="min">41</sumStat>
  <sumStat type="max">200695</sumStat>
</var>
<var ID="V133" name="ACCDB_Id" files="F7" intrvl="contin">
  <varFormat type="numeric"/>
  <location width="12"/>
  <labl>Tier.Net unique Identifier of Individual</labl>
  <sumStat type="vald">2574</sumStat>
  <sumStat type="invd">1316</sumStat>
  <sumStat type="min">4</sumStat>
  <sumStat type="max">56761</sumStat>
</var>
<var ID="V134" name="TasP_Id" files="F7" intrvl="contin">
  <varFormat type="numeric"/>
  <location width="12"/>
  <labl>TasP unique Identifier of Individual</labl>
  <sumStat type="vald">2538</sumStat>
  <sumStat type="invd">1352</sumStat>
  <sumStat type="min">9</sumStat>
  <sumStat type="max">100102</sumStat>
  <sumStat type="mean">11738.361</sumStat>
  <sumStat type="stdev">8344.069</sumStat>
</var>
<var ID="V135" name="LatestNegative" files="F7" intrvl="discrete">
  <varFormat type="character" formatname="Nesstar.date"/>
  <location width="11"/>
  <labl>Date of last negative</labl>
  <sumStat type="vald">304</sumStat>
  <sumStat type="min">2003-06-26</sumStat>
  <sumStat type="max">2017-11-22</sumStat>
</var>
<var ID="V136" name="EarliestPositive" files="F7" intrvl="discrete">
  <varFormat type="character" formatname="Nesstar.date"/>
  <location width="11"/>
  <labl>Date of earliest positive</labl>
  <sumStat type="vald">3879</sumStat>
  <sumStat type="min">2002-11-25</sumStat>
  <sumStat type="max">2017-09-12</sumStat>
</var>
<var ID="V137" name="EarliestKnownART" files="F7" intrvl="discrete">
  <varFormat type="character" formatname="Nesstar.date"/>
  <location width="11"/>
  <labl>Date of earliest known antiretroviral therapy</labl>
  <sumStat type="vald">3585</sumStat>
  <sumStat type="min">1928-04-24</sumStat>
  <sumStat type="max">2017-03-01</sumStat>
</var>
<var ID="V138" name="PangeaId" files="F8" intrvl="discrete">
  <varFormat type="character"/>
  <location width="20"/>
  <labl>Pangea unique Identifier of Individual</labl>
  <sumStat type="vald">19197</sumStat>
  <sumStat type="invd"/>
</var>
<var ID="V139" name="SampleId" files="F8" intrvl="discrete">
  <varFormat type="character"/>
  <location width="20"/>
  <labl>Sample identifier</labl>
  <sumStat type="vald">19197</sumStat>
  <sumStat type="invd"/>
</var>
<var ID="V140" name="ResultDate" files="F8" intrvl="discrete">
  <varFormat type="character" formatname="Nesstar.date"/>
  <location width="11"/>
  <labl>Date when sample was processed</labl>
  <sumStat type="vald">19197</sumStat>
  <sumStat type="min">2003-02-03</sumStat>
  <sumStat type="max">2017-08-10</sumStat>
</var>
<var ID="V141" name="CD4Count" files="F8" intrvl="contin">
  <varFormat type="numeric"/>
  <location width="10"/>
  <labl>CD4 Count value</labl>
  <sumStat type="vald">13822</sumStat>
  <sumStat type="invd">5375</sumStat>
  <sumStat type="min"/>
  <sumStat type="max">4495</sumStat>
  <catgry>
    <catValu>10001</catValu>
    <labl>..</labl>
    <catStat type="vald"/>
  </catgry>
</var>
<var ID="V142" name="CD4Percentage" files="F8" intrvl="contin">
  <varFormat type="numeric"/>
  <location width="10"/>
  <labl>CD4 Percentage value</labl>
  <sumStat type="vald">4934</sumStat>
  <sumStat type="invd">14263</sumStat>
  <sumStat type="min"/>
  <sumStat type="max">66</sumStat>
  <catgry>
    <catValu>101</catValu>
    <labl>..</labl>
    <catStat type="vald"/>
  </catgry>
</var>
<var ID="V143" name="Viralload" files="F8" intrvl="contin">
  <varFormat type="numeric"/>
  <location width="10"/>
  <labl>Viral load value</labl>
  <sumStat type="vald">15189</sumStat>
  <sumStat type="invd">4008</sumStat>
  <sumStat type="min"/>
  <sumStat type="max">11000000</sumStat>
  <catgry>
    <catValu>100000001</catValu>
    <labl>..</labl>
    <catStat type="vald"/>
  </catgry>
</var>
</dataDscr></codeBook>
