<?xml version="1.0" encoding="UTF-8"?>
<codeBook version="1.2.2" ID="AHRI.Vukuzazi.PANGEASequence.2023.v1" xml:lang="en" xmlns="http://www.icpsr.umich.edu/DDI" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.icpsr.umich.edu/DDI http://www.icpsr.umich.edu/DDI/Version1-2-2.xsd">
<docDscr>
  <citation>
    <titlStmt>
      <IDNo>DDI.AHRI.Vukuzazi.PANGEASequence.2023.v1</IDNo>
    </titlStmt>
    <prodStmt>
      <producer abbr="AHRI" affiliation="" role="">Africa Health Research Institute</producer>
      <prodDate date="">
        <_value></_value>
      </prodDate>
      <software version="v5">NADA</software>
    </prodStmt>
    <verStmt>
      <version></version>
    </verStmt>
  </citation>
</docDscr>
<stdyDscr>
  <citation>
    <titlStmt>
      <titl>Vukuzazi HIV polymerase sequences:2018-2020.</titl>
      <subTitl/>
      <altTitl/>
      <parTitl/>
      <IDNo>AHRI.Vukuzazi.PANGEASequence.2023.v1</IDNo>
    </titlStmt>
    <rspStmt>
      <AuthEnty affiliation="Africa Health Research Institute">Prof. Willem Hanekom</AuthEnty>
      <AuthEnty affiliation="Africa Health Research Institute">Prof. Thumbi Ndung’u</AuthEnty>
      <AuthEnty affiliation="Africa Health Research Institute">Dr. Kobus Herbst</AuthEnty>
      <othId role="Data Documentation" affiliation="AHRI" email="">
        <p>Sweetness Dube</p>
      </othId>
      <othId role="Head of Research Data Management" affiliation="AHRI" email="">
        <p>Dickman Gareta</p>
      </othId>
      <othId role="Clinical Research Coordinator" affiliation="AHRI" email="">
        <p>Dr. Anne Derache</p>
      </othId>
      <othId role="Vukuzazi Programme Manager" affiliation="AHRI" email="">
        <p>Dr. Resign Gunda</p>
      </othId>
      <othId role="Sequencing platform" affiliation="Big Data Institute Oxford" email="">
        <p>PANGEA consortium</p>
      </othId>
    </rspStmt>
    <prodStmt>
      <producer abbr="" affiliation="" role="">Africa Health Research Institute</producer>
      <copyright/>
      <software version="5.0" date="2023-07-18">NADA</software>
      <fundAg abbr="WT" role="Core funding">Wellcome Trust</fundAg>
      <grantNo>097410/Z/11/Z</grantNo>
    </prodStmt>
    <distStmt>
      <depDate date=""/>
      <distDate date=""/>
    </distStmt>
    <serStmt>
      <serName/>
      <serInfo><![CDATA[]]></serInfo>
    </serStmt>
    <verStmt>
      <version date="">v1.0.0</version>
      <verResp/>
      <notes><![CDATA[]]></notes>
    </verStmt>
    <biblCit format=""><![CDATA[]]></biblCit>
    <notes><![CDATA[]]></notes>
  </citation>
  <stdyInfo>
    <studyBudget><![CDATA[]]></studyBudget>
    <subject>
      <keyword vocab="Africa Health Research Institute" vocabURI="www.ahri.org">HIV polymerase sequence; phylodynamics; phylogeny; HIV-1</keyword>
      <topcClas vocab="Africa Health Research Institute" vocabURI="www.ahri.org">HIV-1;phylogeny;population surveillance; Epidemics; HIV infections; Africa</topcClas>
    </subject>
    <abstract><![CDATA[Vukuzazi ("Wake up and know yourself" in isiZulu) was established in 2018 to offer community-based health phenotyping and comprehensive bio-sampling to all resident adult (≥15 years) members of the Africa Health Research Institute demographic health surveillance population, while building upon the existing wealth of demographic and HIV information collected over the previous 20 years. The objective of the platform was to determine the prevalence and overlap of infectious diseases and non-communicable diseases (NCDs) in the population 20 years into the HIV epidemic. Additionally, the platform aimed to create a data, image and biorepository that could be used to understand the host, pathogen, social and environmental determinants of specific states of health and disease in the population.

The purpose of the genomic HIV data was to understand HIV transmission patterns among HIV-infected individuals in rural KwaZulu-Natal. To address this question, consensus polymerase sequences were derived from HIV-positive samples with viral load greater than 200 copies/ml.]]></abstract>
    <sumDscr>
      <collDate date="2018-01-01" event="start" cycle=""/>
      <collDate date="2020-03-30" event="end" cycle=""/>
      <nation abbr="ZA">South Africa</nation>
      <geogCover>uMkhanyakude district in northern KwaZulu-Natal</geogCover>
      <geogUnit/>
      <anlyUnit><![CDATA[Each sequence derived from a single specimen]]></anlyUnit>
      <universe><![CDATA[HIV-positive individuals enrolled in the Vukuzazi study with a viral load (VL) >200 copies/ml]]></universe>
      <dataKind>HIV Genomic Data, Viral Load</dataKind>
    </sumDscr>
    <qualityStatement>
      <standardsCompliance>
        <standard>
          <standardName/>
          <producer/>
        </standard>
        <complianceDescription/>
      </standardsCompliance>
      <otherQualityStatement/>
    </qualityStatement>
    <notes><![CDATA[]]></notes>
    <exPostEvaluation completionDate="" type="">
      <evaluationProcess/>
      <outcomes/>
    </exPostEvaluation>
  </stdyInfo>
  <method>
    <dataColl>
      <timeMeth/>
      <frequenc/>
      <sampProc><![CDATA[HIV-positive individuals enrolled in the Vukuzazi study between 2018 and 2020. Sequencing was attempted on an individual's blood specimen if:
1. A viral load test had been done.
2. The viral load was above 200 copies/ml.]]></sampProc>
      <sampleFrame>
        <sampleFrameName/>
        <custodian/>
        <universe/>
        <frameUnit isPrimary="">
          <unitType numberOfUnits=""/>
        </frameUnit>
        <updateProcedure/>
      </sampleFrame>
      <deviat/>
      <resInstru><![CDATA[]]></resInstru>
      <instrumentDevelopment type=""/>
      <collSitu><![CDATA[]]></collSitu>
      <actMin><![CDATA[]]></actMin>
      <ConOps><![CDATA[]]></ConOps>
      <weight><![CDATA[]]></weight>
      <cleanOps><![CDATA[Consensus sequences were derived from full-length HIV Next Generation Sequencing data generated by the PANGEA-HIV 2 (Phylogenetics and Networks for Generalised Epidemics in Africa) consortium. Consensus sequences are presented in the standard FASTA file format.]]></cleanOps>
    </dataColl>
    <notes><![CDATA[]]></notes>
    <anlyInfo>
      <respRate><![CDATA[]]></respRate>
      <EstSmpErr><![CDATA[]]></EstSmpErr>
      <dataAppr><![CDATA[]]></dataAppr>
    </anlyInfo>
    <stdyClas><![CDATA[]]></stdyClas>
    <dataProcessing type=""/>
    <codingInstructions relatedProcesses="" type="">
      <txt/>
      <command formalLanguage=""/>
    </codingInstructions>
  </method>
  <dataAccs>
    <setAvail>
      <accsPlac URI=""/>
      <origArch/>
      <avlStatus/>
      <collSize/>
      <complete/>
      <fileQnty/>
      <notes><![CDATA[]]></notes>
    </setAvail>
    <useStmt>
      <restrctn/>
      <citReq><![CDATA[Hanekom, W., Ndung'u, T., & Herbst, K. (2023). Vukuzazi HIV polymerase sequences:2018-2020. [Data set]. Africa Health Research Institute.
DOI: https://doi.org/10.23664/AHRI.Vukuzazi.PANGEASequence.2023]]></citReq>
      <deposReq><![CDATA[]]></deposReq>
      <conditions><![CDATA[Access to the data requires accurate completion of the online data access application form accessible on the AHRI Data Repository (https://data.ahri.org/). Data users are required to abide by the data use conditions stipulated on the application for access to the data. Failure to do so may result in their data access privileges being revoked by the Data Custodian. In order to recognise the effort and intellectual contributions of AHRI investigators in producing and curating the data, users of AHRI data must acknowledge the source of the data, abide by the terms and conditions under which the data is accessed, and cite the dataset in publications using the citation provided as part of this documentation. All analytical datasets published on the AHRI Data Repository are assigned digital object identifiers (DOIs), and the DOIs can be found on the Data Repository under the Study Description tab - Access policy. AHRI data users are required to always cite the dataset using the relevant DOI.]]></conditions>
      <disclaimer><![CDATA[]]></disclaimer>
    </useStmt>
    <notes><![CDATA[]]></notes>
  </dataAccs>
  <notes><![CDATA[]]></notes>
</stdyDscr>
<fileDscr ID="F7">
  <fileTxt>
    <fileName>AHRI.Vukuzazi.PANGEASequence.2023.v1</fileName>
    <fileCont></fileCont>
    <dimensns>
      <caseQnty>1112</caseQnty>
      <varQnty>7</varQnty>
    </dimensns>
    <dataChck></dataChck>
    <dataMsng></dataMsng>
    <verStmt>
      <version></version>
    </verStmt>
  </fileTxt>
  <notes></notes>
</fileDscr>
<dataDscr>
<var ID="V299" name="IndividualId" files="F7" intrvl="discrete">
  <varFormat type="character"/>
  <location width="12"/>
  <labl>Vukuzazi Individual ID</labl>
  <sumStat type="vald">1112</sumStat>
  <sumStat type="invd"/>
</var>
<var ID="V300" name="SpecimenId" files="F7" intrvl="discrete">
  <varFormat type="character"/>
  <location width="14"/>
  <labl>Specimen ID</labl>
  <sumStat type="vald">1112</sumStat>
  <sumStat type="invd"/>
</var>
<var ID="V301" name="AliquotId" files="F7" intrvl="discrete">
  <varFormat type="character"/>
  <location width="13"/>
  <labl>Aliquot ID</labl>
  <sumStat type="vald">1112</sumStat>
  <sumStat type="invd"/>
</var>
<var ID="V302" name="MappedNum" files="F7" intrvl="discrete">
  <varFormat type="character"/>
  <location width="9"/>
  <labl>Mapped Number</labl>
  <sumStat type="vald">1108</sumStat>
  <sumStat type="invd"/>
</var>
<var ID="V303" name="LengthRelaxed" files="F7" intrvl="discrete">
  <varFormat type="character"/>
  <location width="13"/>
  <labl>Length Relaxed</labl>
  <sumStat type="vald">1098</sumStat>
  <sumStat type="invd"/>
</var>
<var ID="V304" name="ShiverConsensus" files="F7" intrvl="discrete">
  <varFormat type="character"/>
  <location width="255"/>
  <labl>Shiver Consensus</labl>
  <sumStat type="vald">1098</sumStat>
</var>
<var ID="V305" name="PANGEAId" files="F7" intrvl="discrete">
  <varFormat type="character"/>
  <location width="13"/>
  <labl>PANGEA ID</labl>
  <sumStat type="vald">1112</sumStat>
  <sumStat type="invd"/>
</var>
</dataDscr></codeBook>
