require(tidyverse)
require(reporttools)
require(ggpubr)
require(ggh4x)
require(ggbeeswarm)
require(gridGraphics)
library(scales)

set.seed(7)

dataset<-read.csv("dataset.csv")
dataset<-dataset[dataset$TB==1,]
dataset<-dataset[!(dataset$before_threshold_change==1 & dataset$survey=="Vukuzazi"),]

CAD_data<-as.data.frame(matrix(NA,ncol=8,nrow=16))
colnames(CAD_data)<-c("type","HIV_ART","population","Symptoms","Setting","mean","lb","ub")
CAD_data$type<-"CAD score"
CAD_data$HIV_ART<-rep(c("HIV-","HIV+ART-","HIV+ART+", "All"),4)
CAD_data$population<-rep(c("Clinic, aTB","Clinic, sTB","Community aTB","Community sTB"),each=4)
CAD_data$Symptoms<-rep(rep(c("Asymptomatic","Symptomatic"),each=4),2)
CAD_data$Setting<-rep(c("Clinic","Community"),each=8)

CAD_data_exclude<-CAD_data
CAD_data_exclude$type<-"CAD score (xpert undetected included)"

trace_data<-CAD_data
trace_data$type<-"Xpert above trace"

trace_adj_data<-CAD_data
trace_adj_data$type<-"Xpert above trace\n(undetected excluded)"


HIV_list<-rep(c("HIV-","HIV+ART-","HIV+ART+","All"),4)
pop_list<-rep(c("AHCoS aTB","AHCoS sTB","Vuk aTB","Vuk sTB"),each=4)
for (r in seq(1,16)) {
  if (HIV_list[r]=="All") {
    data<-dataset$cad_v5[dataset$survey_symp==pop_list[r] & is.na(dataset$cad_v5)==FALSE]
  } else {
    data<-dataset$cad_v5[dataset$hivart==HIV_list[r] & dataset$survey_symp==pop_list[r] & is.na(dataset$cad_v5)==FALSE]
  }
  CAD_data$mean[r]<-median(data)
  CAD_data$lb[r]<-quantile(data,probs=c(0.25))
  CAD_data$ub[r]<-quantile(data,probs=c(0.75))
}

HIV_list<-rep(c("HIV-","HIV+ART-","HIV+ART+","All"),4)
pop_list<-rep(c("AHCoS aTB","AHCoS sTB","Vuk aTB","Vuk sTB"),each=4)
for (r in seq(1,16)) {
  if (HIV_list[r]=="All") {
    data<-dataset$cad_v5[dataset$survey_symp==pop_list[r] & is.na(dataset$cad_v5)==FALSE & dataset$xpert_pos==1]
  } else {
    data<-dataset$cad_v5[dataset$hivart==HIV_list[r] & dataset$survey_symp==pop_list[r] & is.na(dataset$cad_v5)==FALSE & dataset$xpert_pos==1]
  }
  CAD_data_exclude$mean[r]<-median(data)
  CAD_data_exclude$lb[r]<-quantile(data,probs=c(0.25))
  CAD_data_exclude$ub[r]<-quantile(data,probs=c(0.75))
}



######trace

for (r in seq(1,16)) {
  if (HIV_list[r]=="All") {
    data<-dataset$above_trace[dataset$survey_symp==pop_list[r] & is.na(dataset$above_trace)==FALSE]
  } else {
    data<-dataset$above_trace[dataset$hivart==HIV_list[r] & dataset$survey_symp==pop_list[r] & is.na(dataset$above_trace)==FALSE]
  }
  l<-binom.test(sum(data),length(data))
  trace_data$mean[r]<-l$estimate
  trace_data$lb[r]<-l$conf.int[1]
  trace_data$ub[r]<-l$conf.int[2]
}

######trace exclude xpert neg

for (r in seq(1,16)) {
  if (HIV_list[r]=="All") {
    data<-dataset$above_trace[dataset$survey_symp==pop_list[r] & dataset$xpert_pos==1 & is.na(dataset$above_trace)==FALSE]
  } else {
    data<-dataset$above_trace[dataset$hivart==HIV_list[r] & dataset$survey_symp==pop_list[r] & dataset$xpert_pos==1 & is.na(dataset$above_trace)==FALSE]
  }
  l<-binom.test(sum(data),length(data))
  trace_adj_data$mean[r]<-l$estimate
  trace_adj_data$lb[r]<-l$conf.int[1]
  trace_adj_data$ub[r]<-l$conf.int[2]
}


#############p-value data

pvals<-as.data.frame(matrix(NA,ncol=6,nrow=(4*4*4)))
colnames(pvals)<-c("type","HIV_ART","y.position","group1","group2","p")
pvals$type<-rep(c("CAD score","CAD score (xpert undetected included)","Xpert above trace", "Xpert above trace\n(undetected excluded)"),each=16)
pvals$HIV_ART<-rep(rep(c("HIV-","HIV+ART-","HIV+ART+","All"),each=4),4)
pvals$group1<-rep(c(0.8,1.8,0.8,1.2),16)
pvals$group2<-rep(c(1.2,2.2,1.8,2.2),16)
pvals$y.position<-c(rep(c(90,90,104,118),4),rep(c(90,90,104,118),4),rep(c(1.05,1.05,1.22,1.39),4),rep(c(1.05,1.05,1.22,1.39),4))

dataset$above_trace_detect<-dataset$above_trace
dataset$above_trace_detect[dataset$xpert_pos==0]<-NA

dataset$cad_v5_detect<-dataset$cad_v5
dataset$cad_v5_detect[dataset$xpert_pos==0]<-NA

r<-1
for (type in c("cad_v5","cad_v5_detect","above_trace","above_trace_detect")) {
  for (hiv in c("HIV-","HIV+ART-","HIV+ART+","All")) {
    if (hiv=="All") {
      data<-dataset
    } else {
      data<-dataset[dataset$hivart==hiv,]
    }
    data<-data[data$survey=="AHCoS",]
    col<-which(colnames(data)==type)
    if (type == "above_trace"|type=="above_trace_detect") {
      model<-glm(data[,col]~data$tbsymp, family = "binomial")
      if (length(summary(model)$coefficients[,1]) > 1) {
        pvals$p[r]<-format.pval(summary(model)$coefficients[2,4],digits=2)
      }
    } else {
      model<-wilcox.test(data[,col]~data$tbsymp)
      pvals$p[r]<-format.pval(model$p.value,digits=2)
      #model<-lm(data[,col]~data$tbsymp)
    }
    
    r<-r+1
    
    if (hiv=="All") {
      data<-dataset
    } else {
      data<-dataset[dataset$hivart==hiv,]
    }
    data<-data[data$survey=="Vukuzazi",]
    col<-which(colnames(data)==type)
    if (type == "above_trace"|type=="above_trace_detect") {
      model<-glm(data[,col]~data$tbsymp, family = "binomial")
      if (length(summary(model)$coefficients[,1]) > 1) {
        pvals$p[r]<-format.pval(summary(model)$coefficients[2,4],digits=2)
      }
    } else {
      model<-wilcox.test(data[,col]~data$tbsymp)
      pvals$p[r]<-format.pval(model$p.value,digits=2)
      #model<-lm(data[,col]~data$tbsymp)
    }
    r<-r+1
    
    if (hiv=="All") {
      data<-dataset
    } else {
      data<-dataset[dataset$hivart==hiv,]
    }
    data<-data[data$tbsymp==0,]
    col<-which(colnames(data)==type)
    if (type == "above_trace"|type=="above_trace_detect") {
      model<-glm(data[,col]~data$survey, family = "binomial")
      if (length(summary(model)$coefficients[,1]) > 1) {
        pvals$p[r]<-format.pval(summary(model)$coefficients[2,4],digits=2)
      }
    } else {
      model<-wilcox.test(data[,col]~data$survey)
      pvals$p[r]<-format.pval(model$p.value,digits=2)
      #model<-lm(data[,col]~data$survey)
    }
    
    r<-r+1
    
    if (hiv=="All") {
      data<-dataset
    } else {
      data<-dataset[dataset$hivart==hiv,]
    }
    data<-data[data$tbsymp==1,]
    col<-which(colnames(data)==type)
    if (type == "above_trace"|type=="above_trace_detect") {
      model<-glm(data[,col]~data$survey, family = "binomial")
      if (length(summary(model)$coefficients[,1]) > 1) {
        pvals$p[r]<-format.pval(summary(model)$coefficients[2,4],digits=2)}
    } else {
      model<-wilcox.test(data[,col]~data$survey)
      pvals$p[r]<-format.pval(model$p.value,digits=2)
      #model<-lm(data[,col]~data$survey)
    }
    r<-r+1
  }
}
pvals_all<-pvals


##########supporting info figures

####xpert trace

scales2 <- list(
  #scale_y_continuous(limits = c(0, 130),breaks=c(0,50,100), minor_breaks=c(0, 25, 50, 75, 100),name="Mean CAD score/proportion Xpert above trace"),
  #scale_y_continuous(limits = c(0, 130),breaks=c(0,50,100), minor_breaks=c(0, 25, 50, 75, 100),name="Mean CAD score/proportion Xpert above trace"),
  scale_y_continuous(limits = c(0, 1.5),breaks=c(0,0.5,1), minor_breaks=c(0, 0.25, 0.5, 0.75, 1),name="Proportion Xpert above trace"),
  scale_y_continuous(limits = c(0, 1.5),breaks=c(0,0.5,1), minor_breaks=c(0, 0.25, 0.5, 0.75, 1),name="Proportion Xpert above trace")
)

plot_data_final2<-rbind(trace_data,trace_adj_data)
#plot_data_final<-plot_data_final[plot_data_final$HIV_ART!="HIV+ART-",]
#pvals2<-pvals[pvals$HIV_ART!="HIV+ART-",]
pvals2a<-pvals[pvals$type!="Cavitation",]
pvals2a<-pvals2a[pvals2a$type!="Culture",]
pvals2a<-pvals2a[pvals2a$type!="Xpert",]
pvals2a<-pvals2a[pvals2a$type!="Xpert (adjusted)",]
pvals2a<-pvals2a[pvals2a$type!="CAD score",]
pvals2a<-pvals2a[pvals2a$type!="CAD score (xpert undetected included)",]

plot_data_final2$type<-factor(plot_data_final2$type,
                    levels = c("Xpert above trace\n(undetected excluded)",
                               "Xpert above trace"),
                    labels = c("Xpert above trace",
                               "Xpert above trace\n(Xpert undetected included)"))

pvals2a$type<-factor(pvals2a$type,
                              levels = c("Xpert above trace\n(undetected excluded)",
                                         "Xpert above trace"),
                              labels = c("Xpert above trace",
                                         "Xpert above trace\n(Xpert undetected included)"))

plot_all<-ggplot(data=plot_data_final2) +
  theme_bw() +
  geom_col(aes(x=Setting,fill=Symptoms,y=mean),position="dodge") +
  geom_errorbar(aes(x=Setting,group=Symptoms, ymin = lb, ymax = ub),position="dodge") +
  stat_pvalue_manual(pvals2a, label = "p", y.position="y.position", xmin="group1", xmax="group2",
                     inherit.aes = F, size=5, vjust=-0.3) +
  scale_y_continuous(name="Proportion Xpert above trace/mean CAD score",
                     expand = expansion(mult = c(0.05, 0.2))) +
  theme(axis.title.x = element_blank()) +
  theme(axis.title.y = element_text(size=16)) +
  theme(legend.title = element_text(size=16)) +
  theme(legend.text = element_text(size=16)) +
  theme(axis.text.x  = element_text(size=16)) +
  theme(axis.text.y  = element_text(size=16)) +
  theme(plot.title = element_text(size=16, face="bold")) +
  facet_grid(type~HIV_ART,scales="free_y",
             labeller = labeller(HIV_ART = 
                                   c("HIV-" = "HIV negative",
                                     "HIV+ART-" = "HIV positive, not on ART",
                                     "HIV+ART+" = "HIV positive, on ART",
                                     "All" = "All"))) +
  facetted_pos_scales(y = scales2) +
  theme(strip.text = element_text(size = 16))
#windows()
#plot_all

####CAD

dataset_CADplot<-select(dataset, c('tbsymp','survey','cad_v5','hivart','xpert_pos'))
dataset_CADplot$group<-"Community aTB"
dataset_CADplot$group[dataset_CADplot$tbsymp==1]<-"Community sTB"
dataset_CADplot$group[dataset_CADplot$tbsymp==0 & dataset_CADplot$survey=="AHCoS"]<-"Clinic aTB"
dataset_CADplot$group[dataset_CADplot$tbsymp==1 & dataset_CADplot$survey=="AHCoS"]<-"Clinic sTB"
dataset_CADplot<-dataset_CADplot[!is.na(dataset_CADplot$cad_v5),]
#dataset_CADplot<-dataset_CADplot[!dataset_CADplot$group=="Clinic aTB",]
dataset_CADplot<-dataset_CADplot[!dataset_CADplot$hivart=="",]
dataset_CADplot2<-dataset_CADplot
dataset_CADplot2$hivart<-"All"
dataset_CADplot<-rbind(dataset_CADplot,dataset_CADplot2)
dataset_CADplot$type<-"CAD score (xpert undetected included)"
dataset_CADplot2<-dataset_CADplot
dataset_CADplot2<-dataset_CADplot2[!dataset_CADplot2$xpert_pos==0,]
dataset_CADplot2$type<-"CAD score"
dataset_CADplot<-rbind(dataset_CADplot,dataset_CADplot2)

dataset_CADplot2<-dataset_CADplot[!dataset_CADplot$hivart=="HIV+ART-",]
cols <- c("group","hivart","type")
stats <- dataset_CADplot2 %>% 
  group_by(across(all_of(cols))) %>%
  summarise('mean' = median(cad_v5),
            'lower_ci' = quantile(cad_v5,probs=c(0.25),na.rm=TRUE),
            'upper_ci' = quantile(cad_v5,probs=c(0.75),na.rm=TRUE)
  )

dataset_CADplot2<-dataset_CADplot[dataset_CADplot$hivart=="HIV+ART-",]
dataset_CADplot2<-dataset_CADplot2[!(dataset_CADplot2$group=="Clinic aTB"),]
dataset_CADplot2<-dataset_CADplot2[!(dataset_CADplot2$group=="Community sTB"),]
cols <- c("group","hivart","type")
stats2 <- dataset_CADplot2 %>% 
  group_by(across(all_of(cols))) %>%
  summarise('mean' = median(cad_v5),
            'lower_ci' = quantile(cad_v5,probs=c(0.25),na.rm=TRUE),
            'upper_ci' = quantile(cad_v5,probs=c(0.75),na.rm=TRUE)
  )

stats<-rbind(stats,stats2)

dataset_CADplot$group<-factor(dataset_CADplot$group,
                              levels = c("Clinic aTB", "Clinic sTB","Community aTB", "Community sTB"),
                              labels = c("Clinic-detected\nasymptomatic TB",
                                         "Clinic-detected\nsymptomatic TB",
                                         "Community-detected\nasymptomatic TB",
                                         "Community-detected\nsymptomatic TB"))

stats$group<-factor(stats$group,
                    levels = c("Clinic aTB", "Clinic sTB","Community aTB", "Community sTB"),
                    labels = c("Clinic-detected\nasymptomatic TB",
                               "Clinic-detected\nsymptomatic TB",
                               "Community-detected\nasymptomatic TB",
                               "Community-detected\nsymptomatic TB"))

stats$survey<-c(rep("Clinic",12),rep("Community",12),rep("Clinic",2),rep("Community",2))
dataset_CADplot$survey[dataset_CADplot$survey=="AHCoS"]<-"Clinic"
dataset_CADplot$survey[dataset_CADplot$survey=="Vukuzazi"]<-"Community"

pvals_CADplot<-pvals
colnames(pvals_CADplot)[2]<-"hivart"
pvals_CADplot<-pvals_CADplot[pvals_CADplot$type=="CAD score (xpert undetected included)"|pvals_CADplot$type=="CAD score",]
pvals_CADplot$group1<-rep(c(1,3,1,2),8)
pvals_CADplot$group2<-rep(c(2,4,3,4),8)
pvals_CADplot$y.position<-rep(c(105,105,115,128),8)

scales <- list(
  scale_y_continuous(limits = c(0, 135),breaks=c(0,50,100), minor_breaks=c(0, 25, 50, 75, 100),name="Mean CAD score/proportion Xpert above trace"),
  scale_y_continuous(limits = c(0, 135),breaks=c(0,50,100), minor_breaks=c(0, 25, 50, 75, 100),name="Mean CAD score/proportion Xpert above trace"),
  scale_y_continuous(limits = c(0, 135),breaks=c(0,50,100), minor_breaks=c(0, 25, 50, 75, 100),name="Mean CAD score/proportion Xpert above trace")
  #scale_y_continuous(limits = c(0, 1.5),breaks=c(0,0.5,1), minor_breaks=c(0, 0.25, 0.5, 0.75, 1),name="Proportion Xpert above trace"),
  #scale_y_continuous(limits = c(0, 1.5),breaks=c(0,0.5,1), minor_breaks=c(0, 0.25, 0.5, 0.75, 1),name="Proportion Xpert above trace")
)

plot_CAD<-ggplot() +
  theme_bw() +
  geom_violin(data=dataset_CADplot, aes(x=group,fill=as.factor(group),colour=as.factor(group),y=cad_v5),alpha=0.2,drop=FALSE) +
  geom_beeswarm(data=dataset_CADplot, aes(x=group,colour=as.factor(group),y=cad_v5),cex = 1, size=3, alpha=0.8) +
  geom_errorbar(data=stats,aes(x=group, ymin = lower_ci, ymax = upper_ci),position="dodge",width=.2, size=1) +
  geom_errorbar(data=stats,aes(x=group, ymin = mean, ymax = mean),position="dodge",width=0.8, size=1.5) +
  stat_pvalue_manual(pvals_CADplot, label = "p", y.position="y.position", xmin="group1", xmax="group2",
                     inherit.aes = F, size=4.5, vjust=-0.3) +
  #theme(legend.position = "none") +
  theme(axis.title.x = element_blank()) +
  theme(axis.title.y = element_blank()) +
  scale_y_continuous(name="CAD score") +
  theme(axis.text.x  = element_blank()) +
  theme(axis.text.y  = element_text(size=16)) +
  theme(legend.title = element_text(size=16)) +
  theme(legend.text = element_text(size=14)) +
  theme(plot.title = element_text(size=18, face="bold")) +
  labs(title = "CAD score (CAD4TB v5)") +
  scale_colour_discrete(name="Setting and symptoms") +
  scale_fill_discrete(guide=FALSE) +
  facet_grid(type~hivart,scales="free_y",
             labeller = labeller(hivart = 
                                   c("HIV-" = "HIV negative",
                                     "HIV+ART-" = "HIV positive, not on ART",
                                     "HIV+ART+" = "HIV positive, on ART",
                                     "All" = "All"))) +
  facetted_pos_scales(y = scales) +
  theme(strip.text = element_text(size = 16))
windows()
plot_CAD



#######main paper figure

plot_data_final<-plot_data_final2[!plot_data_final2$HIV_ART=="HIV+ART-",]
plot_data_final<-plot_data_final[!plot_data_final$type=="Xpert above trace\n(Xpert undetected included)",]
plot_data_final<-plot_data_final[!plot_data_final$type=="CAD score",]

pvals<-pvals2a[!pvals2a$HIV_ART=="HIV+ART-",]
pvals<-pvals[!pvals$type=="Xpert above trace\n(Xpert undetected included)",]
pvals<-pvals[!pvals$type=="CAD score",]

pvals$y.position[pvals$type=="Xpert above trace"]<-(pvals$y.position[pvals$type=="Xpert above trace"] - 0.1)


scales <- list(
  #scale_y_continuous(limits = c(0, 125),breaks=c(0,50,100), minor_breaks=c(0, 25, 50, 75, 100), name="Mean CAD score/proportion Xpert above trace"),
  scale_y_continuous(limits = c(0, 1.37),breaks=c(0,0.5,1), minor_breaks=c(0, 0.25, 0.5, 0.75, 1), name="Proportion Xpert above trace")
)


plot_main<-ggplot(data=plot_data_final) +
  theme_bw() +
  geom_col(aes(x=Setting,fill=Symptoms,y=mean),position="dodge") +
  geom_errorbar(aes(x=Setting,group=Symptoms, ymin = lb, ymax = ub),position="dodge") +
  stat_pvalue_manual(pvals, label = "p", y.position="y.position", xmin="group1", xmax="group2",
                     inherit.aes = F, size=5, vjust=-0.3) +
  scale_fill_discrete(name="Reported symptoms") +
  #scale_y_continuous(name="Proportion Xpert above trace",
  #                   expand = expansion(mult = c(0.05, 0.2))) +
  labs(title = "b) Proportion with Xpert greater than trace") +
  theme(axis.title.y = element_text(size=16)) +
  theme(axis.title.x = element_text(size=16)) +
  theme(axis.text.y = element_text(size=16)) +
  theme(axis.text.x = element_text(size=16)) +
  theme(plot.title = element_text(size=16, face="bold")) +
  theme(legend.position = "none") +
  scale_y_continuous(labels=percent) +
  facet_grid(cols=vars(HIV_ART),scales="free_y",
             labeller = labeller(HIV_ART = 
                                   c("HIV-" = "HIV negative",
                                     "HIV+ART-" = "HIV positive, not on ART",
                                     "HIV+ART+" = "HIV positive, on ART",
                                     "All" = "All"))) +
  facetted_pos_scales(y = scales) +
  theme(strip.text = element_text(size = 16))
#windows()
#plot_main

dataset_CADplot<-select(dataset, c('tbsymp','survey','cad_v5','hivart','xpert_pos'))
dataset_CADplot$group<-"Community aTB"
dataset_CADplot$group[dataset_CADplot$tbsymp==1]<-"Community sTB"
dataset_CADplot$group[dataset_CADplot$tbsymp==0 & dataset_CADplot$survey=="AHCoS"]<-"Clinic aTB"
dataset_CADplot$group[dataset_CADplot$tbsymp==1 & dataset_CADplot$survey=="AHCoS"]<-"Clinic sTB"
dataset_CADplot<-dataset_CADplot[!is.na(dataset_CADplot$cad_v5),]
dataset_CADplot<-dataset_CADplot[!dataset_CADplot$xpert_pos==0,]
#dataset_CADplot<-dataset_CADplot[!dataset_CADplot$group=="Clinic aTB",]
dataset_CADplot<-dataset_CADplot[!dataset_CADplot$hivart=="",]
dataset_CADplot2<-dataset_CADplot
dataset_CADplot2$hivart<-"All"
dataset_CADplot<-rbind(dataset_CADplot,dataset_CADplot2)
dataset_CADplot<-dataset_CADplot[!dataset_CADplot$hivart=="HIV+ART-",]

cols <- c("group","hivart")
stats <- dataset_CADplot %>% 
  group_by(across(all_of(cols))) %>%
  summarise('mean' = median(cad_v5),
            'lower_ci' = quantile(cad_v5,probs=c(0.25),na.rm=TRUE),
            'upper_ci' = quantile(cad_v5,probs=c(0.75),na.rm=TRUE)
  )

dataset_CADplot$group<-factor(dataset_CADplot$group,
                              levels = c("Clinic aTB", "Clinic sTB","Community aTB", "Community sTB"),
                              labels = c("Clinic-detected\nasymptomatic TB",
                                         "Clinic-detected\nsymptomatic TB",
                                         "Community-detected\nasymptomatic TB",
                                         "Community-detected\nsymptomatic TB"))

stats$group<-factor(stats$group,
                    levels = c("Clinic aTB", "Clinic sTB","Community aTB", "Community sTB"),
                    labels = c("Clinic-detected\nasymptomatic TB",
                               "Clinic-detected\nsymptomatic TB",
                               "Community-detected\nasymptomatic TB",
                               "Community-detected\nsymptomatic TB"))

pvals_CADplot<-pvals_all
colnames(pvals_CADplot)[2]<-"hivart"
pvals_CADplot<-pvals_CADplot[!pvals_CADplot$hivart=="HIV+ART-",]
pvals_CADplot<-pvals_CADplot[pvals_CADplot$type=="CAD score (xpert undetected included)",]
pvals_CADplot$group1<-rep(c(1,3,1,2),3)
pvals_CADplot$group2<-rep(c(2,4,3,4),3)
pvals_CADplot$y.position<-rep(c(105,105,119,133),3)

scales_CAD <- list(
  scale_y_continuous(limits = c(0, 140),breaks=c(0,50,100), minor_breaks=c(0, 25, 50, 75, 100), name="Mean CAD score")
)


plot_CAD<-ggplot(data=dataset_CADplot, aes(x=group)) +
  theme_bw() +
  geom_violin(data=dataset_CADplot, aes(x=group,fill=as.factor(tbsymp),colour=as.factor(tbsymp),y=cad_v5),alpha=0.2) +
  geom_beeswarm(data=dataset_CADplot, aes(x=group,colour=as.factor(tbsymp),y=cad_v5),cex = 1, size=4, alpha=0.8) +
  geom_errorbar(data=stats,aes(x=group, ymin = lower_ci, ymax = upper_ci),position="dodge",width=.2, size=1) +
  geom_errorbar(data=stats,aes(x=group, ymin = mean, ymax = mean),position="dodge",width=0.8, size=1.5) +
  stat_pvalue_manual(pvals_CADplot, label = "p", y.position="y.position", xmin="group1", xmax="group2",
                     inherit.aes = F, size=5, vjust=-0.3) +
  #theme(legend.position = "none") +
  theme(axis.title.x = element_blank()) +
  theme(axis.title.y = element_text(size=16)) +
  theme(legend.title = element_text(size=16)) +
  theme(legend.text = element_text(size = 16)) +
  scale_y_continuous(name="CAD score") +
  theme(axis.text.x  = element_blank()) +
  theme(axis.text.y  = element_text(size=16)) +
  labs(title = "a) CAD score (CAD4TBv5)") +
  theme(plot.title = element_text(size=16, face="bold")) +
  scale_colour_discrete(name="Reported symptoms",
                        breaks=c("0", "1"),
                        labels=c("Asymptomatic", "Symptomatic")) +
  scale_fill_discrete(guide=FALSE) +
  facet_grid(cols=vars(hivart),scales="free_y",
             labeller = labeller(hivart = 
                                   c("HIV-" = "HIV negative",
                                     "HIV+ART-" = "HIV positive, not on ART",
                                     "HIV+ART+" = "HIV positive, on ART",
                                     "All" = "All"))) +
  facetted_pos_scales(y = scales_CAD) +
  theme(strip.text = element_text(size = 16))
#windows()
#plot_CAD

 pdf(paste0("Figure 1.pdf"), width=11,height=10,onefile=FALSE)
 ggarrange(
   ggarrange(plot_CAD,ncol=1,widths=c(1)),
   ggarrange(NULL,ncol=1,widths=c(1)),
   ggarrange(plot_main,NULL,ncol=2,widths=c(1,0.26)),
   nrow=3,heights=c(1,0.12,1))
 dev.off()


