#######################################################################- # useful packages ---- #######################################################################- library(dplyr) #######################################################################- # read in raw data ---- #######################################################################- # 2x2 table data rawdat = textConnection( "case,exposed,n 1,1,58 1,0,117 0,1,27 0,0,148 ", open = "r") study_2x2 = read.csv(rawdat) # smoking stratified data, inferred from the text rawdat_smk = textConnection( "case,exposed,smoker,n 1,1,1,44 1,0,1,50 0,1,1,20 0,0,1,11 1,1,0,14 1,0,0,67 0,1,0,7 0,0,0,137 ", open = "r") study_2x2x2 = read.csv(rawdat_smk) close.connection(rawdat) close.connection(rawdat_smk) #######################################################################- # apparent analysis (here, crude for the pseudo-data approach, adjusted for the bias factor approach) ---- #######################################################################- # apparent odds/risk ratios (rr_apparent <- exp(glm(case~exposed, weight=n, data = study_2x2, family = binomial(link="logit"))$coefficients[2])) (rr_smkadjusted <- exp(glm(case~exposed+smoker, weight=n, data = study_2x2x2, family = binomial(link="logit"))$coefficients[2])) conf_bias = rr_apparent / rr_smkadjusted # estimate of bias due to confounding by smoking # confidence intervals (rr_apparent_ci <- exp(confint(glm(case~exposed, weight=n, data = study_2x2, family = binomial(link="logit")))[2,])) (rr_smkadjusted_ci <- exp(confint(glm(case~exposed+smoker, weight=n, data = study_2x2x2, family = binomial(link="logit")))[2,])) #######################################################################- # useful functions ---- #######################################################################- # function: g_of_ab ---- # commonly used function to combine risk/odds ratios into a common # bias factor g_of_ab <- function(a,b){ a*b/(a+b-1) } # function: boundaway ---- # useful to avoid division by zero in cases where one wishes # to plug in parameter values that indicate no bias boundaway <- function(x,eps=0.00000001) { ifelse(x==1, 1-eps, ifelse(x==0, eps, x)) } # function: checkdata ---- # useful to check for implausible cells in pseudo-data checkdata <- function(data2x2){ if(any(data2x2$n < 0 )) stop("Bias correction at current parameter values yielded implausible data") } #######################################################################- # correct for selection bias ---- # note that the bounding approach and pseudo-data approach # use fundamentally different assumption sets and have # different input parameters #######################################################################- ##################################- ## Selection bias functions ---- ##################################- ## function: selection_biasfactor ---- selection_biasfactor <- function(risky_exposed=c(1,1), risky_unexposed=c(1,1), prevu_selected_exposed=1, prevu_selected_unexposed=1, prevu_unselected_exposed=1, prevu_unselected_unexposed=1){ # this approach conceptualizes selection bias as being due to a latent, or unmeasured factor # see: Smith LH, VanderWeele TJ. Bounding bias due to selection. Epidemiology. 2019;30:509–516. rr_usy_1 = max(risky_exposed)/min(risky_exposed) rr_usy_0 = max(risky_unexposed)/min(risky_unexposed) # rr_sus_1 = (prevu_selected_exposed)/(prevu_unselected_exposed) rr_sus_0 = (prevu_selected_unexposed)/(prevu_selected_unexposed) rr_sus_1 = max(rr_sus_1, 1/rr_sus_1) rr_sus_0 = max(rr_sus_0, 1/rr_sus_0) bias_factor_selection <- g_of_ab(rr_usy_1, rr_sus_1) * g_of_ab(rr_usy_0, rr_sus_0) bias_factor_selection } ## function: selection_correct ---- selection_correct <- function(data2x2, case_1sel=1, case_0sel=1, cont_1sel=1, cont_0sel=1){ # inverse probability of selection weight based correction of cell frequencies A0 = with(data2x2, (sum(case*exposed*n)/case_1sel)) B0 = with(data2x2, (sum(case*(1-exposed)*n)/case_0sel)) C0 = with(data2x2, (sum((1-case)*exposed*n)/cont_1sel)) D0 = with(data2x2, (sum((1-case)*(1-exposed)*n)/cont_0sel)) data2x2_adj <- data2x2 %>% mutate( n = case_when( case==1 & exposed == 1 ~ A0, case==1 & exposed == 0 ~ B0, case==0 & exposed == 1 ~ C0, case==0 & exposed == 0 ~ D0, ) ) data2x2_adj } ##################################- ## Selection bias examples ---- ##################################- ## selection bias parameters used in bias-factor example ---- # stratum specific risks or_female = (93/1077)/(624/2400) risk_males_exposed = 0.08 risk_males_unexposed = 0.02 risky_exposed = c(risk_males_exposed, risk_males_exposed*or_female) risky_unexposed = c(risk_males_unexposed, risk_males_unexposed*or_female) # prevalence of being male, given exposure and selection into study, by Bayes formula pmale_population = 0.5 popium_male_population = 0.241 # no unrounded values available popium_female_population = 0.022 # no unrounded values available popium_male_sample = 0.241 # no unrounded values available pmale_selected = 153/(153+22) pselection = (153+22)/(153*2) # marginal probability of selection popium_population = popium_male_population*pmale_population + popium_female_population*(1-pmale_population) popium_selected = 27/(27+148) p_opium_unselected = (popium_population - (pselection*popium_selected))/(1-pselection) # gives conditional probabilites of being male - the function also # uses this to calculate conditional probabilities of being female prevu_unselected_exposed = popium_male_population*pmale_population*popium_selected prevu_unselected_unexposed = (1-popium_male_population)*pmale_selected*popium_selected prevu_selected_exposed = popium_male_population*pmale_selected*popium_selected prevu_selected_unexposed = (1-popium_male_population)*pmale_selected*(p_opium_unselected) ## selection bias bounding factor ---- bfs <- selection_biasfactor(risky_exposed=risky_exposed, risky_unexposed=risky_unexposed, prevu_selected_exposed=prevu_unselected_exposed, prevu_selected_unexposed=prevu_unselected_unexposed, prevu_unselected_exposed=prevu_selected_exposed, prevu_unselected_unexposed=prevu_selected_unexposed ) ## selection bias parameters used in pseudo-data example ---- pmale_population = 0.5 pfemale_controls = 22/(153+22) pselection_controls = (153+22)/(153+153) popium_male_population = 0.241 # no unrounded values available popium_female_population = 0.022 # no unrounded values available popium_male_sample = 0.241 # no unrounded values available pmale_selected = 153/(153+22) popium_selected = 27/(27+148) popium_sexstandardized = popium_male_population*pmale_population + popium_female_population*(1-pmale_population) #prevu_unselected_exposed = popium_male_population*pmale_population*popium_selected #prevu_unselected_unexposed = popium_male_population*pmale_selected*popium_selected # probability of selection, given case and exposure status # left side of expansion Pr(S=1|Y,A,U)=Pr(S=1|Y,U) pselection_femalecontrols = pfemale_controls*pselection_controls/(1-pmale_population) pselection_malecontrols = (1-pfemale_controls)*pselection_controls/(pmale_population) # right side of expansion Pr(U=u|Y,A) pfemale_exposedcontrols = popium_female_population*(1-pmale_population)/popium_sexstandardized pmale_exposedcontrols = popium_male_population*pmale_population/popium_sexstandardized pfemale_unexposedcontrols = (1-popium_female_population)*(1-pmale_population)/(1-popium_sexstandardized) pmale_unexposedcontrols = (1-popium_male_population)*pmale_population/(1-popium_sexstandardized) (exposed_case_selectionprob <- 1.0) (unexposed_case_selectionprob <- 1.0) (exposed_control_selectionprob <- pselection_femalecontrols*pfemale_exposedcontrols + pselection_malecontrols*pmale_exposedcontrols) (unexposed_control_selectionprob <- pselection_femalecontrols*pfemale_unexposedcontrols + pselection_malecontrols*pmale_unexposedcontrols) ## selection bias adjusted pseudo-data ---- data_select_adj = selection_correct(study_2x2, case_1sel=exposed_case_selectionprob, case_0sel=unexposed_case_selectionprob, cont_1sel=exposed_control_selectionprob, cont_0sel=unexposed_control_selectionprob ) #######################################################################- # adjusted analysis (here, only selection bias) ---- #######################################################################- # adjusted 2x2 data_select_adj # adjusted effect measure (rr_adj<- exp(glm(case~exposed, weight=n, data = data_select_adj, family = binomial(link='logit'))$coefficients[2])) # pessimistic OR bound rr_smkadjusted / bfs #######################################################################- # 2) correct for misclassification ---- # note that the bounding approach and pseudo-data approach # use similar input parameters, but which differs from # other biases #######################################################################- ## function: misclass_biasfactor ---- misclass_biasfactor <- function(case_sens=1, case_spec=1, cont_sens=1, cont_spec=1){ G = boundaway(1-case_spec) H = boundaway(1-cont_spec) I = boundaway(case_sens) J = boundaway(cont_sens) fpor = (G/H)/((1-G)/(1-H)) # false positive odds ratio seor = (I/J)/((1-I)/(1-J)) # sensitivity odds ratio ccr = (I/J)/((1-G)/(1-H)) # correct classification odds ratio icr = (G/H)/((1-I)/(1-J)) # incorrect classification odds ratio or_astary = max(fpor, seor, ccr, icr) bias_factor_misclass <- or_astary bias_factor_misclass } ## function: misclass_correct ---- misclass_correct <- function(data2x2, case_sens=1, case_spec=1, cont_sens=1, cont_spec=1){ # calculate adjusted 2 by 2 table cells under misclassification ncases = sum(with(data2x2, n*case)) ncontrols = sum(with(data2x2, n*(1-case))) A0 = with(data2x2, (sum(case*exposed*n) - ncases*(1-case_spec))/(case_sens - 1 + case_spec)) B0 = ncases - A0 C0 = with(data2x2, (sum((1-case)*exposed*n) - ncontrols*(1-cont_spec))/(cont_sens - 1 + cont_spec)) D0 = ncontrols - C0 data2x2_adj <- data2x2 %>% mutate( n = case_when( case==1 & exposed == 1 ~ A0, case==1 & exposed == 0 ~ B0, case==0 & exposed == 1 ~ C0, case==0 & exposed == 0 ~ D0, ) ) data2x2_adj } ## misclassification parameters used in example ---- # from Rashidia et al (2017) (case_sensitivity <- 38/49) # 77.5% (case_specificity <- 129/140) # 92% (control_sensitivity <- case_sensitivity) (control_specificity <- case_specificity) ## exposure misclassification bounding factor ---- bfm <- misclass_biasfactor( case_sens = case_sensitivity, case_spec = case_specificity, cont_sens = control_sensitivity, cont_spec = control_specificity ) ## selection, exposure misclassification adjusted pseudo-data ---- data_selectmisclass_adj = misclass_correct(data_select_adj, case_sens = case_sensitivity, case_spec = case_specificity, cont_sens = control_sensitivity, cont_spec = control_specificity ) #######################################################################- # selection bias, exposure misclassification adjusted analysis ---- #######################################################################- # adjusted 2x2 data_selectmisclass_adj # adjusted effect measure (safe to ignore warning about non-integer successes) (rr_adj<- exp(glm(case~exposed, weight=n, data = data_selectmisclass_adj, family = binomial(link = "logit"))$coefficients[2])) # pessimistic OR bound rr_smkadjusted / (bfs*bfm) #######################################################################- # correct for unmeasured confounding bias ---- #######################################################################- ## function: confounding_biasfactor ---- confounding_biasfactor <- function(risky_exposed=c(1,1), risky_unexposed=c(1,1), prevu_exposed=1, prevu_unexposed=1){ # this approach conceptualizes selection bias as being due to a latent, or unmeasured factor # see: Smith LH, VanderWeele TJ. Bounding bias due to selection. Epidemiology. 2019;30:509–516. rr_ucy_1 = max(risky_exposed)/min(risky_exposed) rr_ucy_0 = max(risky_unexposed)/min(risky_unexposed) rr_ucy = max(rr_ucy_1, rr_ucy_0) # rr_auc = (prevu_exposed)/(prevu_unexposed) rr_auc = max(rr_auc, 1/rr_auc) bias_factor_confounding <- g_of_ab(rr_ucy, rr_auc) bias_factor_confounding } ## function: confounding_correct ---- confounding_correct <- function(data2x2, rr_uy=2, prevu_exposed=1, prevu_unexposed=.6, datatype="casecon" # are 2x2 data in case/control format or case/population format? ){ if(!(tolower(datatype) %in% c("casecon", "cohort"))) stop("datatype must be 'casecon' or 'cohort'") # inverse probability of selection weight based correction of cell frequencies a = with(data2x2, (sum(case*exposed*n))) b = with(data2x2, (sum(case*(1-exposed)*n))) c = with(data2x2, (sum((1-case)*exposed*n))) d = with(data2x2, (sum((1-case)*(1-exposed)*n))) m = with(data2x2, (sum(exposed*n))) # here, the "exposed" group is really the total n = with(data2x2, (sum((1-exposed)*n))) # here, the "exposed" group is really the total # RR if(toupper(datatype) == "cohort"){ M1 = with(data2x2, (sum(exposed*n)))*prevu_exposed M0 = with(data2x2, (sum(exposed*n))) - M1 N1 = with(data2x2, (sum((1-exposed)*n)))*prevu_unexposed N0 = with(data2x2, (sum((1-exposed)*n))) - N1 A1 = rr_uy * M1 * a / (rr_uy * M1 + m - M1) B1 = rr_uy * N1 * b / (rr_uy * N1 + n - N1) A0 = a - A1 B0 = b - B1 C1 = M1 - A1 C0 = M0 - A0 D1 = N1 - B1 D0 = N0 - B0 } else{ C1 = c*prevu_exposed C0 = c - C1 D1 = d*prevu_unexposed D0 = d - D1 A1 = rr_uy * C1 * a / (rr_uy * C1 + c - C1) A0 = a - A1 B1 = rr_uy * D1 * b / (rr_uy * D1 + d - D1) B0 = b - B1 } data2x21 = data2x2 %>% mutate( u=1 ) data2x20 = data2x2 %>% mutate( u=0 ) data2x2_adj <- rbind(data2x21,data2x20) %>% mutate( n = case_when( case==1 & exposed == 1 & u == 0 ~ A0, case==1 & exposed == 0 & u == 0 ~ B0, case==0 & exposed == 1 & u == 0 ~ C0, case==0 & exposed == 0 & u == 0 ~ D0, case==1 & exposed == 1 & u == 1 ~ A1, case==1 & exposed == 0 & u == 1 ~ B1, case==0 & exposed == 1 & u == 1 ~ C1, case==0 & exposed == 0 & u == 1 ~ D1, ) ) data2x2_adj } ## selection bias parameters used in bias-factor example ---- # prevalence of being male, given exposure and selection into study, by Bayes formula pmale_population = 0.5 popium_male_population = 0.241 # no unrounded values available popium_female_population = 0.022 # no unrounded values available popium_male_sample = 0.241 # no unrounded values available popium_population = popium_male_population*pmale_population + popium_female_population*(1-pmale_population) pmale_exposed = popium_male_population * pmale_population/popium_population pmale_unexposed = (1-popium_male_population) * pmale_population/(1-popium_population) ## selection bias parameters used in the pseudo-data example ---- # stratum specific risks or_female = (93/1077)/(624/2400) risk_males_exposed = 0.08 # arbitrary risk_males_unexposed = 0.02 # arbitrary risky_exposed = c(risk_males_exposed, risk_males_exposed*or_female) risky_unexposed = c(risk_males_unexposed, risk_males_unexposed*or_female) p_opium_unselected = (popium_population - (pselection*popium_selected))/(1-pselection) bfc <- confounding_biasfactor(risky_exposed=risky_exposed, risky_unexposed=risky_unexposed, prevu_exposed=pmale_exposed, # pfemale_exposed calculated within the function prevu_unexposed=pmale_unexposed ) bfc ## misclassification + selection bias + confounding adjusted pseudo-data ---- (data_confoundselectmisclass_adj <- confounding_correct(data_selectmisclass_adj, rr_uy=or_female, prevu_exposed=(1-pmale_exposed), prevu_unexposed=(1-pmale_unexposed), datatype="casecon" )) ############################################################- # adjusted analysis (here, all 3 biases) ---- ############################################################- # adjusted 2x2x2 data_confoundselectmisclass_adj # adjusted effect measure (rr_adj<- exp(glm(case~exposed+u, weight=n, data = data_confoundselectmisclass_adj, family = binomial(link='logit'))$coefficients[2])) # pessimistic OR bound rr_smkadjusted / (bfs * bfm * bfc) ############################################################- # adjusted analysis (here, selection bias and exposure misclassification bias) ---- ############################################################- # adjusted 2x2x2 data_confoundselectmisclass_adj # adjusted effect measure (rr_adj<- exp(glm(case~exposed+u, weight=n, data = data_confoundselectmisclass_adj, family = binomial(link='logit'))$coefficients[2])) # pessimistic lower bound rr_apparent / (bfs * bfm * bfc) ############################################################- # approximately re-correct for measured confounding if needed ---- ############################################################- print(rr_adj/conf_bias) ############################################################- # sensitivity analysis ---- ############################################################- multiple_bias_analysis_pseudodata <- function(data2x2, exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=1, unexposed_control_selectionprob=1, case_sensitivity=1, case_specificity=1, control_sensitivity=1, control_specificity=1, rr_uy=1, prevu_exposed=1, prevu_unexposed=1, conf_bias = 1.0 # bias from measured confounder ){ # parameter values data_select_adj = selection_correct(data2x2, case_1sel=exposed_case_selectionprob, case_0sel=unexposed_case_selectionprob, cont_1sel=exposed_control_selectionprob, cont_0sel=unexposed_control_selectionprob ) checkdata(data_select_adj) data_selectmisclass_adj = misclass_correct(data_select_adj, case_sens = case_sensitivity, case_spec = case_specificity, cont_sens = control_sensitivity, cont_spec = control_specificity ) checkdata(data_selectmisclass_adj) data_confoundselectmisclass_adj <- confounding_correct(data_selectmisclass_adj, rr_uy=rr_uy, prevu_exposed=prevu_exposed, prevu_unexposed=prevu_unexposed, datatype = "casecon" ) checkdata(data_confoundselectmisclass_adj) suppressWarnings(rr_adj<- exp(glm(case~exposed+u, weight=n, data = data_confoundselectmisclass_adj, family = binomial(link='logit'))$coefficients[2])) list(estimate = unname(rr_adj/conf_bias), pseudo_data = data_confoundselectmisclass_adj) } # original analysis multiple_bias_analysis_pseudodata(study_2x2, # selection bias exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=0.9283779, unexposed_control_selectionprob=0.5179202, # exposure misclassification case_sensitivity=38/49, case_specificity=129/140, control_sensitivity=38/49, control_specificity=129/140, # confounding rr_uy=(93/1077)/(624/2400), prevu_exposed=0.08365019, prevu_unexposed=0.5630397, conf_bias = conf_bias ) # higher specificity multiple_bias_analysis_pseudodata(study_2x2, # selection bias exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=0.9283779, unexposed_control_selectionprob=0.5179202, # exposure misclassification case_sensitivity=38/49, case_specificity=1.0, control_sensitivity=38/49, control_specificity=1.0, # confounding rr_uy=(93/1077)/(624/2400), prevu_exposed=0.08365019, prevu_unexposed=0.5630397, conf_bias = conf_bias ) # differential exposure misclassification (false positives among cases only) multiple_bias_analysis_pseudodata(study_2x2, # selection bias exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=0.9283779, unexposed_control_selectionprob=0.5179202, # exposure misclassification case_sensitivity=38/49, case_specificity=129/140, control_sensitivity=38/49, control_specificity=1, # confounding rr_uy=(93/1077)/(624/2400), prevu_exposed=0.08365019, prevu_unexposed=0.5630397, conf_bias = conf_bias ) # differential exposure misclassification (false positives among controls only) multiple_bias_analysis_pseudodata(study_2x2, # selection bias exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=0.9283779, unexposed_control_selectionprob=0.5179202, # exposure misclassification case_sensitivity=38/49, case_specificity=1, control_sensitivity=38/49, control_specificity=129/140, # confounding rr_uy=(93/1077)/(624/2400), prevu_exposed=0.08365019, prevu_unexposed=0.5630397, conf_bias = conf_bias ) # stronger confounding #https://pubmed.ncbi.nlm.nih.gov/8275221/ multiple_bias_analysis_pseudodata(study_2x2, # selection bias exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=0.9283779, unexposed_control_selectionprob=0.5179202, # exposure misclassification case_sensitivity=38/49, case_specificity=129/140, control_sensitivity=38/49, control_specificity=129/140, # confounding rr_uy=1/5.95, prevu_exposed=0.08365019, prevu_unexposed=0.5630397, conf_bias = conf_bias ) # use of hospital controls multiple_bias_analysis_pseudodata(study_2x2, # selection bias exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=1.0, unexposed_control_selectionprob=0.5179202, # exposure misclassification case_sensitivity=38/49, case_specificity=129/140, control_sensitivity=38/49, control_specificity=129/140, # confounding rr_uy=(93/1077)/(624/2400), prevu_exposed=0.08365019, prevu_unexposed=0.5630397, conf_bias = conf_bias ) # Abnet et al sensitivity and specificity multiple_bias_analysis_pseudodata(study_2x2, # selection bias exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=0.9283779, unexposed_control_selectionprob=0.5179202, # exposure misclassification case_sensitivity=70/78, case_specificity=67/72, control_sensitivity=70/78, control_specificity=67/72, # confounding rr_uy=(93/1077)/(624/2400), prevu_exposed=0.08365019, prevu_unexposed=0.5630397, conf_bias = conf_bias ) ############################################################- # probabilistic bias analysis ---- # included here to give the general strategy ############################################################- # function: bootstrap_tabled_data ---- # special function to bootstrap sample tabular data bootstrap_tabled_data <- function(data){ resl = lapply(1:nrow(data), function(x) do.call("rbind", lapply(1:data[x,"n"], function(y) data[x,,drop=FALSE]))) res = do.call(rbind, resl) bootid = sample(1:nrow(res), replace=TRUE) newdat = res[bootid,] %>% select(-n) tabdat = as.data.frame(table(newdat), responseName="n") %>% mutate( case=as.numeric(as.character(case)), exposed=as.numeric(as.character(exposed)) ) tabdat } # special function to get bootstrap variance estimates for measured confounding bias # used in probabilistic bias analysis bootstrap_conf_bias <- function(data2x2x2){ testfn <- function(i){ bootdata = bootstrap_tabled_data(data2x2x2) (rr_apparent <- exp(glm(case~exposed, weight=n, data = bootdata, family = binomial(link="logit"))$coefficients[2])) (rr_smkadjusted <- exp(glm(case~exposed+smoker, weight=n, data = bootdata, family = binomial(link="logit"))$coefficients[2])) conf_bias = rr_apparent / rr_smkadjusted # estimate of bias due to confounding by smoking c(rr_apparent, rr_smkadjusted, conf_bias ) } bootsamps = sapply(1:1000, testfn) sd(log(bootsamps)) } #sd_confbias = bootstrap_conf_bias(study_2x2x2) # 0.1811161 pba_iteration = function(iter, data2x2, # selection bias exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=0.9283779, unexposed_control_selectionprob=0.5179202, # exposure misclassification case_sensitivity=38/49, case_specificity=129/140, control_sensitivity=38/49, # set to null if you want this to exactly equal case value control_specificity=129/140, # set to null if you want this to exactly equal case value # confounding rr_uy=(93/1077)/(624/2400), prevu_exposed=0.08365019, prevu_unexposed=0.5630397, conf_bias = conf_bias, verbose=FALSE ){ if(verbose){ # this will print out the sensitivity parameters print(sapply(as.list(match.call())[6:15], eval, simplify=TRUE)) } if(is.null(control_sensitivity)){ control_sensitivity=case_sensitivity } if(is.null(control_specificity)){ control_specificity=case_specificity } if(iter >0) { # bootstrap sampling from case control study: case/control ns are fixed bootdata <- bootstrap_tabled_data(data2x2) } else bootdata = data2x2 res = tryCatch( multiple_bias_analysis_pseudodata(bootdata, # selection bias exposed_case_selectionprob=exposed_case_selectionprob, unexposed_case_selectionprob=unexposed_case_selectionprob, exposed_control_selectionprob=exposed_control_selectionprob, unexposed_control_selectionprob=unexposed_control_selectionprob, # exposure misclassification case_sensitivity=case_sensitivity, case_specificity=case_specificity, control_sensitivity=control_sensitivity, control_specificity=control_specificity, # confounding rr_uy=rr_uy, prevu_exposed=prevu_exposed, prevu_unexposed=prevu_unexposed, conf_bias = conf_bias )$estimate, error = function(e) NA, finally=NULL ) res } # probabilistic bias analysis no bias ---- # parametric bootstrap on confounding bias set.seed(12312) rr_samples_nobias = sapply(0:1000, function(i) pba_iteration(iter=i, data2x2=study_2x2, exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=1, unexposed_control_selectionprob=1, # exposure misclassification case_sensitivity=1, case_specificity=1, control_sensitivity=1, control_specificity=1, # confounding rr_uy=1, prevu_exposed=1, prevu_unexposed=1, conf_bias = ifelse(i==0, conf_bias, rlnorm(1, log(conf_bias), 0.1811161)) ), simplify=TRUE) rr_samples_nobias[1] # the estimate on the original data #[1] 1.251411 quantile(rr_samples_nobias[-1], c(.5, .025, .975), na.rm=TRUE) # boostrap intervals # 50% 2.5% 97.5% # 1.2494500 0.7102496 2.4609188 # probabilistic bias analysis using distributionalized versions of original bias parameters---- # choice of prior distributions ---- # selection bias # misclassification # sensitivity and specificity: draws from beta distribution parameterized as # B(a,b), where 'a' is the numer of 'successes' and 'b' is the number of 'failures' # These can be directly taken from the validation data # Sensitivity = B(true positives, false negatives) ['true positive rate'] # Specificity = 1 - B(false positives, true negatives) [parameterized as 'false positive rate'] # unmeasured confounding # risk ratio: impact of u on y, can use crude log-or as 'prior' for log-rr Hadji_rawdat = textConnection( "case,exposed,n 1,1,93 1,0,1077 0,1,624 0,0,2400 ", open = "r") Hadji_dat = read.csv(Hadji_rawdat) close.connection(Hadji_rawdat) log_or_prior = summary(glm(case~exposed, weight=n, family=binomial(), data = Hadji_dat))$coefficients[2,] # 10000 draws from prior distribution hist(exp(rnorm(10000, log_or_prior["Estimate"], log_or_prior["Std. Error"]))) # prevalence of U among the exposed: Moradinazar study, again using beta distribution # total number exposed: 15600 # estimated proportion female: 0.08365019 nfe = 0.08365019*15600 # expected number of females among exposed nme = (1-0.08365019)*15600 # expected number of males among exposed hist(rbeta(10000, nfe, nme)) # prevalence of U among the unexposed # estimated proportion female: 0.5630397 nfu = 0.5630397*(130570-15600) # expected number of females among unexposed nmu = (1-0.5630397)*(130570-15600) # expected number of males among unexposed hist(rbeta(10000, nfu, nmu)) # forcing non-differentiality of cases and controls set.seed(12312) rr_samples1 = sapply(1:1000, function(i) pba_iteration(iter=i, data2x2=study_2x2, exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=rbeta(1,93,8), unexposed_control_selectionprob=rbeta(1,52, 48), case_sensitivity=rbeta(1,38,49-38), case_specificity=1-rbeta(1,140-129,129), control_sensitivity=NULL, control_specificity=NULL, rr_uy=exp(rnorm(1, log_or_prior["Estimate"], log_or_prior["Std. Error"])), prevu_exposed=rbeta(1, 0.08365019*15600, (1-0.08365019)*15600), prevu_unexposed=rbeta(1, 0.5630397*(130570-15600), (1-0.5630397)*(130570-15600)), conf_bias = rlnorm(1, log(conf_bias), 0.1811161), verbose=FALSE ), simplify=TRUE) quantile(rr_samples1, c(.5, .025, .975), na.rm=TRUE) # boostrap intervals # 50% 2.5% 97.5% # 4.815092 1.195824 69.046440 mean(is.na(rr_samples1))# proportion of samples with implausible values #[1] 0.324 # allowing cases and controls to differ in sens/spec, but being similar on average set.seed(12312) rr_samples2 = sapply(1:1000, function(i) pba_iteration(iter=i, data2x2=study_2x2, exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=rbeta(1,93,8), unexposed_control_selectionprob=rbeta(1,52, 48), case_sensitivity=rbeta(1,38,49-38), case_specificity=1-rbeta(1,140-129,129), control_sensitivity=rbeta(1,38,49-38), control_specificity=1-rbeta(1,140-129,140), rr_uy=exp(rnorm(1, log_or_prior["Estimate"], log_or_prior["Std. Error"])), prevu_exposed=rbeta(1, 0.08365019*15600, (1-0.08365019)*15600), prevu_unexposed=rbeta(1, 0.5630397*(130570-15600), (1-0.5630397)*(130570-15600)), conf_bias = rlnorm(1, log(conf_bias), 0.1811161), verbose=FALSE ), simplify=TRUE) quantile(rr_samples2, c(.5, .025, .975), na.rm=TRUE) # boostrap intervals #50% 2.5% 97.5% #4.075963 1.185807 65.023321 mean(is.na(rr_samples2))# proportion of samples with implausible values # 0.225 # alternative sens/spec estimatates from abnet #case_sensitivity=70/78, #case_specificity=67/72, set.seed(12312) rr_samples3 = sapply(1:1000, function(i) pba_iteration(iter=i, data2x2=study_2x2, exposed_case_selectionprob=1, unexposed_case_selectionprob=1, exposed_control_selectionprob=rbeta(1,93,8), unexposed_control_selectionprob=rbeta(1,52, 48), case_sensitivity=rbeta(1,70,78-70), case_specificity=1-rbeta(1,72-67,67), control_sensitivity=case_sensitivity, control_specificity=case_specificity, rr_uy=exp(rnorm(1, log_or_prior["Estimate"], log_or_prior["Std. Error"])), prevu_exposed=rbeta(1, 0.08365019*15600, (1-0.08365019)*15600), prevu_unexposed=rbeta(1, 0.5630397*(130570-15600), (1-0.5630397)*(130570-15600)), conf_bias = rlnorm(1, log(conf_bias), 0.1811161), verbose=FALSE ), simplify=TRUE) quantile(rr_samples3, c(.5, .025, .975), na.rm=TRUE) # boostrap intervals mean(is.na(rr_samples3))# proportion of samples with implausible values