########################################################################
########################################################################
#The mutation significance cutoff (MSC) project.
#R program for ROC curve comparisons between fixed and MSC-based CADD
#predictions.
#Have in the working directory the input files listed below, containing
#predicted values for the FP and TP sets (controls and cases; values of
#0 and 1 are used in this study to reflect a binary prediction).
#For questions please contact Yuval Itan: yitan@rockefeller.edu
########################################################################
########################################################################

library(pROC)

aa11<-read.table("TP_CADD10.txt") #True positive predictions with a fixed CADD=10 cutoff
CADD_10_cases<-matrix(as.numeric(aa11[,1]),nrow=nrow(aa11))
aa12<-read.table("FP_CADD10.txt") #False positive predictions with a fixed CADD=10 cutoff
CADD_10_controls<-matrix(as.numeric(aa12[,1]),nrow=nrow(aa12))

aa21<-read.table("TP_CADD15.txt") #True positive predictions with a fixed CADD=15 cutoff
CADD_15_cases<-matrix(as.numeric(aa21[,1]),nrow=nrow(aa21))
aa22<-read.table("FP_CADD15.txt") #False positive predictions with a fixed CADD=15 cutoff
CADD_15_controls<-matrix(as.numeric(aa22[,1]),nrow=nrow(aa22))

aa31<-read.table("TP_CADD20.txt") #True positive predictions with a fixed CADD=20 cutoff
CADD_20_cases<-matrix(as.numeric(aa31[,1]),nrow=nrow(aa31))
aa32<-read.table("FP_CADD20.txt") #False positive predictions with a fixed CADD=20 cutoff
CADD_20_controls<-matrix(as.numeric(aa32[,1]),nrow=nrow(aa32))

aa41<-read.table("TP_MSC-CADD90.txt") #True positive predictions with a CADD-based MSC generated with a 90% CI
CADD_MSC_90_cases<-matrix(as.numeric(aa41[,1]),nrow=nrow(aa41))
aa42<-read.table("FP_MSC-CADD90.txt") #False positive predictions with a CADD-based MSC generated with a 90% CI
CADD_MSC_90_controls<-matrix(as.numeric(aa42[,1]),nrow=nrow(aa42))

aa51<-read.table("TP_MSC-CADD95.txt") #True positive predictions with a CADD-based MSC generated with a 95% CI
CADD_MSC_95_cases<-matrix(as.numeric(aa51[,1]),nrow=nrow(aa51))
aa52<-read.table("FP_MSC-CADD95.txt") #False positive predictions with a CADD-based MSC generated with a 95% CI
CADD_MSC_95_controls<-matrix(as.numeric(aa52[,1]),nrow=nrow(aa52))

aa61<-read.table("TP_MSC-CADD99.txt") #True positive predictions with a CADD-based MSC generated with a 99% CI
CADD_MSC_99_cases<-matrix(as.numeric(aa61[,1]),nrow=nrow(aa61))
aa62<-read.table("FP_MSC-CADD99.txt") #False positive predictions with a CADD-based MSC generated with a 99% CI
CADD_MSC_99_controls<-matrix(as.numeric(aa62[,1]),nrow=nrow(aa62))
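
###Optional sanity check (an added sketch, not part of the original pipeline):
###each input file is expected to hold a single column of binary 0/1
###predictions, so every matrix built above should contain only those two
###values. Remove '#' to activate.
#for (m in list(CADD_10_cases, CADD_10_controls, CADD_15_cases, CADD_15_controls,
#               CADD_20_cases, CADD_20_controls, CADD_MSC_90_cases, CADD_MSC_90_controls,
#               CADD_MSC_95_cases, CADD_MSC_95_controls, CADD_MSC_99_cases, CADD_MSC_99_controls)) {
#  stopifnot(all(m %in% c(0, 1)))
#}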

pdf("ROC_CADD.pdf", width = 8, height = 8) #Figure file

#For each of the six prediction sets below: generate the ROC curve from the
#FP (controls) and TP (cases) predictions (percent=TRUE reports sensitivities,
#specificities and AUCs in percent), smooth it (needed because the predictions
#are binary; can potentially be skipped), plot it, and print the area under
#the curve (AUC).
CADD_MSC_90_roc = roc(controls=CADD_MSC_90_controls, cases=CADD_MSC_90_cases, percent=TRUE) #MSC-CADD, 90% CI
test4 = smooth(CADD_MSC_90_roc, method = "fitdistr")
rocobj4 <- plot(test4, col="darkgreen") #First curve opens the plot
auc(test4)

CADD_MSC_95_roc = roc(controls=CADD_MSC_95_controls, cases=CADD_MSC_95_cases, percent=TRUE) #MSC-CADD, 95% CI
test5 = smooth(CADD_MSC_95_roc, method = "fitdistr")
rocobj5 <- lines(test5, col="grey50") #Subsequent curves are added with lines()
auc(test5)

CADD_MSC_99_roc = roc(controls=CADD_MSC_99_controls, cases=CADD_MSC_99_cases, percent=TRUE) #MSC-CADD, 99% CI
test6 = smooth(CADD_MSC_99_roc, method = "fitdistr")
rocobj6 <- lines(test6, col="dodgerblue2")
auc(test6)

CADD_20_roc = roc(controls=CADD_20_controls, cases=CADD_20_cases, percent=TRUE) #Fixed CADD=20 cutoff
test3 = smooth(CADD_20_roc, method = "fitdistr")
rocobj3 <- lines(test3, col="tan2")
auc(test3)

CADD_15_roc = roc(controls=CADD_15_controls, cases=CADD_15_cases, percent=TRUE) #Fixed CADD=15 cutoff
test2 = smooth(CADD_15_roc, method = "fitdistr")
rocobj2 <- lines(test2, col="tomato4")
auc(test2)

CADD_10_roc = roc(controls=CADD_10_controls, cases=CADD_10_cases, percent=TRUE) #Fixed CADD=10 cutoff
test1 = smooth(CADD_10_roc, method = "fitdistr")
rocobj1 <- lines(test1, col="red")
auc(test1)

###Bootstrapping simulations for p-values between pairs of curves; remove '#' to activate.
#testobj <- roc.test(test1, test2, method="bootstrap", boot.n=1000)
#testobj$p.value
#testobj <- roc.test(test1, test3, method="bootstrap", boot.n=1000)
#testobj$p.value
#testobj <- roc.test(test1, test4, method="bootstrap", boot.n=1000)
#testobj$p.value
#testobj <- roc.test(test1, test5, method="bootstrap", boot.n=1000)
#testobj$p.value
#testobj <- roc.test(test1, test6, method="bootstrap", boot.n=1000)
#testobj$p.value

legend("bottomright", legend=c("MSC-CADD 90% CI", "MSC-CADD 95% CI", "MSC-CADD 99% CI", "CADD=20", "CADD=15", "CADD=10"),
       col=c("darkgreen", "grey50", "dodgerblue2", "tan2", "tomato4", "red"), lwd=3, cex=1.5) #Plotting legend

dev.off()
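
###Optional AUC confidence intervals (an added sketch, not part of the original
###pipeline): pROC's ci.auc() computes a DeLong 95% CI around the AUC of an
###unsmoothed ROC curve, which can accompany the AUC values printed above.
###Remove '#' to activate.
#ci.auc(CADD_MSC_90_roc)
#ci.auc(CADD_MSC_95_roc)
#ci.auc(CADD_MSC_99_roc)
#ci.auc(CADD_20_roc)
#ci.auc(CADD_15_roc)
#ci.auc(CADD_10_roc)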