########################################################################
# Script outline
#   1. Package Loading
#   2. Data Input
#   3. Infect-Score
#   4. BV-Score
#   5. Integrated final prediction results
#   6. Final results of the test sample
########################################################################

######################## Package Loading Begin ########################
# All packages are loaded from one fixed library location.
r_lib_dir <- "/root/anaconda3/lib/R/library"
required_packages <- c(
  "gplots", "ROCR", "ggplot2", "lattice", "caret", "caTools",
  "preprocessCore", "rJava", "RWeka", "RWekajars", "mlbench", "pROC",
  "switchBox"
)
for (pkg in required_packages) {
  # require() returns TRUE/FALSE instead of failing; the status of each
  # package is stored in a variable named after the package (e.g. `gplots`).
  assign(pkg, require(pkg, character.only = TRUE, lib.loc = r_lib_dir))
}
# The SWAP.* functions called below are presumably provided by switchBox
# (verify against the package manual), so stop right away if it failed to
# load rather than hitting "could not find function" later. The other
# packages stay best-effort.
if (!isTRUE(switchBox)) {
  stop(
    "Package 'switchBox' could not be loaded from ", r_lib_dir,
    "; it is required for training and applying the k-TSP classifiers.",
    call. = FALSE
  )
}
######################## Package Loading End ##########################

######################## Data Input Begin #############################
############ Loading training data and classifier ############
# The input data and the output results both use the current working
# directory.
data_dir <- getwd()
results_dir <- getwd()
InfectDiagno_data <- paste0(
  data_dir, "/bagging_classifier_data_InfectDiagno.RData"
)
# NOTE(review): get_gene_unique_combo, valida_data_idam_ss,
# valida_data_idam_vb and valida_data_lable_idam_ss are used below but never
# assigned in this script; presumably they are restored by this load() --
# confirm against the contents of the .RData file.
load(InfectDiagno_data)

############ Reading the test data ############
sample_dir <- paste0(data_dir, "/sample_data_input.txt")
# The file must have a header row; its first column is matched against the
# biomarker gene identifiers below.
sample_data <- read.table(sample_dir, header = TRUE)

############ Get the overlapped genes ############
bagging_rna_genes_unique <- get_gene_unique_combo
sample_data_gene <- intersect(bagging_rna_genes_unique, sample_data[, 1])
# Restrict both training matrices and the test sample to the shared genes,
# all in the same gene order.
emli_train_data_ss <- valida_data_idam_ss[sample_data_gene, ]
emli_train_data_vb <- valida_data_idam_vb[sample_data_gene, ]
emli_train_data_sample <- sample_data[
  match(sample_data_gene, sample_data[, 1]),
]

train_gene_number <- length(bagging_rna_genes_unique)
ovl_gene_number <- length(sample_data_gene)

## Output the overlapped gene number
results_gene_number <- paste0(
  "Among the ", train_gene_number,
  " of the total biomarker genes used in the InfectDiagno method, \n",
  ovl_gene_number, " can be found in the tested sample data."
)
sample_gene_number <- paste0(results_dir, "/gene_number_output.txt")
write.table(
  results_gene_number,
  sample_gene_number,
  quote = FALSE,
  col.names = FALSE,
  row.names = FALSE
)

# Training on gene pairs needs at least two shared genes. The check comes
# after the report above so the gene-count file is still written.
if (ovl_gene_number < 2) {
  stop(
    "Fewer than two biomarker genes overlap with the tested sample data; ",
    "the classifiers cannot be trained.",
    call. = FALSE
  )
}
######################## Data Input End ###############################

######################## Infect-Score Begin ###########################
###### Training between infection and non-infection ######
train_data <- emli_train_data_ss
train_data_lable <- valida_data_lable_idam_ss
fold <- 10
sample_num <- length(train_data_lable)
classifier_bagging <- list()
getTSP_bagging <- list()
TSPscore_bagging <- list()
for (k in seq_len(fold)) {
  print(k)
  # Seeding with the iteration index makes every bootstrap resample
  # reproducible between runs.
  set.seed(k)
  # Bootstrap: draw sample_num training columns with replacement and keep
  # the labels aligned with the drawn columns.
  random_index <- sample(seq_len(sample_num), sample_num, replace = TRUE)
  random_data <- train_data[, random_index]
  random_lable <- train_data_lable[random_index]
  classifier_k <- SWAP.Train.KTSP(random_data, random_lable)
  getTSP_k <- classifier_k$TSPs
  TSPscore_k <- classifier_k$score
  classifier_bagging[[k]] <- classifier_k
  getTSP_bagging[[k]] <- getTSP_k
  TSPscore_bagging[[k]] <- TSPscore_k
}
# Bundle the bagged Infect-Score classifiers together with their gene pairs
# and pair scores.
bagging_classifier <- list(
  classifier_bagging = classifier_bagging,
  getTSP_bagging = getTSP_bagging,
  TSPscore_bagging = TSPscore_bagging
)
bagging_classifier_combine_sample <- bagging_classifier

# Put the test sample (second column of the sample table) in front of the
# training samples, so the test sample is always column 1.
data_all <- data.frame(
  test_sample = emli_train_data_sample[, 2],
  emli_train_data_ss
)
data_all <- as.matrix(data_all)

###### Prediction between infection and non-infection ######
vali_data <- data_all
bagging_classifier <- bagging_classifier_combine_sample
times <- length(bagging_classifier$classifier_bagging)
# One row per sample (column of vali_data), one column per bagged classifier.
predict_bagging_vali <- matrix(
  data = NA, nrow = ncol(vali_data), ncol = times
)
predict_bagging_vali_s <- matrix(
  data = NA, nrow = ncol(vali_data), ncol = times
)
for (w in seq_len(times)) {
  classifier_w <- bagging_classifier$classifier_bagging[[w]]
  valiPrediction_w <- SWAP.KTSP.Classify(vali_data, classifier_w)
  valiPrediction_s <- SWAP.KTSP.Statistics(vali_data, classifier_w)
  tr_vali_w <- as.character(valiPrediction_w)
  predict_bagging_vali[, w] <- tr_vali_w
  pair_num <- nrow(classifier_w$TSPs)
  # Divide the per-sample statistic by half the number of gene pairs.
  # NOTE(review): presumably this rescales each classifier's statistic to a
  # common range -- confirm against the switchBox documentation.
  predict_bagging_vali_s[, w] <- valiPrediction_s$statistics / (pair_num / 2)
}
# Predicted labels were stored as text; convert them to numbers.
# NOTE(review): assumes the class labels are numeric strings -- confirm.
predict_bagging_vali <- apply(predict_bagging_vali, 2, as.numeric)
rownames(predict_bagging_vali) <- colnames(vali_data)
predict_bagging_average_vali <- apply(predict_bagging_vali, 1, mean)
# Per-sample score: sum of the scaled statistics over all classifiers,
# rounded to three decimals.
sample_score <- apply(predict_bagging_vali_s, 1, sum)
sample_score_int <- round(sample_score, 3)
bagging_score <- sample_score_int
bagging_rna_results <- bagging_score

## Get Infect-Score
plot_rna_ss <- data.frame(
  patient_id = colnames(data_all),
  score = bagging_rna_results
)
# Row 1 corresponds to the first column of data_all, i.e. the test sample.
sample_score_infect <- plot_rna_ss$score[1]
######################## Infect-Score End #############################

######################## BV-Score Begin ###############################
###### Training between viral-infection and bacterial-infection ######
train_data <- emli_train_data_vb
train_data_lable <- valida_data_lable_idam_vb
fold <- 10
sample_num <- length(train_data_lable)
# Train `fold` bagged k-TSP classifiers for the viral-vs-bacterial model.
# train_data, train_data_lable, fold and sample_num are set just before this
# point in the script.
classifier_bagging <- list()
getTSP_bagging <- list()
TSPscore_bagging <- list()
for (k in seq_len(fold)) {
  print(k)
  # Seeding with the iteration index makes every bootstrap resample
  # reproducible between runs.
  set.seed(k)
  # Bootstrap: draw sample_num training columns with replacement and keep
  # the labels aligned with the drawn columns.
  random_index <- sample(seq_len(sample_num), sample_num, replace = TRUE)
  random_data <- train_data[, random_index]
  random_lable <- train_data_lable[random_index]
  classifier_k <- SWAP.Train.KTSP(random_data, random_lable)
  getTSP_k <- classifier_k$TSPs
  TSPscore_k <- classifier_k$score
  classifier_bagging[[k]] <- classifier_k
  getTSP_bagging[[k]] <- getTSP_k
  TSPscore_bagging[[k]] <- TSPscore_k
}
# Bundle the bagged BV-Score classifiers together with their gene pairs and
# pair scores.
bagging_classifier <- list(
  classifier_bagging = classifier_bagging,
  getTSP_bagging = getTSP_bagging,
  TSPscore_bagging = TSPscore_bagging
)
bagging_classifier_combine_sample <- bagging_classifier

# Put the test sample (second column of the sample table) in front of the
# training samples, so the test sample is always column 1.
data_all <- data.frame(
  test_sample = emli_train_data_sample[, 2],
  emli_train_data_vb
)
data_all <- as.matrix(data_all)

###### Prediction between viral-infection and bacterial-infection ######
vali_data <- data_all
bagging_classifier <- bagging_classifier_combine_sample
times <- length(bagging_classifier$classifier_bagging)
# One row per sample (column of vali_data), one column per bagged classifier.
predict_bagging_vali <- matrix(
  data = NA, nrow = ncol(vali_data), ncol = times
)
predict_bagging_vali_s <- matrix(
  data = NA, nrow = ncol(vali_data), ncol = times
)
for (w in seq_len(times)) {
  classifier_w <- bagging_classifier$classifier_bagging[[w]]
  valiPrediction_w <- SWAP.KTSP.Classify(vali_data, classifier_w)
  valiPrediction_s <- SWAP.KTSP.Statistics(vali_data, classifier_w)
  tr_vali_w <- as.character(valiPrediction_w)
  predict_bagging_vali[, w] <- tr_vali_w
  pair_num <- nrow(classifier_w$TSPs)
  # Divide the per-sample statistic by half the number of gene pairs.
  # NOTE(review): presumably this rescales each classifier's statistic to a
  # common range -- confirm against the switchBox documentation.
  predict_bagging_vali_s[, w] <- valiPrediction_s$statistics / (pair_num / 2)
}
# Predicted labels were stored as text; convert them to numbers.
# NOTE(review): assumes the class labels are numeric strings -- confirm.
predict_bagging_vali <- apply(predict_bagging_vali, 2, as.numeric)
rownames(predict_bagging_vali) <- colnames(vali_data)
predict_bagging_average_vali <- apply(predict_bagging_vali, 1, mean)
# Per-sample score: sum of the scaled statistics over all classifiers,
# rounded to three decimals.
sample_score <- apply(predict_bagging_vali_s, 1, sum)
sample_score_int <- round(sample_score, 3)
bagging_score <- sample_score_int
bagging_rna_results <- bagging_score

## Get BV-Score
plot_rna_bv <- data.frame(
  patient_id = colnames(data_all),
  score = bagging_rna_results
)
sample_score_vb<-plot_rna_bv$score[1] ######################## BV-Score End ######################## ############################################################### ################################################################# ############ Integrated final prediction results ############### cut_value<-1 if(sample_score_infect >= cut_value){ if(sample_score_vb >= cut_value){ sample_conclusion<-"Viral-infected" } if(sample_score_vb <= -cut_value){ sample_conclusion<-"Bacterial-infected" } if((sample_score_vb>-cut_value)&&(sample_score_vb=-cut_value)&&(sample_score_infect