comparison bin/SeqSero2_package.py @ 7:aa54a94b9aeb draft

planemo upload commit c50df40caef2fb97c178d6890961e0e527992324-dirty
author cstrittmatter
date Mon, 27 Apr 2020 19:39:25 -0400
parents fc22ec8e924e
children 357e38526e2a
comparison
equal deleted inserted replaced
6:7a62fe8e3e5e 7:aa54a94b9aeb
1231 subprocess.check_call("blastn -query "+database+" -db "+new_fasta+"_db -out "+xmlfile+" -outfmt 5 >> data_log.txt 2>&1",shell=True)###1/27/2015; 08272018, remove "-word_size 10" 1231 subprocess.check_call("blastn -query "+database+" -db "+new_fasta+"_db -out "+xmlfile+" -outfmt 5 >> data_log.txt 2>&1",shell=True)###1/27/2015; 08272018, remove "-word_size 10"
1232 else: 1232 else:
1233 xmlfile="NA" 1233 xmlfile="NA"
1234 return xmlfile,new_fasta 1234 return xmlfile,new_fasta
1235 1235
1236 def judge_subspecies(fnameA): 1236 def judge_subspecies(fnameA,dirpath):
1237 #seqsero2 -a; judge subspecies on just forward raw reads fastq 1237 #seqsero2 -a; judge subspecies on just forward raw reads fastq
1238 salmID_output=subprocess.Popen("SalmID.py -i "+fnameA,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE) 1238 samid_strcmd = "python " + dirpath + "/../SalmID.py -i "+fnameA
1239 #subprocess.check_call(samid_strcmd + " > Salmidoutput.txt",shell=True)
1240 print(samid_strcmd)
1241 salmID_output=subprocess.Popen(samid_strcmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
1239 out, err = salmID_output.communicate() 1242 out, err = salmID_output.communicate()
1243 print(err)
1244 print(out)
1240 out=out.decode("utf-8") 1245 out=out.decode("utf-8")
1241 file=open("data_log.txt","a") 1246 file=open("data_log.txt","a")
1242 file.write(out) 1247 file.write(out)
1243 file.close() 1248 file.close()
1249
1244 salm_species_scores=out.split("\n")[1].split("\t")[6:] 1250 salm_species_scores=out.split("\n")[1].split("\t")[6:]
1245 salm_species_results=out.split("\n")[0].split("\t")[6:] 1251 salm_species_results=out.split("\n")[0].split("\t")[6:]
1246 max_score=0 1252 max_score=0
1247 max_score_index=1 #default is 1, means "I" 1253 max_score_index=1 #default is 1, means "I"
1248 for i in range(len(salm_species_scores)): 1254 for i in range(len(salm_species_scores)):
1255 prediction="bongori" #if not, the prediction would always be enterica, since they are located in the later part 1261 prediction="bongori" #if not, the prediction would always be enterica, since they are located in the later part
1256 #if max_score<10: ## ed_SL_0318: change SalmID_ssp_threshold 1262 #if max_score<10: ## ed_SL_0318: change SalmID_ssp_threshold
1257 if max_score<60: 1263 if max_score<60:
1258 prediction="-" 1264 prediction="-"
1259 return prediction 1265 return prediction
1266
1260 1267
1261 def judge_subspecies_Kmer(Special_dict): 1268 def judge_subspecies_Kmer(Special_dict):
1262 #seqsero2 -k; 1269 #seqsero2 -k;
1263 max_score=0 1270 max_score=0
1264 prediction="-" #default should be I 1271 prediction="-" #default should be I
1359 for x in Final_list: 1366 for x in Final_list:
1360 file.write("\t".join(str(y) for y in x)+"\n") 1367 file.write("\t".join(str(y) for y in x)+"\n")
1361 file.close() 1368 file.close()
1362 Final_list_passed=[x for x in Final_list if float(x[0].split("_cov_")[1].split("_")[0])>=0.9 and (x[1]>=int(x[0].split("__")[1]) or x[1]>=int(x[0].split("___")[1].split("_")[3]) or x[1]>1000)] 1369 Final_list_passed=[x for x in Final_list if float(x[0].split("_cov_")[1].split("_")[0])>=0.9 and (x[1]>=int(x[0].split("__")[1]) or x[1]>=int(x[0].split("___")[1].split("_")[3]) or x[1]>1000)]
1363 O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H,Otypes_uniq,H1_cont_stat_list,H2_cont_stat_list=predict_O_and_H_types(Final_list,Final_list_passed,new_fasta) #predict O, fliC and fljB 1370 O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H,Otypes_uniq,H1_cont_stat_list,H2_cont_stat_list=predict_O_and_H_types(Final_list,Final_list_passed,new_fasta) #predict O, fliC and fljB
1364 subspecies=judge_subspecies(fnameA) #predict subspecies 1371 subspecies=judge_subspecies(fnameA,dirpath) #predict subspecies
1365 ###output 1372 ###output
1366 predict_form,predict_sero,star,star_line,claim=seqsero_from_formula_to_serotypes(O_choice,fliC_choice,fljB_choice,special_gene_list,subspecies) 1373 predict_form,predict_sero,star,star_line,claim=seqsero_from_formula_to_serotypes(O_choice,fliC_choice,fljB_choice,special_gene_list,subspecies)
1367 claim="" #04132019, disable claim for new report requirement 1374 claim="" #04132019, disable claim for new report requirement
1368 contamination_report="" 1375 contamination_report=""
1369 H_list=["fliC_"+x for x in H1_cont_stat_list if len(x)>0]+["fljB_"+x for x in H2_cont_stat_list if len(x)>0] 1376 H_list=["fliC_"+x for x in H1_cont_stat_list if len(x)>0]+["fljB_"+x for x in H2_cont_stat_list if len(x)>0]