Mercurial > repos > cstrittmatter > ss2v110
comparison bin/SeqSero2_package.py @ 7:aa54a94b9aeb draft
planemo upload commit c50df40caef2fb97c178d6890961e0e527992324-dirty
author | cstrittmatter |
---|---|
date | Mon, 27 Apr 2020 19:39:25 -0400 |
parents | fc22ec8e924e |
children | 357e38526e2a |
comparison
equal
deleted
inserted
replaced
6:7a62fe8e3e5e | 7:aa54a94b9aeb |
---|---|
1231 subprocess.check_call("blastn -query "+database+" -db "+new_fasta+"_db -out "+xmlfile+" -outfmt 5 >> data_log.txt 2>&1",shell=True)###1/27/2015; 08272018, remove "-word_size 10" | 1231 subprocess.check_call("blastn -query "+database+" -db "+new_fasta+"_db -out "+xmlfile+" -outfmt 5 >> data_log.txt 2>&1",shell=True)###1/27/2015; 08272018, remove "-word_size 10" |
1232 else: | 1232 else: |
1233 xmlfile="NA" | 1233 xmlfile="NA" |
1234 return xmlfile,new_fasta | 1234 return xmlfile,new_fasta |
1235 | 1235 |
1236 def judge_subspecies(fnameA): | 1236 def judge_subspecies(fnameA,dirpath): |
1237 #seqsero2 -a; judge subspecies on just forward raw reads fastq | 1237 #seqsero2 -a; judge subspecies on just forward raw reads fastq |
1238 salmID_output=subprocess.Popen("SalmID.py -i "+fnameA,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE) | 1238 samid_strcmd = "python " + dirpath + "/../SalmID.py -i "+fnameA |
1239 #subprocess.check_call(samid_strcmd + " > Salmidoutput.txt",shell=True) | |
1240 print(samid_strcmd) | |
1241 salmID_output=subprocess.Popen(samid_strcmd,shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE) | |
1239 out, err = salmID_output.communicate() | 1242 out, err = salmID_output.communicate() |
1243 print(err) | |
1244 print(out) | |
1240 out=out.decode("utf-8") | 1245 out=out.decode("utf-8") |
1241 file=open("data_log.txt","a") | 1246 file=open("data_log.txt","a") |
1242 file.write(out) | 1247 file.write(out) |
1243 file.close() | 1248 file.close() |
1249 | |
1244 salm_species_scores=out.split("\n")[1].split("\t")[6:] | 1250 salm_species_scores=out.split("\n")[1].split("\t")[6:] |
1245 salm_species_results=out.split("\n")[0].split("\t")[6:] | 1251 salm_species_results=out.split("\n")[0].split("\t")[6:] |
1246 max_score=0 | 1252 max_score=0 |
1247 max_score_index=1 #default is 1, means "I" | 1253 max_score_index=1 #default is 1, means "I" |
1248 for i in range(len(salm_species_scores)): | 1254 for i in range(len(salm_species_scores)): |
1255 prediction="bongori" #if not, the prediction would always be enterica, since they are located in the later part | 1261 prediction="bongori" #if not, the prediction would always be enterica, since they are located in the later part |
1256 #if max_score<10: ## ed_SL_0318: change SalmID_ssp_threshold | 1262 #if max_score<10: ## ed_SL_0318: change SalmID_ssp_threshold |
1257 if max_score<60: | 1263 if max_score<60: |
1258 prediction="-" | 1264 prediction="-" |
1259 return prediction | 1265 return prediction |
1266 | |
1260 | 1267 |
1261 def judge_subspecies_Kmer(Special_dict): | 1268 def judge_subspecies_Kmer(Special_dict): |
1262 #seqsero2 -k; | 1269 #seqsero2 -k; |
1263 max_score=0 | 1270 max_score=0 |
1264 prediction="-" #default should be I | 1271 prediction="-" #default should be I |
1359 for x in Final_list: | 1366 for x in Final_list: |
1360 file.write("\t".join(str(y) for y in x)+"\n") | 1367 file.write("\t".join(str(y) for y in x)+"\n") |
1361 file.close() | 1368 file.close() |
1362 Final_list_passed=[x for x in Final_list if float(x[0].split("_cov_")[1].split("_")[0])>=0.9 and (x[1]>=int(x[0].split("__")[1]) or x[1]>=int(x[0].split("___")[1].split("_")[3]) or x[1]>1000)] | 1369 Final_list_passed=[x for x in Final_list if float(x[0].split("_cov_")[1].split("_")[0])>=0.9 and (x[1]>=int(x[0].split("__")[1]) or x[1]>=int(x[0].split("___")[1].split("_")[3]) or x[1]>1000)] |
1363 O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H,Otypes_uniq,H1_cont_stat_list,H2_cont_stat_list=predict_O_and_H_types(Final_list,Final_list_passed,new_fasta) #predict O, fliC and fljB | 1370 O_choice,fliC_choice,fljB_choice,special_gene_list,contamination_O,contamination_H,Otypes_uniq,H1_cont_stat_list,H2_cont_stat_list=predict_O_and_H_types(Final_list,Final_list_passed,new_fasta) #predict O, fliC and fljB |
1364 subspecies=judge_subspecies(fnameA) #predict subspecies | 1371 subspecies=judge_subspecies(fnameA,dirpath) #predict subspecies |
1365 ###output | 1372 ###output |
1366 predict_form,predict_sero,star,star_line,claim=seqsero_from_formula_to_serotypes(O_choice,fliC_choice,fljB_choice,special_gene_list,subspecies) | 1373 predict_form,predict_sero,star,star_line,claim=seqsero_from_formula_to_serotypes(O_choice,fliC_choice,fljB_choice,special_gene_list,subspecies) |
1367 claim="" #04132019, disable claim for new report requirement | 1374 claim="" #04132019, disable claim for new report requirement |
1368 contamination_report="" | 1375 contamination_report="" |
1369 H_list=["fliC_"+x for x in H1_cont_stat_list if len(x)>0]+["fljB_"+x for x in H2_cont_stat_list if len(x)>0] | 1376 H_list=["fliC_"+x for x in H1_cont_stat_list if len(x)>0]+["fljB_"+x for x in H2_cont_stat_list if len(x)>0] |