annotate SeqSero/libs/run_auto_All_for_assemblies.py @ 4:ab0802d77891 draft default tip

Uploaded
author estrain
date Thu, 12 Sep 2019 06:46:00 -0400
parents c577b57b7c74
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
1 #!/usr/bin/env python
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
2
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
3
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
4
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
5 import os
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
6 from Bio import SeqIO
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
7 import sys
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
8 import itertools
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
9 from Initial_Conditions import phase1
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
10 from Initial_Conditions import phase2
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
11 from Initial_Conditions import phaseO
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
12 from Initial_Conditions import sero
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
13 import time
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
14 import multiprocessing
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
15 import string
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
16
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
# Number of worker processes started per batch in main().  It used to be read
# from the command line (string.atoi(sys.argv[1])); it is temporarily fixed at
# one because a single core is enough for this job.
m = 1

# Path of the input file to analyse, taken from the first command-line argument.
file_name = sys.argv[1]
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
20
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
def Combine(b, c):
    """Return every antigen string obtainable from a formula with optional factors.

    b -- list of optional factors (the bracketed ones, brackets already removed)
    c -- list of mandatory factors

    The first element of the result is the mandatory factors joined with ","
    in their given order (no sorting).  It is followed by one entry per
    non-empty subset of ``b`` (smaller subsets first, in
    ``itertools.combinations`` order); each of those entries is the mandatory
    factors plus the chosen optional ones, split on ",", sorted and re-joined.

    Example: Combine(["g"], ["f", "t"]) -> ["f,t", "f,g,t"]
    """
    combinations = [",".join(c)]
    for size in range(1, len(b) + 1):
        for chosen in itertools.combinations(b, size):
            # Join first and split again so that a factor which itself
            # contains a comma is broken into its parts before sorting.
            factors = ",".join(list(c) + list(chosen)).split(",")
            factors.sort()
            combinations.append(",".join(factors))
    return combinations
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
39
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
40
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
def _run_and_collect(command, out_path):
    """Run *command* with stdout redirected to *out_path*; return that file's lines.

    The output is also appended to data_log.txt, as the pipeline has always done.
    """
    os.system(command + ' >' + out_path)
    os.system('cat ' + out_path + '>>data_log.txt')
    with open(out_path, "r") as handle:
        return handle.readlines()


def _parse_o_antigen(lines):
    """Return (Otype, oafA) parsed from the O-type analysis output lines."""
    Otype = "?"
    oafA = ""
    O3_10 = ""
    O1_3_19 = ""
    for line in lines:
        if "$$$ Most" in line and "O_type" in line:
            Otype = line.split("O-")[1].split("_")[0].split(" ")[0]
            Otype = Otype.replace("\n", "").strip()
        elif "$$$ No" in line:
            Otype = "-"
            # NOTE(review): indentation was lost in the annotated listing this
            # was recovered from; the O-9 check is taken to be nested under
            # the "$$$ No" branch -- confirm against the repository copy.
            if "O-9" in line:
                Otype = "9"
        elif "$$$O5-" in line:
            oafA = "-"
        elif "$$$O3,10 more possible" in line:
            O3_10 = "+"
        elif "$$$O1,3,19 more possible" in line:
            O1_3_19 = "+"
    # Decide between O3,10 and O1,3,19 before the formula is assembled.
    if Otype == "1,3,19" or Otype == "3,10":
        if O3_10 == "+":
            Otype = "3,10"
        elif O1_3_19 == "+":
            Otype = "1,3,19"
        else:
            print("No_O3,10_O1,3,19_spe_sequences")
    return Otype, oafA


def _parse_h_antigens(lines):
    """Return (fliC, fljB) parsed from the H-antigen analysis output lines."""
    fliC = "?"
    fljB = "?"
    for line in lines:
        if "$$$ Most" in line and "fliC" in line:
            fliC = line.split("fliC_type: ")[1].split("_")[0].strip()
            if fliC == "g,m,p,s":
                fliC = "g,m,s"
        elif "$$$ No" in line and "fliC" in line:
            fliC = "-"
        elif "$$$ Most" in line and "fljB" in line:
            fljB = line.split("fljB_type: ")[1].split("_")[0].strip()
        elif "$$$ No" in line and "fljB" in line:
            fljB = "-"
    return fliC, fljB


def _antigen_variants(formula):
    """Expand one phase formula into the list of antigen strings it can match."""
    if formula.count("[") == 0:
        return [formula]
    if formula[0] == "[" and formula[-1] == "]" and formula.count("[") == 1:
        # Whole formula is optional, e.g. [1,5]: either present or absent.
        return [formula.replace("[", "").replace("]", ""), "-"]
    optional = []
    required = []
    for factor in formula.split(","):
        if "[" in factor:
            optional.append(factor.replace("[", "").replace("]", ""))
        else:
            required.append(factor)
    # Combine offers every possible combination, like f,[g],t: f,t f,g,t
    return Combine(optional, required)


def _matching_serotypes(Otype, fliC, fljB):
    """Return the names in ``sero`` whose table formula matches the prediction."""
    # Some table entries such as r,[i] are not in alphabetical order, so both
    # the sorted and the raw antigen strings are tried.
    sorted_fliC = ",".join(sorted(fliC.split(",")))
    sorted_fljB = ",".join(sorted(fljB.split(",")))
    names = []
    for i, phase1_formula in enumerate(phase1):
        if phaseO[i] != Otype:
            continue
        fliC_options = _antigen_variants(phase1_formula)
        fljB_options = _antigen_variants(phase2[i])
        if (sorted_fliC in fliC_options or fliC in fliC_options) and \
                (sorted_fljB in fljB_options or fljB in fljB_options):
            names.append(sero[i])
    return names


def Test(file1, z, q):
    """Predict the serotype of one assembly and write Seqsero_result.txt.

    file1 -- path of the input file; it is renamed to a copy of the name with
             spaces, ":" and brackets replaced/removed when that is possible
    z     -- unused; kept so existing callers keep working
    q     -- index used to build the temp_result_<q>*.txt file names

    The function shells out to Otype_determine_analysis.py,
    H_combination_output_analysis.py and (only for the 9:g,m:- profile)
    special_gene_test_assemblies.py, which are looked up next to this file,
    using the "database" directory relative to the working directory.  On the
    way out it deletes temp_result_<q>*.txt, result.txt and every *.fasta,
    *.xml and *.fa file in the working directory.  Nothing is returned.
    """
    file2 = file1.replace(' ', '_').replace(":", "__").replace("[", "").replace("]", "")
    try:
        os.rename(file1, file2)
        real_file = file2
    except OSError:
        # Best effort: fall back to the original name when renaming fails.
        real_file = file1
    dirpath = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))
    os.system('touch result.txt')
    database_path = "database"
    temp_prefix = 'temp_result_' + str(q)

    # NOTE(review): real_file is interpolated into shell commands unquoted;
    # the rename above only strips spaces, ":" and brackets, so other shell
    # metacharacters in a file name would still reach the shell.
    o_lines = _run_and_collect(
        'python ' + dirpath + '/Otype_determine_analysis.py ' + database_path
        + '/Typhimurium_LT2_gnd_galF.fasta ' + real_file + ' ' + database_path
        + '/new_Oserotype.fasta',
        temp_prefix + 'O.txt')
    Otype, oafA = _parse_o_antigen(o_lines)

    h_lines = _run_and_collect(
        'python ' + dirpath + '/H_combination_output_analysis.py ' + real_file
        + ' ' + database_path + '/H_new_fliC_protein_database.fasta '
        + database_path + '/H_new_fljB_protein_database.fasta',
        temp_prefix + 'H.txt')
    fliC, fljB = _parse_h_antigens(h_lines)

    if Otype == "9" and fliC == "g,m" and fljB == "-":
        sdf_lines = _run_and_collect(
            'python ' + dirpath + '/special_gene_test_assemblies.py '
            + database_path + '/specific_genes.fasta ' + real_file + ' sdf',
            temp_prefix + 'sdf.txt')
        sdf = "-"
        for line in sdf_lines:
            if "$$$" in line and "got a hit" in line:
                sdf = "+"

    seronames = _matching_serotypes(Otype, fliC, fljB)
    if len(seronames) == 0:
        seronames = ["N/A (The predicted antigenic profile does not exist in the White-Kauffmann-Le Minor scheme)"]
    star = ""
    star_line = ""
    if len(seronames) > 1:
        star = "*"
        star_line = "The predicted serotypes share the same general formula:\t" + Otype + ":" + fliC + ":" + fljB + "\n"
    # Same bytes as the Python 2 statement `print "\n","\n"` (three newlines).
    sys.stdout.write("\n\n\n")
    predict_form = Otype + ":" + fliC + ":" + fljB
    predict_sero = (" or ").join(seronames)
    if predict_form == "9:g,m:-":
        # sdf is always bound here: this profile is exactly the condition
        # under which the sdf test above was run.
        predict_form = predict_form + "\nSdf prediction:" + sdf
        if sdf == "-":
            star = "*"
            star_line = "Additional characterization is necessary to assign a serotype to this strain. Commonly circulating strains of serotype Enteritidis are sdf+, although sdf- strains of serotype Enteritidis are known to exist. Serotype Gallinarum is typically sdf- but should be quite rare. Sdf- strains of serotype Enteritidis and serotype Gallinarum can be differentiated by phenotypic profile or genetic criteria.\n"
            predict_sero = "See comments below"
    elif predict_form == "4:i:-":
        predict_sero = "potential monophasic variant of Typhimurium"
    elif predict_form == "4:r:-":
        predict_sero = "potential monophasic variant of Heidelberg"
    elif predict_form == "4:b:-":
        predict_sero = "potential monophasic variant of Paratyphi B"
    elif predict_form == "8:e,h:1,2":
        predict_sero = "Newport"
        star = "*"
        star_line = "Serotype Bardo shares the same antigenic profile with Newport, but Bardo is exceedingly rare."
    claim = "The serotype(s) is/are the only serotype(s) with the indicated antigenic profile currently recognized in the Kauffmann White Scheme. New serotypes can emerge and the possibility exists that this antigenic profile may emerge in a different subspecies. Identification of strains to the subspecies level should accompany serotype determination; the same antigenic profile in different subspecies is considered different serotypes."
    if "N/A" in predict_sero:
        claim = ""
    if "Typhimurium" in predict_sero and oafA == "-":
        predict_sero = predict_sero.strip() + "(O5-)"
        star = "*"
        star_line = "Detected the deletion of O5-."
    with open("Seqsero_result.txt", "w") as new_file:
        new_file.write("Input files:\t" + file2 + "\n" + "O antigen prediction:\t" + "O-" + Otype + "\n" + "H1 antigen prediction(fliC):\t" + fliC + "\n" + "H2 antigen prediction(fljB):\t" + fljB + "\n" + "Predicted antigenic profile:\t" + predict_form + "\n" + "Predicted serotype(s):\t" + predict_sero + star + "\n" + star + star_line + claim + "\n")
    os.system("rm temp_result_" + str(q) + "*.txt")
    os.system("rm result.txt")
    os.system("rm *.fasta *.xml *.fa")
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
218
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
219
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
def main():
    """Run Test() on the input file named on the command line.

    The module-level ``file_name`` is accepted when it ends in .fasta, .fna,
    .fa or .fsa; for .fastq / .fastq.gz names the first eight characters are
    queued instead.  Queued names are processed in batches of ``m`` worker
    processes, and each batch is joined before the next one starts: Test()
    writes fixed file names (result.txt, data_log.txt, Seqsero_result.txt) in
    the working directory, so more than ``m`` workers must never overlap.
    """
    assembly_suffixes = ('.fasta', '.fna', '.fa', '.fsa')
    read_suffixes = (".fastq.gz", ".fastq")
    file_names = []
    fastq_names = []
    for file1 in [file_name]:
        if file1.endswith(assembly_suffixes):
            file_names.append(file1)
        if file1.endswith(read_suffixes):
            fastq_names.append(file1[:8])
    file_names = file_names + list(set(fastq_names))

    for i in range(0, len(file_names), m):
        jobs = []
        # Slicing handles the short final batch without a separate branch.
        for j, name in enumerate(file_names[i:i + m]):
            p = multiprocessing.Process(target=Test, args=(name, i + j + 1, i + j,))
            jobs.append(p)
            p.start()
        for p in jobs:
            p.join()
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
271
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
272
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
# Only run the pipeline when executed as a script, not when imported.
if __name__ == '__main__':
    main()
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
275
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
276
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
277
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
278
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
279
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
280
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
281
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
282