annotate SeqSero/libs/BWA_analysis_O_new_dependent.py @ 4:ab0802d77891 draft default tip

Uploaded
author estrain
date Thu, 12 Sep 2019 06:46:00 -0400
parents c577b57b7c74
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
1 #!/usr/bin/env python
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
2 #tyr_of_O2_O9.fasta should be in the same directory, in it, O9 should be first then O2
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
3
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
4 import os
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
5 from Bio import SeqIO
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
6 import sys
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
7 from Initial_functions import Uniq
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
8 from Bio.Blast import NCBIXML
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
9
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
def _prepare_reads(sra_name, additional_file, file_mode):
    """Decompress/split the input reads and derive the working file names.

    Returns a tuple ``(for_fq, rev_fq, for_sai, rev_sai, sam, bam)``.
    ``rev_fq`` and ``rev_sai`` are empty strings for single-end input
    (``file_mode == "3"``).

    Raises ValueError if ``file_mode`` is not "1", "2" or "3".
    """
    # NOTE(review): paths are concatenated into shell command lines unquoted,
    # so names containing spaces or shell metacharacters will not work.
    # os.system() never raises when the command itself fails, so the gunzip
    # calls below need no try/except around them.
    script_dir = os.path.abspath(os.path.dirname(os.path.realpath(__file__)))

    if file_mode == "1":  # interleaved paired-end reads
        if sra_name[-3:] == "sra":
            os.system("fastq-dump --split-files " + sra_name)
            for_fq = sra_name.replace(".sra", "_1.fastq")
            rev_fq = sra_name.replace(".sra", "_2.fastq")
            for_sai = sra_name.replace(".sra", "_1.sai")
            rev_sai = sra_name.replace(".sra", "_2.sai")
            sam = sra_name.replace(".sra", ".sam")
            bam = sra_name.replace(".sra", ".bam")
        else:
            core_id = sra_name.split(".fastq")[0]
            os.system("gunzip " + sra_name)
            interleaved = core_id + ".fastq"
            # The splitter is given an output name without dots; its two
            # results are then renamed back to the dotted core id.
            dotless_id = core_id.replace(".", "_")
            os.system("perl " + script_dir + "/split_interleaved_fastq.pl --input "
                      + interleaved + " --output " + dotless_id + ".fastq")
            ori_size = os.path.getsize(interleaved)
            os.system("mv " + dotless_id + "-read1.fastq" + " " + core_id + "-read1.fastq")
            os.system("mv " + dotless_id + "-read2.fastq" + " " + core_id + "-read2.fastq")
            for_fq = core_id + "-read1.fastq"
            rev_fq = core_id + "-read2.fastq"
            # Fall back to the second splitter when either output is at most
            # 10% of the input size (presumably the first split did not
            # work for this read-ID format -- TODO confirm).
            if (float(os.path.getsize(for_fq)) / ori_size <= 0.1
                    or float(os.path.getsize(rev_fq)) / ori_size <= 0.1):
                os.system("echo haha")
                os.system("perl " + script_dir + "/splitPairedEndReads.pl " + interleaved)
                os.system("mv " + core_id + ".fastq_1 " + for_fq)
                os.system("mv " + core_id + ".fastq_2 " + rev_fq)
            else:
                os.system("echo hehe")
            for_sai = core_id + "_1.sai"
            rev_sai = core_id + "_2.sai"
            sam = core_id + ".sam"
            bam = core_id + ".bam"
    elif file_mode == "2":  # separate forward / reverse files
        os.system("gunzip " + sra_name)
        os.system("gunzip " + additional_file)
        for_core_id = sra_name.split(".fastq")[0]
        re_core_id = additional_file.split(".fastq")[0]
        for_fq = for_core_id + ".fastq"
        rev_fq = re_core_id + ".fastq"
        print("check fastq id and make them in accordance with each other...please wait...")
        os.system("python " + script_dir + "/compare_and_change_two_fastq_id.py "
                  + for_fq + " " + rev_fq)
        for_sai = for_core_id + ".sai"
        rev_sai = re_core_id + ".sai"
        sam = for_core_id + ".sam"
        bam = sam.replace(".sam", ".bam")
    elif file_mode == "3":  # single-end reads
        rev_fq = ""
        rev_sai = ""
        if sra_name[-3:] == "sra":
            os.system("fastq-dump --split-files " + sra_name)
            for_fq = sra_name.replace(".sra", "_1.fastq")
            for_sai = sra_name.replace(".sra", "_1.sai")
            sam = sra_name.replace(".sra", ".sam")
            bam = sra_name.replace(".sra", ".bam")
        else:
            core_id = sra_name.split(".fastq")[0]
            os.system("gunzip " + sra_name)
            for_fq = core_id + ".fastq"
            for_sai = core_id + "_1.sai"
            sam = core_id + ".sam"
            bam = core_id + ".bam"
    else:
        raise ValueError("unsupported file_mode %r (expected '1', '2' or '3')" % (file_mode,))
    return for_fq, rev_fq, for_sai, rev_sai, sam, bam


def _map_reads(database, mapping_mode, paired, for_fq, rev_fq, for_sai, rev_sai, sam, bam):
    """Align the reads to ``database`` with bwa and keep only mapped reads.

    An unrecognised ``mapping_mode`` runs no aligner at all; only the two
    samtools commands are executed in that case.
    """
    if paired:
        if mapping_mode == "sam":
            os.system("bwa aln " + database + " " + for_fq + " > " + for_sai)
            os.system("bwa aln " + database + " " + rev_fq + " > " + rev_sai)
            os.system("bwa sampe " + database + " " + for_sai + " " + rev_sai
                      + " " + for_fq + " " + rev_fq + " > " + sam)
        elif mapping_mode == "mem":
            os.system("bwa mem " + database + " " + for_fq + " " + rev_fq + " > " + sam)
        elif mapping_mode == "nanopore":
            os.system("bwa mem -x ont2d " + database + " " + for_fq + " " + rev_fq + " > " + sam)
    else:
        if mapping_mode == "mem":
            os.system("bwa mem " + database + " " + for_fq + " > " + sam)
        elif mapping_mode == "sam":
            os.system("bwa aln " + database + " " + for_fq + " > " + for_sai)
            os.system("bwa samse " + database + " " + for_sai + " " + for_fq + " > " + sam)
        elif mapping_mode == "nanopore":
            os.system("bwa mem -x ont2d " + database + " " + for_fq + " > " + sam)
    # -F 4 drops unmapped reads; the filtered BAM is then written back over
    # the SAM file so the text that is parsed next holds mapped reads only.
    os.system("samtools view -F 4 -Sbh " + sam + " > " + bam)
    os.system("samtools view -h -o " + sam + " " + bam)


def _count_reference_hits(sam):
    """Return ``[(reference_name, read_count), ...]`` sorted by count, descending.

    Only lines longer than 300 characters are counted, as in the original
    heuristic.  Header lines (starting with "@") are skipped explicitly so a
    long @PG/@CO line can never be mistaken for an alignment record.
    """
    reference_names = []
    with open(sam, "r") as sam_file:
        for line in sam_file:
            if len(line) > 300 and not line.startswith("@"):
                reference_names.append(line.split("\t")[2])  # third SAM column
    unique_names, counts = Uniq(reference_names)
    hits = dict(zip(unique_names, counts))
    return sorted(hits.items(), key=lambda d: d[1], reverse=True)


def BWA_O_analysis(sra_name,additional_file,database,mapping_mode,file_mode):
    """Map reads against the O-antigen database and print the predicted O type.

    Parameters:
      sra_name        -- path of the reads (".sra", or ".fastq" optionally gzipped)
      additional_file -- reverse-read file, used only when file_mode is "2"
      database        -- FASTA reference handed to "bwa index" and the aligner
      mapping_mode    -- "mem", "sam" (bwa aln + sampe/samse) or "nanopore"
      file_mode       -- "1" interleaved, "2" separate pair files, "3" single-end

    Returns None.  Results are printed to stdout on lines prefixed with "$$$".
    When the call is "O-9" the decision is delegated to assembly(), which
    prints its own result lines.

    Raises ValueError for an unsupported file_mode.

    NOTE(review): the nesting of the decision logic was reconstructed from a
    listing without indentation -- confirm against the repository copy.
    """
    for_fq, rev_fq, for_sai, rev_sai, sam, bam = _prepare_reads(
        sra_name, additional_file, file_mode)

    os.system("bwa index " + database)
    _map_reads(database, mapping_mode, file_mode != "3",
               for_fq, rev_fq, for_sai, rev_sai, sam, bam)

    final_O = _count_reference_hits(sam)
    print("Final_Otype_list:")
    print(final_O)

    # Read counts of the marker genes that drive the special-case decisions.
    # Every marker starts at 0 so none can be left undefined.
    marker_reads = {
        "O-9,46_wbaV": 0,
        "O-3,10_wzx": 0,
        "O-9,46_wzy": 0,
        "O-3,10_not_in_1,3,19": 0,
        "O-9,46,27_partial_wzy": 0,
    }
    num_1 = 0  # total number of counted reads over all references
    for name, count in final_O:
        num_1 = num_1 + count
        for marker in marker_reads:
            if marker in name:
                marker_reads[marker] = count

    print("$$$Genome: %s" % (sra_name,))
    if not final_O:
        print("$$$No Otype, due to no hit")
        return
    if final_O[0][1] < 8:
        print("$$$No Otype, due to the hit reads number is small.")
        return

    # References with enough reads to be considered present at all.
    abundant = [name for name, count in final_O if count > 5]

    # Report "sdf" hits and drop them from the ranking.  A new list is built
    # instead of removing entries from the list being iterated, which used to
    # skip the entry following each removed one.
    sdf_found = False
    ranked = []
    for name, count in final_O:
        if "sdf" in name and count > 3:
            sdf_found = True
            print("$$$ %s got a hit, reads: %s" % (name, count))
        else:
            ranked.append((name, count))
    final_O = ranked
    if not sdf_found:
        print("$$$No sdf exists")

    def supported(marker):
        # Present in the abundant list and carrying more than 10% of all reads.
        return marker in abundant and float(marker_reads[marker]) / float(num_1) > 0.1

    if supported("O-9,46_wbaV"):
        if supported("O-9,46_wzy"):
            O_choice = "O-9,46"
            print("$$$Most possilble Otype: O-9,46")
        elif supported("O-9,46,27_partial_wzy"):
            O_choice = "O-9,46,27"
            print("$$$Most possilble Otype: O-9,46,27")
        else:
            # O-9 is further checked by assembly(), which prints the final
            # call itself; rev_fq/rev_sai are "" for single-end input.
            O_choice = "O-9"
            assembly(sra_name, O_choice, for_fq, rev_fq, for_sai, rev_sai,
                     sam, bam, mapping_mode)
    elif supported("O-3,10_wzx") and supported("O-9,46_wzy"):
        if supported("O-3,10_not_in_1,3,19"):
            O_choice = "O-3,10"
            print("$$$Most possilble Otype: O-3,10")
        else:
            O_choice = "O-1,3,19"
            print("$$$Most possilble Otype: O-1,3,19")
    else:
        try:
            O_choice = final_O[0][0].split("_")[0]
            if O_choice == "O-1,3,19":
                # O-1,3,19 without the supporting markers above: take the
                # runner-up reference instead.
                O_choice = final_O[1][0].split("_")[0]
        except IndexError:
            print("$$$No suitable Otype, or failure of mapping (please check the quality of raw reads)")
        else:
            # Two spaces after the colon reproduce the established output.
            print("$$$Most possilble Otype:  %s" % (O_choice,))
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
191
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
192
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
def assembly(sra_name,potential_choice,for_fq,rev_fq,for_sai,rev_sai,sam,bam,mapping_mode):
    """Decide between O-9 and O-2 and print the result.

    The reads are re-mapped to "database/ParaA_rfb.fasta".  Reads whose
    mapping quality is above 0 are written to a FASTA file and searched with
    blastn against "database/tyr_of_O2_O9.fasta".  For every read with at
    least two BLAST alignments the bit scores of the O-9 and O-2 references
    are compared, and the reference that wins more reads is reported.

    Parameters:
      sra_name          -- sample name, only echoed in the "$$$Genome:" line
      potential_choice  -- O type to mention when the vote is tied
      for_fq, rev_fq    -- read files; rev_fq == "" selects single-end mapping
      for_sai, rev_sai  -- scratch .sai files used by mapping_mode "sam"
      sam, bam          -- output paths; both are overwritten
      mapping_mode      -- "mem", "sam" or "nanopore"

    Returns None.  Both database paths are relative to the current working
    directory.

    NOTE(review): nesting was reconstructed from a listing without
    indentation; in particular the two "number is" lines are assumed to be
    printed unconditionally -- confirm against the repository copy.
    """
    database = "ParaA_rfb.fasta"
    reference = "database/" + database
    os.system("bwa index " + reference)
    if rev_fq == "":  # single-end
        if mapping_mode == "mem":
            os.system("bwa mem " + reference + " " + for_fq + " > " + sam)
        elif mapping_mode == "sam":
            os.system("bwa aln " + reference + " " + for_fq + " > " + for_sai)
            os.system("bwa samse " + reference + " " + for_sai + " " + for_fq + " > " + sam)
        elif mapping_mode == "nanopore":
            os.system("bwa mem -x ont2d " + reference + " " + for_fq + " > " + sam)
    else:  # paired-end
        if mapping_mode == "mem":
            os.system("bwa mem " + reference + " " + for_fq + " " + rev_fq + " > " + sam)
        elif mapping_mode == "sam":
            os.system("bwa aln " + reference + " " + for_fq + " > " + for_sai)
            os.system("bwa aln " + reference + " " + rev_fq + " > " + rev_sai)
            os.system("bwa sampe " + reference + " " + for_sai + " " + rev_sai
                      + " " + for_fq + " " + rev_fq + " > " + sam)
        elif mapping_mode == "nanopore":
            os.system("bwa mem -x ont2d " + reference + " " + for_fq + " " + rev_fq + " > " + sam)
    # Keep mapped reads only (-F 4) and write them back over the SAM file.
    os.system("samtools view -F 4 -Sbh " + sam + " > " + bam)
    os.system("samtools view -h -o " + sam + " " + bam)

    # Column 5 is the mapping quality, column 10 the read sequence and
    # column 1 the read name; the same filter feeds both files, so they are
    # line-for-line parallel.
    title_path = sam + "_title.txt"
    seq_path = sam + "_seq.txt"
    fasta_path = sam + ".fasta"
    os.system("cat " + sam + "|awk '{if ($5>0) {print $10}}'>" + seq_path)
    os.system("cat " + sam + "|awk '{if ($5>0) {print $1}}'>" + title_path)
    with open(title_path, "r") as titles, open(seq_path, "r") as seqs, \
            open(fasta_path, "w") as fasta:
        for name_line, seq in zip(titles, seqs):
            title = ">" + name_line
            # Both lengths include the trailing newline, as before.
            if len(seq) >= 50 and len(title) > 6:  # generally, can be adjusted
                fasta.write(title)
                fasta.write(seq)

    database2 = "tyr_of_O2_O9.fasta"
    xml_path = sam + "_vs_O29.xml"
    os.system('makeblastdb -in database/' + database2 + ' -out ' + database2 + '_db ' + '-dbtype nucl')
    os.system("blastn -query " + fasta_path + " -db " + database2 + "_db -out " + xml_path + " -outfmt 5")

    O9_bigger = 0  # reads scoring higher against the O-9 reference
    O2_bigger = 0  # reads scoring higher against the O-2 reference
    with open(xml_path) as xml_handle:
        for record in NCBIXML.parse(xml_handle):
            O9_score = 0
            O2_score = 0
            try:
                best = record.alignments[0]
                if 'O-9' in best.hit_def:
                    O9_score = best.hsps[0].bits
                    O2_score = record.alignments[1].hsps[0].bits
                elif 'O-2' in best.hit_def:
                    O9_score = record.alignments[1].hsps[0].bits
                    O2_score = best.hsps[0].bits
            except IndexError:
                # Fewer than two alignments: the read cannot be compared.
                continue
            if O9_score > O2_score:
                O9_bigger += 1
            if O9_score < O2_score:
                O2_bigger += 1

    print("$$$Genome: %s" % (sra_name,))
    if O9_bigger > O2_bigger:
        print("$$$Most possible Otype is O-9")
    elif O9_bigger < O2_bigger:
        print("$$$Most possible Otype is O-2")
    else:
        # The doubled spaces reproduce the established output format.
        print("$$$No suitable one, because can't distinct it's O-9 or O-2, but  %s  has a more possibility."
              % (potential_choice,))
    print("O-9 number is: %s" % (O9_bigger,))
    print("O-2 number is: %s" % (O2_bigger,))

    # Remove the scratch files; the BLAST database glob must stay unquoted.
    os.system("rm " + title_path)
    os.system("rm " + seq_path)
    os.system("rm " + fasta_path)
    os.system("rm " + database2 + "_db.*")
    os.system("rm " + xml_path)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
268
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
269
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
def parse_cli_args(argv):
    """Split the command line into the arguments of BWA_O_analysis.

    Accepted forms (argv[0] is the program name):
      prog <reads> <database> <mapping_mode> <file_mode>
      prog <reads> <database> <mapping_mode> <additional_file> <file_mode>

    The fourth argument is taken as the file mode when it is "1", "2" or
    "3"; otherwise it is the additional (reverse) read file and the file
    mode is read from the fifth argument.

    Returns (target, additional_file, data_base, mapping_mode, file_mode),
    with additional_file == "" in the short form.  Exits with a usage
    message when too few arguments are given.
    """
    usage = ("usage: %s <reads> <database> <mapping_mode> "
             "[additional_file] <file_mode>" % (argv[0] if argv else "prog",))
    if len(argv) < 5:
        sys.exit(usage)
    target = argv[1]  # should be sra format
    data_base = argv[2]
    mapping_mode = argv[3]
    if argv[4] in ("1", "2", "3"):
        return target, "", data_base, mapping_mode, argv[4]
    if len(argv) < 6:
        sys.exit(usage)
    return target, argv[4], data_base, mapping_mode, argv[5]


# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    target, additional_file, data_base, mapping_mode, file_mode = parse_cli_args(sys.argv)
    BWA_O_analysis(target, additional_file, data_base, mapping_mode, file_mode)