annotate SeqSero/libs/deletion_compare.py @ 0:c577b57b7c74 draft

Uploaded
author estrain
date Wed, 06 Dec 2017 15:59:29 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
1
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
2 import os
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
3 from Bio import SeqIO
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
4 import sys
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
5 from Initial_functions import Uniq
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
6 from Bio.Blast import NCBIXML
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
7
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
8
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
# Command line layout:
#   <target> <test_gene> <mapping_mode> [additional_file] <file_mode>
# target is the reads file (a ".sra" archive or a ".fastq" file).
target = sys.argv[1]
test_gene = sys.argv[2]
mapping_mode = sys.argv[3]

# The 4th argument is overloaded: when it is one of the file-mode codes
# "1"/"2"/"3" no second reads file was supplied; otherwise it is the second
# reads file and the file-mode code follows as the 5th argument.
if sys.argv[4] in ("1", "2", "3"):
    additional_file = ""
    file_mode = sys.argv[4]
else:
    additional_file = sys.argv[4]
    file_mode = sys.argv[5]
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
18
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
19
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
20
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
21
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
def _oafa_read_paths(sra_name, additional_file, file_mode):
    """Derive the read and intermediate file names used by Copenhagen().

    Returns the tuple (for_fq, rev_fq, for_sai, rev_sai, sam, bam).

    Raises ValueError when file_mode is not "1", "2" or "3", so that the
    caller fails before any external command has been started.
    """
    if file_mode not in ("1", "2", "3"):
        raise ValueError(
            "file_mode must be '1', '2' or '3', got %r" % (file_mode,))

    if file_mode == "2":  # separated: forward and reverse reads in two files
        for_core_id = sra_name.split(".fastq")[0]
        re_core_id = additional_file.split(".fastq")[0]
        # The BAM name is built from the core id (as in the other modes)
        # rather than by replacing ".sam" inside the SAM name, which would
        # also rewrite a ".sam" occurring elsewhere in the path.
        return (for_core_id + ".fastq", re_core_id + ".fastq",
                for_core_id + ".sai", re_core_id + ".sai",
                for_core_id + ".sam", for_core_id + ".bam")

    # Modes "1" (interleaved) and "3" (single-end) share the ".sra" naming.
    if sra_name[-3:] == "sra":
        def with_suffix(suffix):
            return sra_name.replace(".sra", suffix)
        return (with_suffix("_1.fastq"), with_suffix("_2.fastq"),
                with_suffix("_1.sai"), with_suffix("_2.sai"),
                with_suffix(".sam"), with_suffix(".bam"))

    core_id = sra_name.split(".fastq")[0]
    if file_mode == "1":
        for_fq = core_id + "-read1.fastq"
        rev_fq = core_id + "-read2.fastq"
    else:
        # Single-end: the same fastq is used as both "forward" and "reverse".
        for_fq = rev_fq = core_id + ".fastq"
    return (for_fq, rev_fq, core_id + "_1.sai", core_id + "_2.sai",
            core_id + ".sam", core_id + ".bam")


def Copenhagen(sra_name,additional_file,mapping_mode,file_mode):
    """Map reads to the oafA reference and report which oafA variant wins.

    Steps: index database/complete_oafA.fasta with bwa, map the reads, keep
    mapped reads with field 5 of the SAM record (MAPQ in the SAM format)
    greater than 0, BLAST them against database/oafA_of_O4_O5.fasta and
    count, per read, whether the 'O-4_full' or the 'O-4_5-' hit has the
    higher bit score. The verdict and both counts are printed to stdout;
    nothing is returned.

    Parameters:
      sra_name        -- reads file; a name ending in "sra" or a ".fastq" name.
      additional_file -- second reads file, only used when file_mode is "2".
      mapping_mode    -- "mem" runs `bwa mem`; "sam" runs `bwa aln` + `bwa
                         sampe`. Any other value runs no mapping step, so the
                         SAM file must already exist for the rest to work.
      file_mode       -- "1" interleaved, "2" separated, "3" single-end.

    Raises:
      ValueError -- file_mode is not "1", "2" or "3".

    The temporary title/seq/fasta/BLAST files are removed at the end; the
    SAM/BAM (and .sai) files are left in place.
    """
    for_fq, rev_fq, for_sai, rev_sai, sam, bam = _oafa_read_paths(
        sra_name, additional_file, file_mode)

    # NOTE(review): paths are interpolated unquoted into shell command lines,
    # so names containing spaces or shell metacharacters will break (or be
    # interpreted by the shell). Exit codes of the tools are not checked.
    database = "complete_oafA.fasta"
    reference = "database/" + database
    os.system("bwa index " + reference)
    if mapping_mode == "mem":
        os.system("bwa mem " + reference + " " + for_fq + " " + rev_fq + " > " + sam)
    elif mapping_mode == "sam":
        os.system("bwa aln " + reference + " " + for_fq + " > " + for_sai)
        os.system("bwa aln " + reference + " " + rev_fq + " > " + rev_sai)
        os.system("bwa sampe " + reference + " " + for_sai + " " + rev_sai
                  + " " + for_fq + " " + rev_fq + " > " + sam)

    # Drop unmapped reads (-F 4) into the BAM, then write it back over the SAM.
    os.system("samtools view -F 4 -Sbh " + sam + " > " + bam)
    os.system("samtools view -h -o " + sam + " " + bam)

    # Extract read names (field 1) and sequences (field 10) of records whose
    # field 5 is > 0, as two parallel line-per-read files.
    seq_path = sam + "_seq.txt"
    title_path = sam + "_title.txt"
    fasta_path = sam + ".fasta"
    os.system("cat " + sam + "|awk '{if ($5>0) {print $10}}'>" + seq_path)
    os.system("cat " + sam + "|awk '{if ($5>0) {print $1}}'>" + title_path)

    with open(title_path, "r") as title_handle:
        title_lines = title_handle.readlines()
    with open(seq_path, "r") as seq_handle:
        seq_lines = seq_handle.readlines()

    with open(fasta_path, "w") as fasta_handle:
        for name_line, seq in zip(title_lines, seq_lines):
            title = ">" + name_line
            # Lengths include the trailing newline. The title test is the
            # original filter, presumably meant to skip SAM header lines
            # (which start with "@") -- TODO confirm.
            if len(seq) > 40 and (len(title) > 5 or ("@" not in title)):
                fasta_handle.write(title)
                fasta_handle.write(seq)

    database2 = "oafA_of_O4_O5.fasta"
    xml_path = sam + "_vs_O45.xml"
    os.system('makeblastdb -in database/' + database2 + ' -out ' + database2 + '_db ' + '-dbtype nucl')
    os.system("blastn -query " + fasta_path + " -db " + database2 + "_db -out " + xml_path + " -outfmt 5")

    with open(xml_path) as xml_handle:
        records = list(NCBIXML.parse(xml_handle))

    o4_full_wins = 0    # reads scoring higher against the 'O-4_full' hit
    o4_5minus_wins = 0  # reads scoring higher against the 'O-4_5-' hit
    for record in records:
        full_score = 0
        minus_score = 0
        try:
            best_hit = record.alignments[0]
            if 'O-4_full' in best_hit.hit_def:
                full_score = best_hit.hsps[0].bits
                minus_score = record.alignments[1].hsps[0].bits
            elif 'O-4_5-' in best_hit.hit_def:
                full_score = record.alignments[1].hsps[0].bits
                minus_score = best_hit.hsps[0].bits
        except IndexError:
            # The read did not hit both references (or had no hit at all):
            # it cannot be compared, so it is not counted for either side.
            continue
        if full_score > minus_score:
            o4_full_wins += 1
        if full_score < minus_score:
            o4_5minus_wins += 1

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("$$$Genome: " + str(sra_name))
    if o4_full_wins > o4_5minus_wins:
        print("$$$Typhimurium")
    elif o4_full_wins < o4_5minus_wins:
        print("$$$Typhimurium_O5-")
    else:
        print("$$$Typhimurium, even no 7 bases difference")
    print("O-4 number is: " + str(o4_full_wins))
    print("O-4_5- number is: " + str(o4_5minus_wins))

    # Clean up the temporary files created above (rm keeps the shell glob
    # for the BLAST database files).
    os.system("rm " + title_path)
    os.system("rm " + seq_path)
    os.system("rm " + fasta_path)
    os.system("rm " + database2 + "_db.*")
    os.system("rm " + xml_path)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
133
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
# Only the oafA test is implemented here; any other test_gene value makes the
# script a no-op.
if test_gene == "oafA":
    Copenhagen(
        target,
        additional_file,
        mapping_mode,
        file_mode,
    )