Mercurial > repos > estrain > seqsero_v1
comparison SeqSero/libs/deletion_compare.py @ 0:c577b57b7c74 draft
Uploaded
author | estrain |
---|---|
date | Wed, 06 Dec 2017 15:59:29 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c577b57b7c74 |
---|---|
1 | |
2 import os | |
3 from Bio import SeqIO | |
4 import sys | |
5 from Initial_functions import Uniq | |
6 from Bio.Blast import NCBIXML | |
7 | |
8 | |
9 target=sys.argv[1] #should be sra format | |
10 test_gene=sys.argv[2] | |
11 mapping_mode=sys.argv[3] | |
12 if sys.argv[4] not in ("1","2","3"): | |
13 additional_file=sys.argv[4] | |
14 file_mode=sys.argv[5] | |
15 else: | |
16 additional_file="" | |
17 file_mode=sys.argv[4] | |
18 | |
19 | |
20 | |
21 | |
22 def Copenhagen(sra_name,additional_file,mapping_mode,file_mode): | |
23 if file_mode=="1":#interleaved | |
24 if sra_name[-3:]=="sra": | |
25 del_fastq=1 | |
26 for_fq=sra_name.replace(".sra","_1.fastq") | |
27 rev_fq=sra_name.replace(".sra","_2.fastq") | |
28 for_sai=sra_name.replace(".sra","_1.sai") | |
29 rev_sai=sra_name.replace(".sra","_2.sai") | |
30 sam=sra_name.replace(".sra",".sam") | |
31 bam=sra_name.replace(".sra",".bam") | |
32 else: | |
33 del_fastq=0 | |
34 core_id=sra_name.split(".fastq")[0] | |
35 for_fq=core_id+"-read1.fastq" | |
36 rev_fq=core_id+"-read2.fastq" | |
37 for_sai=core_id+"_1.sai" | |
38 rev_sai=core_id+"_2.sai" | |
39 sam=core_id+".sam" | |
40 bam=core_id+".bam" | |
41 elif file_mode=="2":#seperated | |
42 forword_seq=sra_name | |
43 reverse_seq=additional_file | |
44 for_core_id=forword_seq.split(".fastq")[0] | |
45 re_core_id=reverse_seq.split(".fastq")[0] | |
46 for_fq=for_core_id+".fastq" | |
47 rev_fq=re_core_id+".fastq" | |
48 for_sai=for_core_id+".sai" | |
49 rev_sai=re_core_id+".sai" | |
50 sam=for_core_id+".sam" | |
51 bam=sam.replace(".sam",".bam") | |
52 elif file_mode=="3":#single-end | |
53 if sra_name[-3:]=="sra": | |
54 del_fastq=1 | |
55 for_fq=sra_name.replace(".sra","_1.fastq") | |
56 rev_fq=sra_name.replace(".sra","_2.fastq") | |
57 for_sai=sra_name.replace(".sra","_1.sai") | |
58 rev_sai=sra_name.replace(".sra","_2.sai") | |
59 sam=sra_name.replace(".sra",".sam") | |
60 bam=sra_name.replace(".sra",".bam") | |
61 else: | |
62 del_fastq=0 | |
63 core_id=sra_name.split(".fastq")[0] | |
64 for_fq=core_id+".fastq" | |
65 rev_fq=core_id+".fastq" | |
66 for_sai=core_id+"_1.sai" | |
67 rev_sai=core_id+"_2.sai" | |
68 sam=core_id+".sam" | |
69 bam=core_id+".bam" | |
70 | |
71 database="complete_oafA.fasta" | |
72 os.system("bwa index database/"+database)###01/28/2015 | |
73 if mapping_mode=="mem": | |
74 os.system("bwa mem database/"+database+" "+for_fq+" "+rev_fq+" > "+sam) #2014/12/23 | |
75 elif mapping_mode=="sam": | |
76 os.system("bwa aln database/"+database+" "+for_fq+" > "+for_sai) | |
77 os.system("bwa aln database/"+database+" "+rev_fq+" > "+rev_sai) | |
78 os.system("bwa sampe database/"+database+" "+for_sai+" "+ rev_sai+" "+for_fq+" "+rev_fq+" > "+sam) | |
79 os.system("samtools view -F 4 -Sbh "+sam+" > "+bam) | |
80 os.system("samtools view -h -o "+sam+" "+bam) | |
81 os.system("cat "+sam+"|awk '{if ($5>0) {print $10}}'>"+sam+"_seq.txt") | |
82 os.system("cat "+sam+"|awk '{if ($5>0) {print $1}}'>"+sam+"_title.txt") | |
83 file1=open(sam+"_title.txt","r") | |
84 file2=open(sam+"_seq.txt","r") | |
85 file1=file1.readlines() | |
86 file2=file2.readlines() | |
87 file=open(sam+".fasta","w") | |
88 for i in range(len(file1)): | |
89 title=">"+file1[i] | |
90 seq=file2[i] | |
91 if len(seq)>40 and (len(title)>5 or ("@" not in title)): | |
92 file.write(title) | |
93 file.write(seq) | |
94 file.close() | |
95 database2="oafA_of_O4_O5.fasta" | |
96 os.system('makeblastdb -in database/'+database2+' -out '+database2+'_db '+'-dbtype nucl') | |
97 os.system("blastn -query "+sam+".fasta"+" -db "+database2+"_db -out "+sam+"_vs_O45.xml -outfmt 5") | |
98 handle=open(sam+"_vs_O45.xml") | |
99 handle=NCBIXML.parse(handle) | |
100 handle=list(handle) | |
101 O9_bigger=0 | |
102 O2_bigger=0 | |
103 for x in handle: | |
104 O9_score=0 | |
105 O2_score=0 | |
106 try: | |
107 if 'O-4_full' in x.alignments[0].hit_def: | |
108 O9_score=x.alignments[0].hsps[0].bits | |
109 O2_score=x.alignments[1].hsps[0].bits | |
110 elif 'O-4_5-' in x.alignments[0].hit_def: | |
111 O9_score=x.alignments[1].hsps[0].bits | |
112 O2_score=x.alignments[0].hsps[0].bits | |
113 if O9_score>O2_score: | |
114 O9_bigger+=1 | |
115 if O9_score<O2_score: | |
116 O2_bigger+=1 | |
117 except: | |
118 continue | |
119 print "$$$Genome:",sra_name | |
120 if O9_bigger>O2_bigger: | |
121 print "$$$Typhimurium" | |
122 elif O9_bigger<O2_bigger: | |
123 print "$$$Typhimurium_O5-" | |
124 else: | |
125 print "$$$Typhimurium, even no 7 bases difference" | |
126 print "O-4 number is:",O9_bigger | |
127 print "O-4_5- number is:",O2_bigger | |
128 os.system("rm "+sam+"_title.txt")###01/28/2015 | |
129 os.system("rm "+sam+"_seq.txt")###01/28/2015 | |
130 os.system("rm "+sam+".fasta")###01/28/2015 | |
131 os.system("rm "+database2+"_db.*")###01/28/2015 | |
132 os.system("rm "+sam+"_vs_O45.xml")###01/28/2015 | |
133 | |
134 if test_gene=="oafA": | |
135 Copenhagen(target,additional_file,mapping_mode,file_mode) |