annotate SeqSero/libs/special_gene_test_assemblies.py @ 4:ab0802d77891 draft default tip

Uploaded
author estrain
date Thu, 12 Sep 2019 06:46:00 -0400
parents c577b57b7c74
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
1 #just an possible use, we can use it to replace H**.py, treat fliC and fljB as the target genes?
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
2
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
3 from __future__ import division
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
4 import sys
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
5 import os
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
6 from Bio.Blast import NCBIXML
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
7 from Bio import SeqIO
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
8
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
9 def special_gene(target_fie,database,gene_list):
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
10 database=database.split("/")[-1]##########1/27/2015
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
11 os.system('makeblastdb -in database/'+database+' -out '+database+'_db -dbtype nucl')##########1/28/2015
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
12 os.system('blastn -query '+target_file+' -db '+database+'_db -out '+database+'_vs_'+target_file+'.xml '+'-outfmt 5')##########1/28/2015
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
13 xml_file=database+'_vs_'+target_file+'.xml'
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
14 result_handle=open(xml_file)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
15 blast_record=NCBIXML.parse(result_handle)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
16 records=list(blast_record)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
17 E_thresh=1e-10
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
18 for x in gene_list:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
19 handle=SeqIO.parse("database/"+database,"fasta")##########1/28/2015
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
20 length_list=[]
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
21 for y in handle:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
22 if x in y.description:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
23 length_x=len(y.seq)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
24 length_list.append(length_x)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
25 aver_len=float(sum(length_list))/len(length_list)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
26 hspbit=[]
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
27 alignmentlist=[]
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
28 for record in records:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
29 for alignment in record.alignments:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
30 if x in alignment.hit_def: #multi gene database, so...
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
31 print x,"got a hit, evaluating the hit quality..."
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
32 score=0
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
33 for hsp in alignment.hsps:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
34 if hsp.expect<E_thresh:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
35 score+=hsp.bits
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
36 alignment=alignment.hit_def+':'+str(score)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
37 hspbit.append(score)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
38 alignmentlist.append(alignment)
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
39 scorelist=dict(zip(alignmentlist,hspbit))
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
40 score=0
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
41 for Htype in scorelist:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
42 if scorelist[Htype]>score:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
43 First_Choice=Htype
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
44 score=scorelist[Htype]
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
45 if float(score)>=0.1*aver_len:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
46 print "$$$",First_Choice,"got a hit, score:",score
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
47 else:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
48 print "$$$No ",x,"exists"
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
49 os.system("rm "+database+"_db.*")##########1/28/2015
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
50 os.system("rm "+xml_file)##########1/28/2015
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
51
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
52 database=sys.argv[1]
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
53 target_file=sys.argv[2]
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
54 gene_list=[]
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
55 a=1
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
56 i=3
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
57 while a==1:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
58 try:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
59 gene_list.append(sys.argv[i])
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
60 i+=1
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
61 except:
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
62 a=0
c577b57b7c74 Uploaded
estrain
parents:
diff changeset
63 special_gene(target_file,database,gene_list)