Mercurial > repos > estrain > seqsero_v1
diff SeqSero/libs/special_gene_test_assemblies.py @ 0:c577b57b7c74 draft
Uploaded
author | estrain |
---|---|
date | Wed, 06 Dec 2017 15:59:29 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SeqSero/libs/special_gene_test_assemblies.py Wed Dec 06 15:59:29 2017 -0500 @@ -0,0 +1,63 @@ +#just an possible use, we can use it to replace H**.py, treat fliC and fljB as the target genes? + +from __future__ import division +import sys +import os +from Bio.Blast import NCBIXML +from Bio import SeqIO + +def special_gene(target_fie,database,gene_list): + database=database.split("/")[-1]##########1/27/2015 + os.system('makeblastdb -in database/'+database+' -out '+database+'_db -dbtype nucl')##########1/28/2015 + os.system('blastn -query '+target_file+' -db '+database+'_db -out '+database+'_vs_'+target_file+'.xml '+'-outfmt 5')##########1/28/2015 + xml_file=database+'_vs_'+target_file+'.xml' + result_handle=open(xml_file) + blast_record=NCBIXML.parse(result_handle) + records=list(blast_record) + E_thresh=1e-10 + for x in gene_list: + handle=SeqIO.parse("database/"+database,"fasta")##########1/28/2015 + length_list=[] + for y in handle: + if x in y.description: + length_x=len(y.seq) + length_list.append(length_x) + aver_len=float(sum(length_list))/len(length_list) + hspbit=[] + alignmentlist=[] + for record in records: + for alignment in record.alignments: + if x in alignment.hit_def: #multi gene database, so... + print x,"got a hit, evaluating the hit quality..." + score=0 + for hsp in alignment.hsps: + if hsp.expect<E_thresh: + score+=hsp.bits + alignment=alignment.hit_def+':'+str(score) + hspbit.append(score) + alignmentlist.append(alignment) + scorelist=dict(zip(alignmentlist,hspbit)) + score=0 + for Htype in scorelist: + if scorelist[Htype]>score: + First_Choice=Htype + score=scorelist[Htype] + if float(score)>=0.1*aver_len: + print "$$$",First_Choice,"got a hit, score:",score + else: + print "$$$No ",x,"exists" + os.system("rm "+database+"_db.*")##########1/28/2015 + os.system("rm "+xml_file)##########1/28/2015 + +database=sys.argv[1] +target_file=sys.argv[2] +gene_list=[] +a=1 +i=3 +while a==1: + try: + gene_list.append(sys.argv[i]) + i+=1 + except: + a=0 +special_gene(target_file,database,gene_list) \ No newline at end of file