Mercurial > repos > estrain > seqsero_v1
view SeqSero/libs/special_gene_test_assemblies.py @ 0:c577b57b7c74 draft
Uploaded
author | estrain |
---|---|
date | Wed, 06 Dec 2017 15:59:29 -0500 |
parents | |
children |
line wrap: on
line source
#just an possible use, we can use it to replace H**.py, treat fliC and fljB as the target genes? from __future__ import division import sys import os from Bio.Blast import NCBIXML from Bio import SeqIO def special_gene(target_fie,database,gene_list): database=database.split("/")[-1]##########1/27/2015 os.system('makeblastdb -in database/'+database+' -out '+database+'_db -dbtype nucl')##########1/28/2015 os.system('blastn -query '+target_file+' -db '+database+'_db -out '+database+'_vs_'+target_file+'.xml '+'-outfmt 5')##########1/28/2015 xml_file=database+'_vs_'+target_file+'.xml' result_handle=open(xml_file) blast_record=NCBIXML.parse(result_handle) records=list(blast_record) E_thresh=1e-10 for x in gene_list: handle=SeqIO.parse("database/"+database,"fasta")##########1/28/2015 length_list=[] for y in handle: if x in y.description: length_x=len(y.seq) length_list.append(length_x) aver_len=float(sum(length_list))/len(length_list) hspbit=[] alignmentlist=[] for record in records: for alignment in record.alignments: if x in alignment.hit_def: #multi gene database, so... print x,"got a hit, evaluating the hit quality..." score=0 for hsp in alignment.hsps: if hsp.expect<E_thresh: score+=hsp.bits alignment=alignment.hit_def+':'+str(score) hspbit.append(score) alignmentlist.append(alignment) scorelist=dict(zip(alignmentlist,hspbit)) score=0 for Htype in scorelist: if scorelist[Htype]>score: First_Choice=Htype score=scorelist[Htype] if float(score)>=0.1*aver_len: print "$$$",First_Choice,"got a hit, score:",score else: print "$$$No ",x,"exists" os.system("rm "+database+"_db.*")##########1/28/2015 os.system("rm "+xml_file)##########1/28/2015 database=sys.argv[1] target_file=sys.argv[2] gene_list=[] a=1 i=3 while a==1: try: gene_list.append(sys.argv[i]) i+=1 except: a=0 special_gene(target_file,database,gene_list)