select_product: select_product.py comparison

comparison select_product.py @ 10:82186605ba75 draft default tip

Uploaded

author	brigidar
date	Fri, 09 Oct 2015 17:30:50 -0400
parents	53f1f83e80a1
children

comparison

equal deleted inserted replaced

-:0c769b205d18
+:82186605ba75
 sys.exit(error)
 return FH
-def feature_table(chr_id, source, orient, genes, transcripts, cds, exons, product, keys_1, n):
-"""
-Write the feature information
-"""
-lines = []
-for gname, ginfo in genes.items():
-line = [str(chr_id),
-ginfo[3],
-str(ginfo[0]-n),
-str(ginfo[1]+n),
-ginfo[2],
-str(product)]
-# to key on specfic products only
-for k in keys_1.split(","):
-if k in str(product):
-lines.append('\t'.join(line)+"\n")
-break
-return lines
 def gbk_parse(fname,keys_1,n,outfile=""):
 """
 Extract genome annotation recods from genbank format
 @args fname: gbk file name
 fhand = open_file(gbkfname)
 lines = []
 for record in SeqIO.parse(fhand, "genbank"):
 gene_tags = dict()
-tx_tags = collections.defaultdict(list)
-exon = collections.defaultdict(list)
-cds = collections.defaultdict(list)
 mol_type, chr_id = None, None
 for rec in record.features:
 product = "NA"
 if rec.qualifiers.has_key('product'):
 product = rec.qualifiers['product'][0]
 if rec.type == 'source':
 try:
 mol_type = rec.qualifiers['mol_type'][0]
 except:
 try:
 chr_id = rec.qualifiers['chromosome'][0]
 except:
 chr_id = record.name
 continue
-strand='-'
+start = int(rec.location.start)+1-n
-strand='+' if rec.strand>0 else strand
+stop = int(rec.location.end)+n
-fid = None
-try:
-fid = rec.qualifiers['gene'][0]
-except:
-pass
-transcript_id = None
+if start < 0:
-try:
+start = 1
-transcript_id = rec.qualifiers['transcript_id'][0]
-except:
+for k in keys_1.split(","):
-pass
+if str(k) in str(product):
+lines.append('\t'.join([str(chr_id),str(start), str(stop), product])+"\n")
-if re.search(r'gene', rec.type):
-gene_tags[fid] = (rec.location._start.position+1,
-rec.location._end.position,
-strand,
-rec.type
-)
-elif rec.type == 'exon':
-exon[fid].append((rec.location._start.position+1,
-rec.location._end.position))
-elif rec.type=='CDS':
-cds[fid].append((rec.location._start.position+1,
-rec.location._end.position))
-else:
-# get all transcripts
-if transcript_id:
-tx_tags[fid].append((rec.location._start.position+1,
-rec.location._end.position,
-transcript_id,
-rec.type))
-# record extracted, generate feature table
-lines += feature_table(chr_id, mol_type, strand, gene_tags, tx_tags, cds, exon, product,keys_1,n)
 fhand.close()
 return lines
 if __name__=='__main__':
 try:
 parser = argparse.ArgumentParser()
 parser.add_argument('-k', '--keys_1', type=str, help="list of products to key on in a comma delimited list")
 parser.add_argument('-g', '--gbk_file', help="genbank file of reference")

Mercurial > repos > brigidar > select_product

comparison select_product.py @ 10:82186605ba75 draft default tip