Mercurial > repos > brigidar > select_product
comparison select_product.py @ 10:82186605ba75 draft default tip
Uploaded
author | brigidar |
---|---|
date | Fri, 09 Oct 2015 17:30:50 -0400 |
parents | 53f1f83e80a1 |
children |
comparison
equal
deleted
inserted
replaced
9:0c769b205d18 | 10:82186605ba75 |
---|---|
36 sys.exit(error) | 36 sys.exit(error) |
37 | 37 |
38 return FH | 38 return FH |
39 | 39 |
40 | 40 |
41 def feature_table(chr_id, source, orient, genes, transcripts, cds, exons, product, keys_1, n): | |
42 """ | |
43 Write the feature information | |
44 """ | |
45 | |
46 lines = [] | |
47 | |
48 for gname, ginfo in genes.items(): | |
49 line = [str(chr_id), | |
50 ginfo[3], | |
51 str(ginfo[0]-n), | |
52 str(ginfo[1]+n), | |
53 ginfo[2], | |
54 str(product)] | |
55 | |
56 # to key on specfic products only | |
57 for k in keys_1.split(","): | |
58 if k in str(product): | |
59 | |
60 lines.append('\t'.join(line)+"\n") | |
61 break | |
62 | |
63 return lines | |
64 | |
65 | |
66 | |
67 | |
68 def gbk_parse(fname,keys_1,n,outfile=""): | 41 def gbk_parse(fname,keys_1,n,outfile=""): |
69 """ | 42 """ |
70 Extract genome annotation recods from genbank format | 43 Extract genome annotation recods from genbank format |
71 | 44 |
72 @args fname: gbk file name | 45 @args fname: gbk file name |
75 fhand = open_file(gbkfname) | 48 fhand = open_file(gbkfname) |
76 lines = [] | 49 lines = [] |
77 | 50 |
78 for record in SeqIO.parse(fhand, "genbank"): | 51 for record in SeqIO.parse(fhand, "genbank"): |
79 gene_tags = dict() | 52 gene_tags = dict() |
80 tx_tags = collections.defaultdict(list) | 53 |
81 exon = collections.defaultdict(list) | |
82 cds = collections.defaultdict(list) | |
83 mol_type, chr_id = None, None | 54 mol_type, chr_id = None, None |
84 | 55 |
85 for rec in record.features: | 56 for rec in record.features: |
86 | 57 |
87 | 58 |
88 product = "NA" | 59 product = "NA" |
89 | 60 |
90 if rec.qualifiers.has_key('product'): | 61 if rec.qualifiers.has_key('product'): |
91 | 62 |
92 product = rec.qualifiers['product'][0] | 63 product = rec.qualifiers['product'][0] |
93 | 64 |
94 | 65 |
95 if rec.type == 'source': | 66 if rec.type == 'source': |
96 try: | 67 try: |
97 mol_type = rec.qualifiers['mol_type'][0] | 68 mol_type = rec.qualifiers['mol_type'][0] |
98 except: | 69 except: |
101 try: | 72 try: |
102 chr_id = rec.qualifiers['chromosome'][0] | 73 chr_id = rec.qualifiers['chromosome'][0] |
103 except: | 74 except: |
104 chr_id = record.name | 75 chr_id = record.name |
105 continue | 76 continue |
77 | |
106 | 78 |
107 strand='-' | 79 start = int(rec.location.start)+1-n |
108 strand='+' if rec.strand>0 else strand | 80 stop = int(rec.location.end)+n |
109 | |
110 fid = None | |
111 try: | |
112 fid = rec.qualifiers['gene'][0] | |
113 except: | |
114 pass | |
115 | 81 |
116 transcript_id = None | 82 if start < 0: |
117 try: | 83 start = 1 |
118 transcript_id = rec.qualifiers['transcript_id'][0] | 84 |
119 except: | 85 for k in keys_1.split(","): |
120 pass | 86 |
87 if str(k) in str(product): | |
88 lines.append('\t'.join([str(chr_id),str(start), str(stop), product])+"\n") | |
121 | 89 |
122 if re.search(r'gene', rec.type): | 90 |
123 gene_tags[fid] = (rec.location._start.position+1, | |
124 rec.location._end.position, | |
125 strand, | |
126 rec.type | |
127 ) | |
128 elif rec.type == 'exon': | |
129 exon[fid].append((rec.location._start.position+1, | |
130 rec.location._end.position)) | |
131 elif rec.type=='CDS': | |
132 cds[fid].append((rec.location._start.position+1, | |
133 rec.location._end.position)) | |
134 else: | |
135 # get all transcripts | |
136 if transcript_id: | |
137 tx_tags[fid].append((rec.location._start.position+1, | |
138 rec.location._end.position, | |
139 transcript_id, | |
140 rec.type)) | |
141 # record extracted, generate feature table | |
142 lines += feature_table(chr_id, mol_type, strand, gene_tags, tx_tags, cds, exon, product,keys_1,n) | |
143 | |
144 fhand.close() | 91 fhand.close() |
145 | 92 |
146 return lines | 93 return lines |
147 | 94 |
148 if __name__=='__main__': | 95 if __name__=='__main__': |
149 | 96 |
150 try: | 97 try: |
151 parser = argparse.ArgumentParser() | 98 parser = argparse.ArgumentParser() |
152 parser.add_argument('-k', '--keys_1', type=str, help="list of products to key on in a comma delimited list") | 99 parser.add_argument('-k', '--keys_1', type=str, help="list of products to key on in a comma delimited list") |
153 parser.add_argument('-g', '--gbk_file', help="genbank file of reference") | 100 parser.add_argument('-g', '--gbk_file', help="genbank file of reference") |