Mercurial > repos > iuc > tbl2gff3
comparison tbl2gff3.py @ 0:965674d88d34 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tbl2gff3 commit 725ff8b6a4f01bf532052ce46400f529551a317d"
author | iuc |
---|---|
date | Tue, 07 Jul 2020 04:20:40 -0400 |
parents | |
children | 4a7f4b0cc0a3 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:965674d88d34 |
---|---|
1 #!/usr/bin/env python | |
2 import argparse | |
3 import csv | |
4 import sys | |
5 | |
6 from BCBio import GFF | |
7 from Bio.Seq import Seq | |
8 from Bio.SeqFeature import FeatureLocation, SeqFeature | |
9 from Bio.SeqRecord import SeqRecord | |
10 | |
11 | |
def c(row, v, default=None):
    """Resolve a column specifier against a single table row.

    Args:
        row: indexable sequence of cell values for the current line.
        v: the specifier. ``None`` means "not supplied"; a value that
            ``int()`` accepts is treated as a 1-based column number; any
            other string is taken as a literal value.
        default: returned when ``v`` is ``None``.

    Returns:
        ``default``, the selected cell of ``row``, or ``v`` itself.

    Raises:
        IndexError: if the column number lies outside ``row``.
    """
    if v is not None:
        try:
            # Column numbers are 1-based, Python indexing is 0-based.
            return row[int(v) - 1]
        except ValueError:
            # Not an integer: the specifier is itself the value to use.
            return v
    return default
21 | |
22 | |
def tbl2gff3(
    table,
    rid,
    begin,
    end,
    source=None,
    type=None,
    score=None,
    frame=None,
    a=None,
    strand_column=None,
    strand_value=None,
):
    """Group the rows of a tab-separated table into per-sequence records.

    Each specifier argument is resolved for every row through ``c``: a
    numeric string selects that 1-based column of the row, any other
    string is used as a literal value.

    Args:
        table: iterable of tab-separated lines (e.g. an open text file).
        rid: specifier for the record id the feature is attached to.
        begin: specifier for the feature start; converted with ``int``.
        end: specifier for the feature end; converted with ``int``.
        source: specifier for the ``source`` qualifier; ``"tbl2gff3"`` is
            used when it is ``None``.
        type: specifier for the feature type.
        score: specifier for the ``score`` qualifier; converted with
            ``float`` when supplied.
        frame: accepted for interface compatibility; not used.
        a: list of ``key:value`` attribute specifiers, or ``None`` for no
            extra attributes. Values of a repeated key are collected in a
            list.
        strand_column: specifier for a per-row strand; converted with
            ``int``.
        strand_value: strand applied to every row when ``strand_column``
            is ``None``; converted with ``int``.

    Returns:
        dict mapping each record id to a ``SeqRecord`` (with the
        placeholder sequence ``"ACTG"``) holding one ``SeqFeature`` per
        row.

    Raises:
        ValueError: if a coordinate, score or strand cannot be converted,
            or an attribute specifier contains no ``:``.
        IndexError: if a column number lies outside a row.
    """
    # ``a`` is None when no -a option was given; iterate over nothing then
    # instead of failing with "'NoneType' object is not iterable".
    attribute_specs = a or []

    records = {}

    for row in csv.reader(table, delimiter="\t"):
        # csv.reader yields an empty list for a blank line; there is
        # nothing to convert, so skip it instead of failing on the id.
        if not row:
            continue

        # If we haven't seen this record before, populate it.
        recid = c(row, rid)
        if recid not in records:
            records[recid] = SeqRecord(Seq("ACTG"), id=recid)
        record = records[recid]

        qualifiers = {}
        score_value = c(row, score)
        if score_value is not None:
            qualifiers["score"] = float(score_value)

        qualifiers["source"] = c(row, source, "tbl2gff3")

        # A per-row strand column takes precedence over a fixed value.
        strand = None
        if strand_column is not None:
            strand = int(c(row, strand_column))
        elif strand_value is not None:
            strand = int(strand_value)

        for spec in attribute_specs:
            key, value_spec = spec.split(":", 1)
            qualifiers.setdefault(key, []).append(c(row, value_spec))

        feature = SeqFeature(
            FeatureLocation(int(c(row, begin)), int(c(row, end))),
            type=c(row, type),
            strand=strand,
            qualifiers=qualifiers,
        )
        record.features.append(feature)

    return records
77 | |
78 | |
if __name__ == "__main__":
    # Command-line entry point: collect the column specifiers, convert the
    # table and write every resulting record to standard output.
    parser = argparse.ArgumentParser(description="Convert tables to gff3", epilog="")
    parser.add_argument("table", type=argparse.FileType("r"), help="Tabular Input")

    # Required positional specifiers, registered in command-line order.
    for positional, text in (
        ("rid", "id column"),
        ("begin", "begin column"),
        ("end", "end column"),
    ):
        parser.add_argument(positional, help=text)

    # Optional specifiers; no --frame option is registered.
    for flag, text in (
        ("--type", "feature type column"),
        ("--score", "score column"),
        ("--source", "source column"),
        ("--strand_column", "strand column"),
        ("--strand_value", "strand value"),
    ):
        parser.add_argument(flag, help=text)

    parser.add_argument("-a", action="append", help="attribute column (-a k:v)")
    args = parser.parse_args()

    # The parsed option names match the keyword arguments of tbl2gff3.
    records = tbl2gff3(**vars(args))
    for rec in records.values():
        GFF.write([rec], sys.stdout)