Mercurial > repos > iuc > tbl2gff3
diff tbl2gff3.py @ 0:965674d88d34 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tbl2gff3 commit 725ff8b6a4f01bf532052ce46400f529551a317d"
author | iuc |
---|---|
date | Tue, 07 Jul 2020 04:20:40 -0400 |
parents | |
children | 4a7f4b0cc0a3 |
line wrap: on
line diff
#!/usr/bin/env python
"""Convert a tab-separated table into GFF3 written to standard output."""
import argparse
import csv
import sys

from BCBio import GFF
from Bio.Seq import Seq
from Bio.SeqFeature import FeatureLocation, SeqFeature
from Bio.SeqRecord import SeqRecord


def c(row, v, default=None):
    """Resolve a column specifier against one table row.

    Args:
        row: Sequence of cell values for the current row.
        v: Column specifier. ``None`` selects ``default``; a value that
            ``int()`` accepts is treated as a 1-based column number; any
            other string is returned unchanged as a literal value.
        default: Value returned when ``v`` is ``None``.

    Returns:
        The cell at column ``v``, the literal ``v``, or ``default``.

    Raises:
        IndexError: If ``v`` is a column number beyond the end of ``row``.
    """
    if v is None:
        return default

    try:
        index = int(v)
    except ValueError:
        # Not a column number: the caller supplied a constant value.
        return v
    return row[index - 1]


def tbl2gff3(
    table,
    rid,
    begin,
    end,
    source=None,
    type=None,
    score=None,
    frame=None,
    a=None,
    strand_column=None,
    strand_value=None,
):
    """Build one SeqRecord per sequence id from a tab-separated table.

    Every argument other than ``table``, ``a`` and ``strand_value`` is a
    column specifier as understood by :func:`c` (1-based column number or
    literal value).

    Args:
        table: Iterable of tab-separated lines (e.g. an open text file).
        rid: Specifier for the record (sequence) id.
        begin: Specifier for the feature start; converted with ``int`` and
            passed to ``FeatureLocation`` without any offset applied.
        end: Specifier for the feature end; handled like ``begin``.
        source: Specifier for the feature source; ``"tbl2gff3"`` if omitted.
        type: Specifier for the feature type.
        score: Specifier for the feature score; converted with ``float``.
        frame: Accepted for interface compatibility; currently unused.
        a: Optional list of ``"key:specifier"`` strings, each adding a
            qualifier ``key`` whose value is resolved per row. Repeated
            keys accumulate into one list.
        strand_column: Specifier for a per-row strand; converted with ``int``.
        strand_value: Constant strand used when ``strand_column`` is not
            given; converted with ``int``.

    Returns:
        dict mapping record id to its ``SeqRecord``, in first-seen order of
        the ids, each carrying one ``SeqFeature`` per matching row.
    """
    records = {}
    # ``a`` is None when no -a option was supplied (argparse's default for
    # action="append"), so fall back to an empty sequence instead of
    # failing on iteration.
    attributes = [spec.split(":", 1) for spec in a or ()]

    for row in csv.reader(table, delimiter="\t"):
        # If we haven't seen this record before, populate it with a
        # placeholder sequence.
        recid = c(row, rid)
        if recid not in records:
            records[recid] = SeqRecord(Seq("ACTG"), id=recid)
        record = records[recid]

        qualifiers = {}
        score_value = c(row, score)
        if score_value is not None:
            qualifiers["score"] = float(score_value)
        qualifiers["source"] = c(row, source, "tbl2gff3")

        # A per-row strand column takes precedence over the constant value.
        strand = None
        if strand_column is not None:
            strand = int(c(row, strand_column))
        elif strand_value is not None:
            strand = int(strand_value)

        for key, column in attributes:
            qualifiers.setdefault(key, []).append(c(row, column))

        # The strand is set on the location rather than through
        # SeqFeature(strand=...), which Biopython has deprecated; on older
        # releases both spellings produce the same feature.
        location = FeatureLocation(
            int(c(row, begin)), int(c(row, end)), strand=strand
        )
        record.features.append(
            SeqFeature(location, type=c(row, type), qualifiers=qualifiers)
        )

    return records


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert tables to gff3", epilog="")
    parser.add_argument("table", type=argparse.FileType("r"), help="Tabular Input")
    parser.add_argument("rid", help="id column")
    parser.add_argument("begin", help="begin column")
    parser.add_argument("end", help="end column")
    parser.add_argument("--type", help="feature type column")
    parser.add_argument("--score", help="score column")
    parser.add_argument("--source", help="source column")
    parser.add_argument("--strand_column", help="strand column")
    parser.add_argument("--strand_value", help="strand value")
    # No --frame option is exposed; tbl2gff3() accepts ``frame`` but ignores it.
    parser.add_argument("-a", action="append", help="attribute column (-a k:v)")
    args = parser.parse_args()

    # Close the input handle opened by argparse once the table is consumed.
    with args.table:
        records = tbl2gff3(**vars(args))

    # One GFF.write call per record, matching the original output layout.
    for rec in records.values():
        GFF.write([rec], sys.stdout)