Mercurial > repos > iuc > tbl2gff3
view tbl2gff3.py @ 0:965674d88d34 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tbl2gff3 commit 725ff8b6a4f01bf532052ce46400f529551a317d"
author | iuc |
---|---|
date | Tue, 07 Jul 2020 04:20:40 -0400 |
parents | |
children | 4a7f4b0cc0a3 |
line wrap: on
line source
#!/usr/bin/env python
"""Convert a tab-separated table into GFF3 records written to stdout."""
import argparse
import csv
import sys

from BCBio import GFF
from Bio.Seq import Seq
from Bio.SeqFeature import FeatureLocation, SeqFeature
from Bio.SeqRecord import SeqRecord


def c(row, v, default=None):
    """Resolve a column specifier against one table row.

    Args:
        row: Sequence of cell values for the current row.
        v: Either ``None``, something ``int()`` accepts (interpreted as a
            1-based column number), or any other string, which is used
            verbatim as a literal value.
        default: Value returned when ``v`` is ``None``.

    Returns:
        ``default`` if ``v`` is ``None``; the cell at column ``v`` when ``v``
        parses as an integer; otherwise ``v`` itself.

    Raises:
        IndexError: If ``v`` is a column number beyond the end of ``row``.
    """
    if v is None:
        return default

    try:
        column = int(v)
    except ValueError:
        # Not a column number: the caller supplied a literal value.
        return v
    return row[column - 1]


def tbl2gff3(
    table,
    rid,
    begin,
    end,
    source=None,
    type=None,
    score=None,
    frame=None,
    a=None,
    strand_column=None,
    strand_value=None,
):
    """Build one ``SeqRecord`` per record id from a tab-separated table.

    Every non-blank row becomes a ``SeqFeature`` appended to the record named
    by that row's ``rid`` column. Column arguments are resolved with ``c``,
    so each may be a 1-based column number or a literal value.

    Args:
        table: Iterable of lines (e.g. an open text file) in TSV format.
        rid: Column specifier for the record id.
        begin: Column specifier for the feature start (converted with ``int``).
        end: Column specifier for the feature end (converted with ``int``).
        source: Column specifier for the ``source`` qualifier; defaults to the
            literal ``"tbl2gff3"`` when omitted.
        type: Column specifier for the feature type.
        score: Column specifier for the score (converted with ``float``).
        frame: Accepted for interface compatibility; not used.
        a: Optional list of ``"key:column"`` strings; each adds the resolved
            column value to the list stored under qualifier ``key``. ``None``
            (what argparse supplies when no ``-a`` is given) means no extra
            attributes.
        strand_column: Column specifier for the strand (converted with
            ``int``). Takes precedence over ``strand_value``.
        strand_value: Constant strand applied to every feature (converted
            with ``int``).

    Returns:
        dict mapping each record id to its ``SeqRecord``.
    """
    records = {}
    # argparse's action="append" yields None, not [], when -a is never passed.
    attributes = a if a is not None else []

    for row in csv.reader(table, delimiter="\t"):
        # csv.reader yields [] for blank lines; there is nothing to convert.
        if not row:
            continue

        # If we haven't seen this record before, populate it.
        recid = c(row, rid)
        if recid not in records:
            records[recid] = SeqRecord(Seq("ACTG"), id=recid)
        record = records[recid]

        qualifiers = {}
        score_value = c(row, score)
        if score_value is not None:
            qualifiers["score"] = float(score_value)
        qualifiers["source"] = c(row, source, "tbl2gff3")

        strand = None
        if strand_column is not None:
            strand = int(c(row, strand_column))
        elif strand_value is not None:
            strand = int(strand_value)

        for spec in attributes:
            key, column = spec.split(":", 1)
            value = c(row, column)
            if key in qualifiers:
                qualifiers[key].append(value)
            else:
                qualifiers[key] = [value]

        feature = SeqFeature(
            FeatureLocation(int(c(row, begin)), int(c(row, end))),
            type=c(row, type),
            strand=strand,
            qualifiers=qualifiers,
        )
        record.features.append(feature)

    return records


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert tables to gff3", epilog="")
    parser.add_argument("table", type=argparse.FileType("r"), help="Tabular Input")
    parser.add_argument("rid", help="id column")
    parser.add_argument("begin", help="begin column")
    parser.add_argument("end", help="end column")
    parser.add_argument("--type", help="feature type column")
    parser.add_argument("--score", help="score column")
    parser.add_argument("--source", help="source column")
    parser.add_argument("--strand_column", help="strand column")
    parser.add_argument("--strand_value", help="strand value")
    # parser.add_argument('--frame', help='frame column')
    parser.add_argument("-a", action="append", help="attribute column (-a k:v)")
    args = parser.parse_args()

    for rec in tbl2gff3(**vars(args)).values():
        GFF.write([rec], sys.stdout)