view tbl2gff3.py @ 0:965674d88d34 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tbl2gff3 commit 725ff8b6a4f01bf532052ce46400f529551a317d"
author iuc
date Tue, 07 Jul 2020 04:20:40 -0400
parents
children 4a7f4b0cc0a3
line wrap: on
line source

#!/usr/bin/env python
import argparse
import csv
import sys

from BCBio import GFF
from Bio.Seq import Seq
from Bio.SeqFeature import FeatureLocation, SeqFeature
from Bio.SeqRecord import SeqRecord


def c(row, v, default=None):
    """Resolve a column specifier against one table row.

    ``v`` is interpreted in one of three ways:

    * ``None``: nothing was specified, so ``default`` is returned.
    * Something ``int()`` accepts: treated as a 1-based column number and the
      matching cell of ``row`` is returned. No range check is made, so values
      below 1 fall through to Python's negative indexing and values past the
      end of the row raise ``IndexError``.
    * Anything else that makes ``int()`` raise ``ValueError``: treated as a
      literal value and returned unchanged.
    """
    if v is None:
        return default

    try:
        column = int(v)
    except ValueError:
        # Not a column number, so the caller supplied a fixed value.
        return v

    return row[column - 1]


def tbl2gff3(
    table,
    rid,
    begin,
    end,
    source=None,
    type=None,
    score=None,
    frame=None,
    a=None,
    strand_column=None,
    strand_value=None,
):
    """Turn a tab-separated table into SeqRecords carrying one feature per row.

    ``rid``, ``begin``, ``end``, ``source``, ``type``, ``score`` and
    ``strand_column`` are column specifiers resolved per row by ``c``: a value
    that parses as an integer selects that 1-based column, any other value is
    used literally.

    Args:
        table: Iterable of text lines (e.g. an open file) in tab-separated form.
        rid: Specifier for the record id; rows sharing an id share a record.
        begin: Specifier for the feature start; converted with ``int`` and
            passed unchanged to ``FeatureLocation``.
        end: Specifier for the feature end; handled like ``begin``.
        source: Specifier for the ``source`` qualifier; ``"tbl2gff3"`` when
            omitted.
        type: Specifier for the feature type.
        score: Specifier for the ``score`` qualifier; stored as ``float`` and
            left out entirely when omitted.
        frame: Accepted for interface compatibility; not used.
        a: Optional list of ``"key:value"`` strings. ``value`` is itself a
            column specifier; repeated keys accumulate into a list.
        strand_column: Specifier for a per-row strand, converted with ``int``.
            Takes precedence over ``strand_value``.
        strand_value: Fixed strand for every row, converted with ``int``.

    Returns:
        dict mapping each record id to its ``SeqRecord``.

    Raises:
        IndexError: A column number points past the end of a row.
        ValueError: A begin/end/score/strand value is not numeric, or an
            attribute string contains no ``":"``.
    """
    records = {}

    # argparse's "append" action (and this function's own default) leaves ``a``
    # as None when no attribute was requested; iterate an empty list instead.
    attributes = a if a is not None else []

    for row in csv.reader(table, delimiter="\t"):
        # csv.reader yields an empty list for a blank line; there are no
        # columns to resolve, so skip it instead of failing on the lookup.
        if not row:
            continue

        # First time this record id is seen: create its record. Every record
        # is given the same fixed "ACTG" sequence.
        recid = c(row, rid)
        if recid not in records:
            records[recid] = SeqRecord(Seq("ACTG"), id=recid)
        record = records[recid]

        qualifiers = {}
        score_value = c(row, score)
        if score_value is not None:
            qualifiers["score"] = float(score_value)

        qualifiers["source"] = c(row, source, "tbl2gff3")

        strand = None
        if strand_column is not None:
            strand = int(c(row, strand_column))
        elif strand_value is not None:
            strand = int(strand_value)

        for spec in attributes:
            # Split on the first ":" only so the value part may contain colons.
            key, value_spec = spec.split(":", 1)
            value = c(row, value_spec)
            if key in qualifiers:
                qualifiers[key].append(value)
            else:
                qualifiers[key] = [value]

        feature = SeqFeature(
            FeatureLocation(int(c(row, begin)), int(c(row, end))),
            type=c(row, type),
            strand=strand,
            qualifiers=qualifiers,
        )
        record.features.append(feature)

    return records


if __name__ == "__main__":
    # Command-line entry point: parse the options, convert the table and
    # write each resulting record to stdout as GFF.
    cli = argparse.ArgumentParser(description="Convert tables to gff3", epilog="")
    cli.add_argument("table", type=argparse.FileType("r"), help="Tabular Input")

    # Required positional column selectors.
    for name, text in (
        ("rid", "id column"),
        ("begin", "begin column"),
        ("end", "end column"),
    ):
        cli.add_argument(name, help=text)

    # Optional selectors; option names match tbl2gff3's keyword arguments.
    # tbl2gff3's ``frame`` parameter has no command-line option.
    for flag, text in (
        ("--type", "feature type column"),
        ("--score", "score column"),
        ("--source", "source column"),
        ("--strand_column", "strand column"),
        ("--strand_value", "strand value"),
    ):
        cli.add_argument(flag, help=text)

    cli.add_argument("-a", action="append", help="attribute column (-a k:v)")

    # The parsed namespace is forwarded wholesale as keyword arguments.
    options = vars(cli.parse_args())

    for record in tbl2gff3(**options).values():
        GFF.write([record], sys.stdout)