view pafcount.py @ 27:4ff1ed2065cc draft

planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit d733fb18a02929c9e9959ad30bd0f1368118e924-dirty
author fubar
date Wed, 07 Feb 2024 22:37:10 +0000
parents 39b717d934a8
children
line wrap: on
line source

import sys

"""

Col     Type    Description
1   string  Query sequence name
2   int     Query sequence length
3   int     Query start (0-based; BED-like; closed)
4   int     Query end (0-based; BED-like; open)
5   char    Relative strand: "+" or "-"
6   string  Target sequence name
7   int     Target sequence length
8   int     Target start on original strand (0-based)
9   int     Target end on original strand (0-based)
10  int     Number of residue matches
11  int     Alignment block length
12  int     Mapping quality (0-255; 255 for missing)
"""

qcis = {}
tcis = {}
qtrans = {}
ttrans = {}
pafname = sys.argv[1]
pf = open(pafname, "r").readlines()
for row in pf:
    qn, ql, qs, qe, qrs, tn, tl, ts, te, nm, abl, mq = row.strip().split("\t")[:12]

    if qn == tn:  # cis
        print("cis", qn, tn)
        tcis.setdefault(tn, 0)
        tcis[tn] = tcis[tn] + 1
        qcis.setdefault(qn, 0)
        qcis[qn] = qcis[qn] + 1
    else:  # trans
        print("trans", qn, tn)
        k = "%s_%s" % (qn, tn)
        ttrans.setdefault(k, 0)
        ttrans[k] = ttrans[k] + 1
        qtrans.setdefault(k, 0)
        qtrans[k] = qtrans[k] + 1
# print('qcis', qcis,'\nqtrans', qtrans,'\ntcis', tcis,'\ntt', ttrans)
# print('\nqtrans', qtrans,'\nttrans', ttrans)
chroms = list(qtrans.keys())
print("chroms=", chroms)
# print('chrom\tqcis\ttcis\tqtrans\tttrans')
print("chrom\tqtrans\tttrans")
for cr in chroms:
    # print('%s\t%d\t%d\t%d\t%d' % (cr, qcis[cr], tcis[cr], qtrans[cr], ttrans[cr]))
    print("%s\t%d\t%d" % (cr, qtrans[cr], ttrans[cr]))