Mercurial > repos > iuc > cat_summarise
annotate tabpad.py @ 3:a8216def322b draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 47be73f0ca2b3a3bcb79a1fa7cf15a63c7facd19
author | iuc |
---|---|
date | Fri, 16 Feb 2024 10:44:47 +0000 |
parents | db1360aae489 |
children |
rev | line source |
---|---|
0
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
2 |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
3 import argparse |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
4 import re |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
5 |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
6 |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
def padfile(infile, outfile, fieldcnt=None):
    """Copy ``infile`` to ``outfile``, right-padding rows with TABs.

    Every non-comment line is stripped of its line ending, padded with
    trailing TABs until it holds ``fieldcnt`` TAB-separated fields, and
    written back terminated by a single newline.  Lines that already have
    ``fieldcnt`` fields or more are only re-terminated.

    Lines whose first non-blank character is ``#`` are treated as comments.
    Within each run of consecutive comment lines, all but the last are copied
    verbatim and the last one is padded like a data row (presumably because
    it carries the column header -- confirm against the producing tool).

    Args:
        infile: path of the text file to read.
        outfile: path of the file to write (truncated if it exists).
        fieldcnt: target number of fields per row.  ``None`` or ``0``
            disables padding.
    """
    # A falsy width means "no padding": each row simply keeps its own width.
    width = fieldcnt or 0

    def padded(txtline):
        """Return ``txtline`` without its line ending, TAB-padded, plus a newline."""
        line = txtline.rstrip('\r\n')
        missing = width - len(line.split('\t'))
        return '%s%s\n' % (line, '\t' * max(missing, 0))

    def flush_comments(out, pending):
        """Write a buffered run of comment lines; only the last one is padded."""
        out.writelines(pending[:-1])
        out.write(padded(pending[-1]))

    # Both handles are managed by ``with`` so the output file is closed even
    # when reading or writing raises.
    with open(infile, 'r') as fh, open(outfile, 'w') as out:
        pending = []
        for txtline in fh:
            if txtline.lstrip().startswith('#'):
                # Buffer comments: which one is "last" is only known once
                # the run ends.
                pending.append(txtline)
                continue
            if pending:
                flush_comments(out, pending)
                pending = []
            out.write(padded(txtline))
        # Comments still buffered at end of file (trailing comments, or a
        # file with a header but no data rows) must not be dropped.
        if pending:
            flush_comments(out, pending)
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
31 |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
32 |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
def fieldcount(infile):
    """Return the largest number of TAB-separated fields on any line.

    Every line of ``infile`` is considered, comment lines included; line
    endings are stripped before splitting.  An empty file yields ``0``.

    Args:
        infile: path of the text file to scan.

    Returns:
        The maximum field count over all lines, or ``0`` if there are none.
    """
    with open(infile, 'r') as fh:
        # ``default=0`` covers the empty-file case, where the generator
        # yields nothing and a bare ``max`` would raise ValueError.
        return max(
            (len(line.rstrip('\r\n').split('\t')) for line in fh),
            default=0)
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
39 |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
40 |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
def tsvname(infile):
    """Derive the output name for ``infile``: swap a ``.txt`` suffix for ``.tsv``.

    A name that does not end in a literal ``.txt`` simply gets ``.tsv``
    appended.

    Args:
        infile: input file name or path.

    Returns:
        The corresponding ``.tsv`` file name.
    """
    # The dot is escaped so that only a real ".txt" extension is removed; an
    # unescaped "." would also eat the last four characters of e.g. "datatxt".
    return re.sub(r'\.txt$', '', infile) + '.tsv'
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
43 |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
44 |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
def __main__():
    """Parse the command line and pad every requested file.

    The ``-i`` file, if given, is processed first and written to ``-o`` (or
    to its ``tsvname`` when ``-o`` is absent); each positional file is then
    written to its own ``tsvname``.  Each file is padded to its own maximum
    field count.
    """
    cli = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    cli.add_argument('-i', '--input', help='input file')
    cli.add_argument('-o', '--output', help='output file')
    cli.add_argument('files', nargs='*', help='.txt files')
    opts = cli.parse_args()

    # Collect (source, target) pairs first, keeping the -i file ahead of the
    # positional ones, then process them uniformly.
    jobs = []
    if opts.input:
        jobs.append((opts.input, opts.output or tsvname(opts.input)))
    jobs.extend((path, tsvname(path)) for path in opts.files)

    for source, target in jobs:
        padfile(source, target, fieldcnt=fieldcount(source))
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
64 |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
65 |
db1360aae489
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
# Run the command-line interface only when executed as a script, not on import.
if __name__ == '__main__':
    __main__()