Mercurial > repos > iuc > cat_prepare
annotate tabpad.py @ 0:b6c5e7343617 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
author | iuc |
---|---|
date | Tue, 10 Dec 2019 16:07:39 -0500 |
parents | |
children |
rev | line source |
---|---|
0
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
2 |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
3 import argparse |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
4 import re |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
5 |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
6 |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
def _padded_line(txtline, tabs=None):
    """Return *txtline* without its line ending, right-padded with TABs.

    ``tabs`` is a string holding one TAB per wanted field; the slice
    ``tabs[len(fields):]`` therefore yields exactly the TABs that are missing
    (and nothing when the line already has enough fields). When ``tabs`` is
    empty or None the line is emitted without padding.
    """
    line = txtline.rstrip('\r\n')
    fields = line.split('\t')
    if not tabs:
        tabs = '\t' * len(fields)
    return '%s%s\n' % (line, tabs[len(fields):])


def padfile(infile, outfile, fieldcnt=None):
    """Copy *infile* to *outfile*, padding lines with trailing TABs.

    Lines whose first non-blank character is ``#`` are treated as comments.
    A run of comment lines that is followed by a data line is written
    unchanged, except for the last line of the run, which is padded like a
    data line. Every other line is padded to ``fieldcnt`` tab-separated
    fields.

    Comment lines that are not followed by any data line (trailing comments,
    or a file consisting only of comments) are written unchanged instead of
    being dropped.

    :param infile: path of the text file to read
    :param outfile: path of the file to write (truncated if it exists)
    :param fieldcnt: number of fields each padded line should have; when
        None, lines are copied without padding
    """
    tabs = '\t' * fieldcnt if fieldcnt is not None else None

    # Both handles are managed by the ``with`` statement so the output file
    # is closed even if reading or writing raises.
    with open(infile, 'r') as fh, open(outfile, 'w') as out:
        pending_comments = []
        for txtline in fh:
            if txtline.lstrip().startswith('#'):
                # Defer: only the last comment before data gets padded.
                pending_comments.append(txtline)
                continue
            if pending_comments:
                out.writelines(pending_comments[:-1])
                out.write(_padded_line(pending_comments[-1], tabs=tabs))
                pending_comments = []
            out.write(_padded_line(txtline, tabs=tabs))
        # Comments with no data line after them are kept verbatim.
        out.writelines(pending_comments)
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
31 |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
32 |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
def fieldcount(infile):
    """Return the largest number of tab-separated fields on any line.

    Every line of *infile* is considered, including lines starting with
    ``#``. Line endings are stripped before counting. An empty file yields 0.

    :param infile: path of the text file to scan
    :return: maximum field count over all lines (int)
    """
    fieldcnt = 0
    with open(infile, 'r') as fh:
        for line in fh:
            # N TABs separate N + 1 fields.
            fieldcnt = max(fieldcnt, line.rstrip('\r\n').count('\t') + 1)
    return fieldcnt
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
39 |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
40 |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
def tsvname(infile):
    """Return the output name for *infile*: a final ``.txt`` becomes ``.tsv``.

    Names that do not end in ``.txt`` simply get ``.tsv`` appended.

    :param infile: input file name or path
    :return: derived ``.tsv`` file name
    """
    # The dot is escaped so only a literal ".txt" suffix is removed; an
    # unescaped dot would also strip e.g. the "atxt" from "datatxt".
    return re.sub(r'\.txt$', '', infile) + '.tsv'
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
43 |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
44 |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
def __main__(argv=None):
    """Command-line entry point: pad each given file to equal field counts.

    The file given with ``-i/--input`` is written to ``-o/--output`` when
    that option is supplied, otherwise to the name derived by ``tsvname``.
    Each positional file is always written to its ``tsvname``-derived name.
    For every file the field count is determined by ``fieldcount`` and then
    passed to ``padfile``.

    :param argv: argument list to parse; None (the default) makes argparse
        read ``sys.argv[1:]``
    """
    parser = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    parser.add_argument(
        '-i', '--input', help='input file')
    parser.add_argument(
        '-o', '--output', help='output file')
    parser.add_argument(
        'files', nargs='*', help='.txt files')
    args = parser.parse_args(argv)

    # Collect (input, output) pairs so both invocation styles share one loop;
    # the -i file, if any, is processed first.
    jobs = []
    if args.input:
        jobs.append(
            (args.input, args.output if args.output else tsvname(args.input)))
    jobs.extend((infile, tsvname(infile)) for infile in args.files)

    for infile, outfile in jobs:
        padfile(infile, outfile, fieldcnt=fieldcount(infile))
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
64 |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
65 |
b6c5e7343617
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    __main__()