Mercurial > repos > iuc > cat_bins
annotate tabpad.py @ 1:18676df0cb3a draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit b6c674376eade4fdf9ffb31380f6689ede84a091"
author | iuc |
---|---|
date | Wed, 08 Jan 2020 13:03:51 -0500 |
parents | 0094893f5001 |
children |
rev | line source |
---|---|
0
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
2 |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
3 import argparse |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
4 import re |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
5 |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
6 |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
def padfile(infile, outfile, fieldcnt=None):
    """Copy ``infile`` to ``outfile``, padding lines with trailing TABs.

    Data lines (lines whose first non-blank character is not ``#``) are
    right-padded with TAB characters so that each has at least ``fieldcnt``
    TAB-separated fields.  Within a run of consecutive comment lines only
    the last one is padded (it is treated as the column header); the
    preceding comment lines are copied through verbatim.

    Args:
        infile: path of the text file to read.
        outfile: path of the file to write (overwritten if it exists).
        fieldcnt: target number of fields per line.  When ``None`` or not
            positive, each line keeps its own width and no padding is added.
    """
    target_tabs = '\t' * fieldcnt if fieldcnt is not None else None

    def padded(txtline):
        # Drop the line ending, then append as many TABs as there are
        # missing fields; every padded line is terminated with '\n'.
        line = txtline.rstrip('\r\n')
        nfields = len(line.split('\t'))
        tabs = target_tabs if target_tabs else '\t' * nfields
        return '%s%s\n' % (line, tabs[nfields:])

    def flush_comments(out, pending):
        # All comment lines but the last are copied unchanged; the last
        # one is padded like a data line.
        if pending:
            out.writelines(pending[:-1])
            out.write(padded(pending[-1]))

    # Both handles are managed by ``with`` so the output file is closed
    # even if reading or writing raises.
    with open(infile, 'r') as fh, open(outfile, 'w') as out:
        pending = []
        for txtline in fh:
            if txtline.lstrip().startswith('#'):
                pending.append(txtline)
                continue
            flush_comments(out, pending)
            pending = []
            out.write(padded(txtline))
        # A comment run at the end of the file (or a file consisting only
        # of comments) has no following data line to trigger a flush, so
        # write it here instead of silently dropping it.
        flush_comments(out, pending)
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
31 |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
32 |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
def fieldcount(infile):
    """Return the largest number of TAB-separated fields on any line.

    Every line of ``infile`` is considered, comment lines included.  Line
    endings are stripped before counting.  An empty file yields 0.
    """
    widest = 0
    with open(infile, 'r') as handle:
        for row in handle:
            # A line with k TAB characters holds k + 1 fields.
            ncols = row.rstrip('\r\n').count('\t') + 1
            if ncols > widest:
                widest = ncols
    return widest
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
39 |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
40 |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
def tsvname(infile):
    """Return the output name for ``infile``: a trailing ``.txt`` becomes ``.tsv``.

    Names that do not end in ``.txt`` simply get ``.tsv`` appended.
    """
    # The dot is escaped so that only a literal ".txt" suffix is removed;
    # an unescaped "." would also strip e.g. the "ytxt" in "mytxt".
    return re.sub(r'\.txt$', '', infile) + '.tsv'
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
43 |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
44 |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
def __main__():
    """Parse the command line and pad each requested file.

    The file given with ``-i/--input`` is written to ``-o/--output`` when
    that is supplied, otherwise to the name derived by ``tsvname``.  Every
    positional file is written to its ``tsvname``-derived name.  Each file
    is padded to its own maximum field count.
    """
    cli = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    cli.add_argument('-i', '--input', help='input file')
    cli.add_argument('-o', '--output', help='output file')
    cli.add_argument('files', nargs='*', help='.txt files')
    opts = cli.parse_args()

    # Collect (source, destination) pairs first, then process them in
    # order: the -i file (if any) followed by the positional files.
    jobs = []
    if opts.input:
        jobs.append((opts.input, opts.output or tsvname(opts.input)))
    jobs.extend((path, tsvname(path)) for path in opts.files)

    for source, target in jobs:
        padfile(source, target, fieldcnt=fieldcount(source))
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
64 |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
65 |
0094893f5001
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
iuc
parents:
diff
changeset
|
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    __main__()