Mercurial > repos > iuc > cat_prepare
comparison tabpad.py @ 0:b6c5e7343617 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
author | iuc |
---|---|
date | Tue, 10 Dec 2019 16:07:39 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b6c5e7343617 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import re | |
5 | |
6 | |
def padfile(infile, outfile, fieldcnt=None):
    """Copy ``infile`` to ``outfile``, padding lines with trailing TABs.

    Every non-comment line is right-padded with TAB characters until it
    has ``fieldcnt`` TAB-separated fields.  Lines that already have at
    least ``fieldcnt`` fields are copied without extra TABs.  Line
    endings of padded lines are normalised to ``'\\n'``.

    Lines whose first non-blank character is ``#`` are comments.  Within
    each run of consecutive comment lines, all but the last are copied
    verbatim and the last one is padded like a data line.  A run of
    comments at the very end of the file (including a file that holds
    nothing but comments) is written out as well instead of being lost.

    Args:
        infile: path of the text file to read.
        outfile: path of the file to write (truncated if it exists).
        fieldcnt: target number of fields per line; ``None`` or ``0``
            disables padding.
    """
    tabs = '\t' * fieldcnt if fieldcnt is not None else None

    def pad_line(out, txtline):
        line = txtline.rstrip('\r\n')
        nfields = line.count('\t') + 1
        # Slicing off the first ``nfields`` TABs leaves exactly the number
        # of missing fields, or '' when the line is already wide enough.
        padding = tabs[nfields:] if tabs else ''
        out.write('%s%s\n' % (line, padding))

    def flush_comments(out, pending):
        # Leading comments keep their original bytes; only the last line
        # of the run is padded.
        out.writelines(pending[:-1])
        pad_line(out, pending[-1])

    # Both handles are managed by ``with`` so the output file is closed
    # even when reading or writing raises.
    with open(infile, 'r') as fh, open(outfile, 'w') as out:
        commentlines = []
        for txtline in fh:
            if txtline.lstrip().startswith('#'):
                commentlines.append(txtline)
                continue
            if commentlines:
                flush_comments(out, commentlines)
                commentlines = []
            pad_line(out, txtline)
        # Comments not followed by any data line must still be emitted.
        if commentlines:
            flush_comments(out, commentlines)
31 | |
32 | |
def fieldcount(infile):
    """Return the largest number of TAB-separated fields on any line.

    Every line of ``infile`` is considered, comment lines included.
    Trailing CR/LF characters are ignored.  An empty file yields ``0``;
    any existing line, even a blank one, counts as at least one field.
    """
    widest = 0
    with open(infile, 'r') as handle:
        for row in handle:
            # A line with k TABs has k + 1 fields.
            width = row.rstrip('\r\n').count('\t') + 1
            if width > widest:
                widest = width
    return widest
39 | |
40 | |
def tsvname(infile):
    """Return the output name for ``infile``: a ``.txt`` suffix becomes ``.tsv``.

    A trailing ``.txt`` extension is removed before ``.tsv`` is appended;
    names without that extension simply get ``.tsv`` appended.
    """
    # The dot is escaped so only a literal ".txt" extension is stripped,
    # not any character followed by "txt" (e.g. "mytxt").
    return re.sub(r'\.txt$', '', infile) + '.tsv'
43 | |
44 | |
def __main__():
    """Command-line entry point: pad each requested file to a uniform width.

    The file given with ``-i/--input`` is written to ``-o/--output`` when
    that is supplied, otherwise to the name derived by ``tsvname``.  Each
    positional file is always written to its ``tsvname``-derived name.
    For every file the target width is the value ``fieldcount`` reports
    for that same file.
    """
    cli = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    cli.add_argument('-i', '--input', help='input file')
    cli.add_argument('-o', '--output', help='output file')
    cli.add_argument('files', nargs='*', help='.txt files')
    opts = cli.parse_args()

    # Collect (source, target) pairs; the -i file is handled first.
    jobs = []
    if opts.input:
        jobs.append((opts.input, opts.output or tsvname(opts.input)))
    jobs.extend((path, tsvname(path)) for path in opts.files)

    for source, target in jobs:
        padfile(source, target, fieldcnt=fieldcount(source))
64 | |
65 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    __main__()