comparison tabpad.py @ 0:ad7507073c3f draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
author jjohnson
date Sun, 24 Nov 2019 21:56:00 -0500
parents
children 9a01840eac52
comparison
equal deleted inserted replaced
-1:000000000000 0:ad7507073c3f
1 #!/usr/bin/env python
2
3 import argparse
4 import re
5
6
def padfile(infile, outfile, fieldcnt=None):
    """Copy *infile* to *outfile*, padding short lines with trailing TABs.

    Each input line has its trailing CR/LF characters removed, is extended
    with enough TAB characters to reach ``fieldcnt`` tab-separated fields,
    and is written out terminated by a single ``\\n``. Lines that already
    have ``fieldcnt`` or more fields are written unchanged (never truncated).

    Args:
        infile: Path of the text file to read.
        outfile: Path of the file to write (created or overwritten).
        fieldcnt: Target number of fields per line. When ``None`` or not
            positive, the field count of the first line read is used.
    """
    target = fieldcnt
    # Both handles are managed by the ``with`` statement so the output file
    # is flushed and closed even if reading or writing raises.
    with open(infile, 'r') as src, open(outfile, 'w') as dst:
        for rawline in src:
            line = rawline.rstrip('\r\n')
            nfields = line.count('\t') + 1
            if target is None or target <= 0:
                # No usable target given: fall back to the first line's width.
                target = nfields
            # A negative repeat count yields '', so over-long lines pass
            # through untouched.
            dst.write('%s%s\n' % (line, '\t' * (target - nfields)))
18
19
def fieldcount(infile):
    """Return the largest number of tab-separated fields on any line.

    Trailing CR/LF characters are ignored when counting. A line with no
    TAB (including an empty line) counts as one field; a file with no
    lines at all yields 0.

    Args:
        infile: Path of the text file to scan.

    Returns:
        The maximum per-line field count as an int.
    """
    widest = 0
    with open(infile, 'r') as fh:
        for line in fh:
            # N tabs delimit N + 1 fields; no need to build the split list.
            widest = max(widest, line.rstrip('\r\n').count('\t') + 1)
    return widest
26
27
def tsvname(infile):
    """Return the ``.tsv`` output name derived from *infile*.

    A trailing ``.txt`` extension is removed before ``.tsv`` is appended;
    any other name simply gets ``.tsv`` appended.

    Args:
        infile: Input file name or path.

    Returns:
        The derived output name as a string.
    """
    # Raw string: '\.' in a plain literal is an invalid escape sequence that
    # newer Python versions warn about.
    return re.sub(r'\.txt$', '', infile) + '.tsv'
30
31
def __main__():
    """Parse the command line and pad every requested file.

    The optional ``-i/--input`` file is written to ``-o/--output`` when
    given, otherwise to the name produced by ``tsvname``. Each positional
    file is always written to its ``tsvname``-derived name. Every file is
    padded to its own maximum field count as reported by ``fieldcount``.
    """
    cli = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    cli.add_argument('-i', '--input', help='input file')
    cli.add_argument('-o', '--output', help='output file')
    cli.add_argument('files', nargs='*', help='.txt files')
    opts = cli.parse_args()

    # Collect (source, destination) pairs: the explicit input first, then
    # the positional files in the order given.
    jobs = []
    if opts.input:
        jobs.append((opts.input, opts.output or tsvname(opts.input)))
    jobs.extend((path, tsvname(path)) for path in opts.files)

    for source, destination in jobs:
        padfile(source, destination, fieldcnt=fieldcount(source))
51
52
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    __main__()