Mercurial > repos > jjohnson > contig_annotation_tool
annotate tabpad.py @ 5:226949352e31 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit a1c079107b72dc08612fa664897bb9d627624e52-dirty
author | jjohnson |
---|---|
date | Wed, 27 Nov 2019 12:11:14 -0500 |
parents | 9a01840eac52 |
children |
rev | line source |
---|---|
0
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
1 #!/usr/bin/env python |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
2 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
3 import argparse |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
4 import re |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
5 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
6 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
def padfile(infile, outfile, fieldcnt=None):
    """Copy a TAB-separated text file, padding lines with trailing TABs.

    Lines whose first non-whitespace character is '#' are treated as
    comment lines and collected in runs. For each run, every line except
    the last is copied unchanged; the last line of the run is padded like
    a data line. All other lines are padded.

    A padded line has its trailing CR/LF characters removed, receives enough
    TABs to reach ``fieldcnt`` TAB-separated fields, and is terminated by a
    single newline. Lines that already have ``fieldcnt`` or more fields get
    no extra TABs.

    Args:
        infile: path of the text file to read.
        outfile: path of the file to write (overwritten if it exists).
        fieldcnt: number of fields each padded line should have. When None
            or 0, no TABs are added.
    """
    tabs = '\t' * fieldcnt if fieldcnt is not None else None

    def pad_line(out, txtline):
        # Write one line with its line ending normalised and padding added.
        line = txtline.rstrip('\r\n')
        nfields = line.count('\t') + 1
        # Slicing off one TAB per existing field leaves exactly the TABs
        # that are still missing (or '' when the line is already wide).
        padding = tabs[nfields:] if tabs else ''
        out.write('%s%s\n' % (line, padding))

    def flush_comments(out, commentlines):
        # Emit a run of comment lines: all but the last verbatim, the last
        # one padded.
        out.writelines(commentlines[:-1])
        pad_line(out, commentlines[-1])

    # Both files are managed by the with-statement so the output is closed
    # (and flushed) even if reading or writing raises.
    with open(infile, 'r') as fh, open(outfile, 'w') as out:
        commentlines = []
        for txtline in fh:
            if txtline.lstrip().startswith('#'):
                commentlines.append(txtline)
                continue
            if commentlines:
                flush_comments(out, commentlines)
                commentlines = []
            pad_line(out, txtline)
        # A run of comment lines at the very end of the file (including a
        # file that contains only comment lines) must not be dropped.
        if commentlines:
            flush_comments(out, commentlines)
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
29 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
30 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
def fieldcount(infile):
    """Return the largest number of TAB-separated fields on any line.

    Trailing CR/LF characters are ignored when counting. Every line is
    counted, so a line without any TAB counts as one field. An empty file
    yields 0.

    Args:
        infile: path of the text file to scan.
    """
    fieldcnt = 0
    with open(infile, 'r') as fh:
        for line in fh:
            # n TABs separate n + 1 fields.
            fieldcnt = max(fieldcnt, line.rstrip('\r\n').count('\t') + 1)
    return fieldcnt
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
37 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
38 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
def tsvname(infile):
    """Return the output name for *infile*: a trailing '.txt' replaced by '.tsv'.

    Names that do not end in '.txt' simply get '.tsv' appended.
    """
    # Raw string: '\.' in a normal string literal is an invalid escape
    # sequence and triggers a warning on newer Python versions.
    return re.sub(r'\.txt$', '', infile) + '.tsv'
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
41 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
42 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
def __main__():
    """Command-line entry point: pad files with TABs to equal field counts.

    With ``-i/--input`` the given file is padded into ``-o/--output`` or,
    when no output is given, into the name produced by ``tsvname``. Every
    positional file is then padded into its ``tsvname`` counterpart. Each
    file is padded to its own maximum field count.
    """
    parser = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    parser.add_argument(
        '-i', '--input', help='input file')
    parser.add_argument(
        '-o', '--output', help='output file')
    parser.add_argument(
        'files', nargs='*', help='.txt files')
    args = parser.parse_args()

    def convert(src, dest):
        # Two passes over src: first find the widest line, then pad to it.
        fieldcnt = fieldcount(src)
        padfile(src, dest, fieldcnt=fieldcnt)

    if args.input:
        outfile = args.output if args.output else tsvname(args.input)
        convert(args.input, outfile)
    for infile in args.files:
        convert(infile, tsvname(infile))
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
62 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
63 |
ad7507073c3f
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit f80f020c77d04c2e13b89aaea3d784314b940931-dirty
jjohnson
parents:
diff
changeset
|
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    __main__()