Mercurial > repos > iuc > column_remove_by_header
annotate column_remove_by_header.py @ 1:2040e4c2750a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/column_remove_by_header commit 6a87239f5139390963930673f36b869cde76fdf9
author | iuc |
---|---|
date | Sat, 16 Jul 2022 06:55:49 +0000 |
parents | 372967836e98 |
children |
rev | line source |
---|---|
0
372967836e98
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/column_remove_by_header commit 2150a3264364471090b650bdffde9f9c0b47ac39
iuc
parents:
diff
changeset
|
#!/usr/bin/env python
"""Remove (or keep) columns of a delimited text file, selected by header name.

The first line of the input is treated as the header.  Columns whose header
matches one of the ``--columns`` names are dropped (or, with ``--keep``, are
the only ones retained).  The header line is decoded as UTF-8 only to match
names; the data itself is filtered as raw bytes so that content that is not
valid UTF-8 passes through unchanged.
"""

import argparse


def build_parser():
    """Return the command-line parser for this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", "--input", required=True, help="Tabular Input File Name"
    )
    parser.add_argument(
        "-o", "--output", required=True, help="Tabular Output File"
    )
    # Required: without any names the selection logic has nothing to compare
    # against (it used to fail with a TypeError on ``key in None``).
    parser.add_argument(
        "-c", "--columns", dest="names", nargs="+", required=True,
        help="Column headers to operate on"
    )
    parser.add_argument("-d", "--delimiter", default='\t', help="Column delimiter")
    parser.add_argument(
        "-k", "--keep", action="store_true",
        help="Drop non-selected columns instead of selected ones"
    )
    parser.add_argument(
        "-s", "--strip_chars", default=None,
        help="Ignore these leading characters when extracting the name of the "
        "first line"
    )
    parser.add_argument(
        "--unicode-escaped-cols", action="store_true",
        help="Indicate that the --columns names use unicode escape sequences "
        "that should be decoded back before comparing them to the input file "
        "header"
    )
    return parser


def encode_delimiter(delimiter):
    """Return *delimiter* as ASCII bytes for splitting raw input lines.

    The delimiter can only be located reliably in undecoded input if it
    comes from the ASCII range of characters.

    Raises:
        ValueError: if the delimiter is empty or contains non-ASCII
            characters.
    """
    if not delimiter:
        # str.split()/bytes.split() reject an empty separator anyway; fail
        # early with a message that names the actual problem.
        raise ValueError("The column delimiter must not be empty")
    try:
        return delimiter.encode(encoding="ascii")
    except UnicodeEncodeError as err:
        raise ValueError(
            "Only ASCII characters are allowed as column delimiters"
        ) from err


def decode_column_names(names):
    """Decode unicode escape sequences (``\\t``, ``\\u00e9``, ...) in *names*.

    The ``unicode_escape`` codec interprets its input bytes as Latin-1, so
    encoding the names as UTF-8 first would mangle any literal non-ASCII
    character.  Encoding as Latin-1 with ``backslashreplace`` keeps
    characters up to U+00FF intact and turns everything else into an escape
    sequence that the decoder restores.
    """
    return [
        name.encode("latin-1", "backslashreplace").decode("unicode_escape")
        for name in names
    ]


def read_header(path, delimiter):
    """Return the column names found on the first line of the file at *path*.

    Bytes that are not valid UTF-8 are preserved as surrogate escapes instead
    of raising, since only the requested names need to match exactly.
    """
    with open(path, "r", encoding="utf-8", errors="surrogateescape") as fh:
        return fh.readline().rstrip("\n").split(delimiter)


def select_columns(header_cols, names, keep=False, strip_chars=None):
    """Return the set of 0-based indices of the columns to write out.

    Args:
        header_cols: column names from the header line, in file order.
        names: header names selected by the user.
        keep: if true, the selected columns are retained; otherwise they are
            the ones removed.
        strip_chars: characters ignored at the start of the first header
            field (e.g. a leading ``#``) when comparing it to *names*.
    """
    wanted = set(names)  # O(1) membership tests instead of scanning a list
    keep = bool(keep)
    columns = set()
    for idx, key in enumerate(header_cols):
        if idx == 0 and strip_chars:
            key = key.lstrip(strip_chars)
        # keep mode: retain matches; remove mode: retain non-matches
        if (key in wanted) == keep:
            columns.add(idx)
    return columns


def write_selected_columns(in_path, out_path, columns, bytes_delimiter):
    """Copy *in_path* to *out_path*, writing only the fields in *columns*.

    Lines are processed as bytes.  Trailing ``\\r``/``\\n`` characters are
    removed and every output line is terminated with a single ``\\n``.
    Lines with fewer fields than the header simply contribute the selected
    fields they do have.
    """
    ordered = sorted(columns)  # loop-invariant; preserves input column order
    with open(in_path, "rb") as src, open(out_path, "wb") as dst:
        for line in src:
            fields = line.rstrip(b"\r\n").split(bytes_delimiter)
            n_fields = len(fields)
            kept = [fields[idx] for idx in ordered if idx < n_fields]
            dst.write(bytes_delimiter.join(kept) + b"\n")


def main(argv=None):
    """Run the tool; *argv* defaults to the process command line."""
    args = build_parser().parse_args(argv)

    bytes_delimiter = encode_delimiter(args.delimiter)
    # handle unicode escape sequences in --columns argument
    if args.unicode_escaped_cols:
        names = decode_column_names(args.names)
    else:
        names = args.names

    header_cols = read_header(args.input, args.delimiter)
    columns = select_columns(
        header_cols, names, keep=args.keep, strip_chars=args.strip_chars
    )
    print("Kept", len(columns), "of", len(header_cols), "columns.")

    write_selected_columns(args.input, args.output, columns, bytes_delimiter)


if __name__ == "__main__":
    main()