annotate customizemapping.py @ 1:67b835aace09 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 948cdafe28ae232b641a58bb7dc044d320feb294"
author iuc
date Wed, 19 May 2021 17:03:14 +0000
parents 10fdefbf5920
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
10fdefbf5920 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
10fdefbf5920 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"
iuc
parents:
diff changeset
2 # -*- coding: utf-8 -*-
10fdefbf5920 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"
iuc
parents:
diff changeset
3
10fdefbf5920 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"
iuc
parents:
diff changeset
4 import argparse
10fdefbf5920 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"
iuc
parents:
diff changeset
5 from pathlib import Path
10fdefbf5920 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"
iuc
parents:
diff changeset
6
10fdefbf5920 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"
iuc
parents:
diff changeset
7
10fdefbf5920 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 077b8f34e081e6c427acb0fde0fbb97d1b241e0b"
iuc
parents:
diff changeset
def first_column_values(lines):
    """Collect the first tab-separated field of every non-blank line.

    :param lines: iterable of text lines (e.g. an open file handle)
    :return: set with the first column of each line, end-of-line removed
    """
    values = set()
    for line in lines:
        # Strip the end-of-line first: for a single-column file the first
        # field would otherwise be stored as "NAME\n" and never match.
        value = line.rstrip("\n").split("\t")[0]
        if value:
            values.add(value)
    return values


def reduce_mapping_line(line, features, elements):
    """Reduce one mapping row to the selected feature and elements.

    :param line: one tab-separated row of the mapping file
    :param features: collection of features (first column) to keep
    :param elements: collection of elements (other columns) to keep
    :return: the reduced row without end-of-line, or None when the feature
        of the row is not in ``features``
    """
    fields = line.rstrip("\n").split("\t")
    feat = fields[0]
    if feat not in features:
        return None
    # A kept feature is always reported, even if none of its elements remain
    kept = [feat] + [e for e in fields[1:] if e in elements]
    return "\t".join(kept)


if __name__ == '__main__':
    # Read command line
    parser = argparse.ArgumentParser(description='Customize HUMAnN utility mapping')
    parser.add_argument('--in_mapping', required=True, help="Path to mapping file to reduce")
    parser.add_argument('--features', required=True, help="Path to tabular file with features to keep in first column")
    parser.add_argument('--elements', required=True, help="Path to tabular file with elements to keep in other columns")
    parser.add_argument('--out_mapping', required=True, help="Path to reduced mapping file")
    args = parser.parse_args()

    in_mapping_fp = Path(args.in_mapping)
    feature_fp = Path(args.features)
    element_fp = Path(args.elements)
    out_mapping_fp = Path(args.out_mapping)

    # extract features to keep
    with open(feature_fp, 'r') as feature_f:
        features = first_column_values(feature_f)
    print(features)

    # extract elements to keep
    with open(element_fp, 'r') as element_f:
        elements = first_column_values(element_f)
    print(elements)

    # write mapping for features to keep while keeping only elements
    # (rows are streamed so large mapping files are never fully in memory)
    with open(in_mapping_fp, 'r') as in_mapping_f:
        with open(out_mapping_fp, 'w') as out_mapping_f:
            for line in in_mapping_f:
                reduced = reduce_mapping_line(line, features, elements)
                if reduced is not None:
                    out_mapping_f.write("%s\n" % reduced)