diff format_taxon_list.py @ 0:878d742dacf0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/checkm commit 2a3b068a98bf0e913dc03e0d5c2182cfd102cf27
author iuc
date Fri, 29 Jul 2022 20:34:20 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/format_taxon_list.py	Fri Jul 29 20:34:20 2022 +0000
@@ -0,0 +1,22 @@
+from pathlib import Path
+
+from checkm.taxonParser import TaxonParser
+from checkm.util.taxonomyUtils import taxonomicRanks
+
+if __name__ == '__main__':
+    tool_data_dp = Path('tool-data')  # directory the per-rank tables are written into
+
+    # get all available marker sets; indexed below as taxonMarkerSets[rank][taxon]
+    taxonParser = TaxonParser()
+    taxonMarkerSets = taxonParser.readMarkerSets()
+
+    # create one tab-separated "<rank>.loc.sample" table per taxonomic rank
+    for rank in taxonomicRanks:
+        rank_fp = tool_data_dp / Path("%s.loc.sample" % rank)
+        with rank_fp.open('w') as rank_f:
+            rank_f.write('# File generated by format_taxon_list.py script\n')
+            rank_f.write('# taxon\tdescription\n')
+            for taxon in sorted(taxonMarkerSets[rank]):
+                markerSet = taxonMarkerSets[rank][taxon]
+                numMarkers, numMarkerSets = markerSet.size()
+                rank_f.write(f'{taxon}\t{taxon} ({markerSet.numGenomes} genomes, {numMarkers} marker genes, {numMarkerSets} marker sets)\n')