diff rps2tsv.py @ 4:adcf06db3030 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
author iuc
date Tue, 13 May 2025 11:52:27 +0000
parents 40fb54cc6628
children
line wrap: on
line diff
--- a/rps2tsv.py	Sun Sep 08 14:09:31 2024 +0000
+++ b/rps2tsv.py	Tue May 13 11:52:27 2025 +0000
@@ -5,6 +5,7 @@
 # Author: Marie Lefebvre - INRAE
 # Aims: Convert rpsblast xml output to csv and add taxonomy
 
+"""Module which converts rpsblast xml output to tsv and adds taxonomy"""
 
 import argparse
 import json
@@ -19,6 +20,9 @@
 
 
 def main():
+    """
+    Main function
+    """
     options = _set_options()
     _set_log_level(options.verbosity)
     hits = _read_xml(options)
@@ -44,6 +48,12 @@
                 hit_evalue = hit.expect  # evalue
                 hit_startQ = hit.query_start
                 hit_endQ = hit.query_end
+                hit_identity = hit.identities
+                hit_aln_length = hit.align_length
+                pident = "%0.3f" % (100 * float(hit_identity) / float(hit_aln_length))
+            if float(pident) < 0.1:
+                continue
+            hsp["pident"] = pident
             hsp["frame"] = hit_frame
             hsp["evalue"] = hit_evalue
             hsp["startQ"] = hit_startQ
@@ -83,7 +93,8 @@
                             taxonomy = names
                         if len(taxonomy) != 0:
                             kingdoms.append(taxonomy[0])
-                frequency = {kingdom: kingdoms.count(kingdom) for kingdom in kingdoms}  # {'Pseudomonadota': 9, 'cellular organisms': 4}
+                # {'Pseudomonadota': 9, 'cellular organisms': 4}
+                frequency = {kingdom: kingdoms.count(kingdom) for kingdom in kingdoms}
                 sorted_freq = dict(sorted(frequency.items(), key=lambda x: x[1], reverse=True))
                 concat_freq = ";".join("{}({})".format(k, v) for k, v in sorted_freq.items())
                 hsp["taxonomy"] = concat_freq
@@ -96,29 +107,40 @@
     Write output
     """
     log.info("Write output file " + options.output)
-    headers = "#query_id\tquery_length\tcdd_id\thit_id\tevalue\tstartQ\tendQ\tframe\tdescription\tsuperkingdom\n"
+    headers = "#query_id\tquery_length\tcdd_id\thit_id\tevalue\tstartQ\tendQ\tframe\tdescription\tsuperkingdom\tpident\n"
     f = open(options.output, "w+")
     f.write(headers)
     for h in hits:
         f.write(h + "\t" + str(hits[h]["query_length"]) + "\t")
         f.write(hits[h]["cdd_id"] + "\t" + hits[h]["hit_id"] + "\t" + str(hits[h]["evalue"]) + "\t")
-        f.write(str(hits[h]["startQ"]) + "\t" + str(hits[h]["endQ"]) + "\t" + str(hits[h]["frame"]) + "\t")
-        f.write(hits[h]["description"] + "\t" + hits[h]["taxonomy"])
+        f.write(str(hits[h]["startQ"]) + "\t" + str(hits[h]["endQ"]) + "\t"
+                + str(hits[h]["frame"]) + "\t")
+        f.write(hits[h]["description"] + "\t" + hits[h]["taxonomy"] + "\t" + hits[h]["pident"])
         f.write("\n")
     f.close()
 
 
 def _set_options():
+    """
+    Parse and return the command-line arguments of the script
+    """
     parser = argparse.ArgumentParser()
-    parser.add_argument('-x', '--xml', help='XML files with results of blast', action='store', required=True, dest='xml_file')
-    parser.add_argument('-e', '--max_evalue', help='Max evalue', action='store', type=float, default=0.0001, dest='max_evalue')
-    parser.add_argument('-o', '--out', help='The output file (.tab).', action='store', type=str, default='./rps2tsv_output.tab', dest='output')
-    parser.add_argument('-v', '--verbosity', help='Verbose level', action='store', type=int, choices=[1, 2, 3, 4], default=1)
+    parser.add_argument('-x', '--xml', help='XML files with results of blast', action='store',
+                        required=True, dest='xml_file')
+    parser.add_argument('-e', '--max_evalue', help='Max evalue', action='store',
+                        type=float, default=0.0001, dest='max_evalue')
+    parser.add_argument('-o', '--out', help='The output file (.tab).', action='store',
+                        type=str, default='./rps2tsv_output.tab', dest='output')
+    parser.add_argument('-v', '--verbosity', help='Verbose level', action='store',
+                        type=int, choices=[1, 2, 3, 4], default=1)
     args = parser.parse_args()
     return args
 
 
 def _set_log_level(verbosity):
+    """
+    Configure logging according to the verbosity level
+    """
     if verbosity == 1:
         log_format = '%(asctime)s %(levelname)-8s %(message)s'
         log.basicConfig(level=log.INFO, format=log_format)