# HG changeset patch # User devteam # Date 1599647259 0 # Node ID fbff957a3655dbfe8e057bcfc874a095ddebb4ae # Parent 03ca082aeb2e12d414b5ea0904128dc1a9ac3c27 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/megablast_xml_parser commit 3ccddd4e2032535ead030efa401e690ffb80d145" diff -r 03ca082aeb2e -r fbff957a3655 megablast_xml_parser.py --- a/megablast_xml_parser.py Mon May 19 12:33:19 2014 -0400 +++ b/megablast_xml_parser.py Wed Sep 09 10:27:39 2020 +0000 @@ -1,78 +1,64 @@ #!/usr/bin/env python - -import sys, os, re -if sys.version_info[:2] >= ( 2, 5 ): - import xml.etree.cElementTree as ElementTree -else: - from galaxy import eggs - import pkg_resources; pkg_resources.require( "elementtree" ) - from elementtree import ElementTree +import re +import sys +import xml.etree.cElementTree as ElementTree -def stop_err( msg ): - sys.stderr.write( "%s\n" % msg ) - sys.exit() def __main__(): - source = sys.argv[1] - hspTags = [ - "Hsp_bit-score", - "Hsp_evalue", - "Hsp_query-from", - "Hsp_query-to", - "Hsp_hit-from", - "Hsp_hit-to", - "Hsp_query-frame", - "Hsp_hit-frame", - "Hsp_identity", - "Hsp_align-len", - "Hsp_qseq", - "Hsp_hseq", - "Hsp_midline" - ] - hspData = [] + source = sys.argv[1] + hspTags = ["Hsp_bit-score", + "Hsp_evalue", + "Hsp_query-from", + "Hsp_query-to", + "Hsp_hit-from", + "Hsp_hit-to", + "Hsp_query-frame", + "Hsp_hit-frame", + "Hsp_identity", + "Hsp_align-len", + "Hsp_qseq", + "Hsp_hseq", + "Hsp_midline"] # get an iterable - try: - context = ElementTree.iterparse( source, events=( "start", "end" ) ) - except: - stop_err( "Invalid data format." ) + try: + context = ElementTree.iterparse(source, events=("start", "end")) + except Exception: + sys.exit("Invalid data format.") # turn it into an iterator - context = iter( context ) + context = iter(context) # get the root element try: - event, root = context.next() - except: - stop_err( "Invalid data format." ) + event, root = next(context) + except Exception: + sys.exit("Invalid data format.") - outfile = open( sys.argv[2], 'w' ) - try: - for event, elem in context: - # for every tag - if event == "end" and elem.tag == "Iteration": - query = elem.findtext( "Iteration_query-def" ) - qLen = elem.findtext( "Iteration_query-len" ) - # for every within - for hit in elem.findall( "Iteration_hits/Hit" ): - subject = hit.findtext( "Hit_id" ) - if re.search( '^gi', subject ): - subject = subject.split('|')[1] - sLen = hit.findtext( "Hit_len" ) - # for every within - for hsp in hit.findall( "Hit_hsps/Hsp" ): - outfile.write( "%s\t%s\t%s\t%s" % ( query, qLen, subject, sLen ) ) - for tag in hspTags: - outfile.write("\t%s" %(hsp.findtext( tag ))) - #hspData.append( hsp.findtext( tag ) ) - #hspData = [] - outfile.write('\n') - # prevents ElementTree from growing large datastructure - root.clear() - elem.clear() - except: - outfile.close() - stop_err( "The input data is malformed, or there is more than one dataset in the input file. Error: %s" % sys.exc_info()[1] ) + with open(sys.argv[2], 'w') as outfile: + try: + for event, elem in context: + # for every tag + if event == "end" and elem.tag == "Iteration": + query = elem.findtext("Iteration_query-def") + qLen = elem.findtext("Iteration_query-len") + # for every within + for hit in elem.findall("Iteration_hits/Hit"): + subject = hit.findtext("Hit_id") + if re.search('^gi', subject): + subject = subject.split('|')[1] + sLen = hit.findtext("Hit_len") + # for every within + for hsp in hit.findall("Hit_hsps/Hsp"): + outfile.write("%s\t%s\t%s\t%s" % (query, qLen, subject, sLen)) + for tag in hspTags: + outfile.write("\t%s" % (hsp.findtext(tag))) + outfile.write('\n') + # prevents ElementTree from growing large datastructure + root.clear() + elem.clear() + except Exception: + sys.exit("The input data is malformed, or there is more than one dataset in the input file. Error: %s" % sys.exc_info()[1]) - outfile.close() -if __name__ == "__main__": __main__() +if __name__ == "__main__": + __main__() diff -r 03ca082aeb2e -r fbff957a3655 megablast_xml_parser.xml --- a/megablast_xml_parser.xml Mon May 19 12:33:19 2014 -0400 +++ b/megablast_xml_parser.xml Wed Sep 09 10:27:39 2020 +0000 @@ -1,18 +1,23 @@ - - -megablast_xml_parser.py $input1 $output1 - - - - - - - - - - - - + + + + python + + + + + + + + + + + + + + **What it does** @@ -54,7 +59,5 @@ Note that this form of output does not contain alignment identify value. However, it can be computed by dividing the number of identical bases within the alignment (Field 13) by the alignment length (Field 14) using *Text Manipulation->Compute* tool - - - + diff -r 03ca082aeb2e -r fbff957a3655 repository_dependencies.xml --- a/repository_dependencies.xml Mon May 19 12:33:19 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ - - - -