Mercurial > repos > iuc > xpath
changeset 0:7e01c6a6dbed draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/xpath commit e0575333e6f08ef02fc66c2764b43ebd15c6b04b
author | iuc |
---|---|
date | Fri, 10 Jun 2016 15:08:32 -0400 |
parents | |
children | 1ba5c66e39c9 |
files | macros.xml test-data/1.xml test-data/2.xml test-data/3.xml test-data/test.xml tool_dependencies.xml xpath xpath.xml |
diffstat | 8 files changed, 361 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Jun 10 15:08:32 2016 -0400 @@ -0,0 +1,25 @@ +<?xml version="1.0"?> +<macros> + <xml name="requirements"> + <requirements> + <requirement type="package" version="1.13">package_perl_xml_xpath</requirement> + <yield/> + </requirements> + </xml> + <token name="@WRAPPER_VERSION@">1.0</token> + <xml name="stdio"> + <stdio> + <exit_code range="1:"/> + <exit_code range=":-1"/> + <regex match="Error:"/> + <regex match="Exception:"/> + </stdio> + </xml> + <token name="@ATTRIBUTION@"><![CDATA[ +**Attribution** + +This Galaxy tool relies on the ``xpath`` tool provided in `libxml-xpath-perl` +by the Debian Maintainers. + ]]> + </token> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.xml Fri Jun 10 15:08:32 2016 -0400 @@ -0,0 +1,1 @@ +<b attr="value">1</b>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2.xml Fri Jun 10 15:08:32 2016 -0400 @@ -0,0 +1,2 @@ +<b attr="value">1</b> +<b attr="none">2</b>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3.xml Fri Jun 10 15:08:32 2016 -0400 @@ -0,0 +1,3 @@ +1 +2 +3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.xml Fri Jun 10 15:08:32 2016 -0400 @@ -0,0 +1,12 @@ +<root> + <a> + <b attr="value">1</b> + <b attr="none">2</b> + <b>3</b> + </a> + <c> + <d>4</d> + <d>5</d> + </c> +</root> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Jun 10 15:08:32 2016 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="package_perl_xml_xpath" version="1.13"> + <repository changeset_revision="b0e8fcff8c27" name="package_perl_xml_xpath_1_13" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xpath Fri Jun 10 15:08:32 2016 -0400 @@ -0,0 +1,201 @@ +#!/usr/bin/perl -w +eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}' + if 0; # not running under some shell +use strict; + +$| = 1; + +use XML::XPath; + +my @paths; +my $pipeline; +my $SUFFIX = "\n"; +my $PREFIX = ""; +my $quiet = 0; + + +PARSE: while ((@ARGV >= 1) && ($ARGV[0] =~ /^-./ )) { + OPTIONS: { + if ($ARGV[0] eq "-e") { + shift; + push @paths, shift; + last OPTIONS; + } + if ($ARGV[0] eq "-p") { + shift; + $PREFIX = shift; + last OPTIONS; + } + if ($ARGV[0] eq "-s") { + shift; + $SUFFIX = shift; + last OPTIONS; + } + if ($ARGV[0] eq "-q") { + $quiet = 1; + shift; + last OPTIONS; + } + print STDERR "Unknown option ignore: ", shift; + } +} + +unless (@paths >= 1) { + print STDERR qq(Usage: +$0 [options] -e query [-e query...] [filename...] + + If no filenams are given, supply XML on STDIN. + You must provide at least one query. Each supplementary + query is done in order, the previous query giving the + context of the next one. + + Options: + + -q quiet. Only output the resulting PATH + -s suffix use suffix instead of linefeed. + -p postfix use prefix instead of nothing. +); + exit; +} + +do +{ + my $xpath; + my @curpaths = @paths; + my $filename; + if (@ARGV >= 1) { + $filename = shift @ARGV; + $xpath = XML::XPath->new(filename => $filename); + } + else { + $filename = 'stdin'; + $xpath = XML::XPath->new(ioref => \*STDIN); + } + + my $nodes = $xpath->find(shift @curpaths); + + if ($nodes->isa('XML::XPath::NodeSet')) { + while (@curpaths >= 1) { + $nodes = find_more($xpath, shift @curpaths, $nodes); + last unless $nodes->isa('XML::XPath::NodeSet'); + } + } + + if ($nodes->isa('XML::XPath::NodeSet')) { + if ($nodes->size) { + print STDERR "Found ", $nodes->size, " nodes in $filename:\n" unless $quiet; + foreach my $node ($nodes->get_nodelist) { + print STDERR "-- NODE --\n" unless $quiet; + print $PREFIX, $node->toString, $SUFFIX; + } + } + else { + print STDERR "No nodes found in $filename\n" unless $quiet; + } + } + else { + print STDERR "Query didn't return a nodeset. Value: "; + print $nodes->value, "\n"; + } + +} until (@ARGV < 1); + +exit; + +sub find_more { + my $xpath = shift; + my $find = shift; + my ($nodes) = @_; + + my $newnodes = XML::XPath::NodeSet->new; + + foreach my $node ($nodes->get_nodelist) { + my $new = $xpath->find($find, $node); + if ($new->isa('XML::XPath::NodeSet')) { + $newnodes->append($new); + } + else { + warn "Not a nodeset: ", $new->value, "\n"; + } + } + + return $newnodes; +} + +__END__ + +=head1 NAME + +xpath - a script to query XPath statements in XML documents. + +=head1 SYNOPSIS + +B<xpath [-s suffix] [-p prefix] [-q] -e query [-e query] ... [file] ...> + +=head1 DESCRIPTION + +B<xpath> uses the L<XML::XPath|XML::XPath> perl module to make XPath queries +to any XML document. The L<XML::XPath|XML::XPath> module aims to comply exactly +to the XPath specification at C<http://www.w3.org/TR/xpath> and yet +allows extensions to be added in the form of functions. + +The script takes any number of XPath pointers and tries to apply them +to each XML document given on the command line. If no file arguments +are given, the query is done using C<STDIN> as an XML document. + +When multiple queries exist, the result of the last query is used as +context for the next query and only the result of the last one is output. +The context of the first query is always the root of the current document. + +=head1 OPTIONS + +=head2 B<-q> + +Be quiet. Output only errors (and no separator) on stderr. + +=head2 B<-s suffix> + +Place C<suffix> at the end of each entry. Default is a linefeed. + +=head2 B<-p prefix> + +Place C<prefix> preceding each entry. Default is nothing. + +=head1 BUGS + +The author of this man page is not very fluant in english. Please, +send him (L<fabien@tzone.org>) any corrections concerning this text. + +See also L<XML::XPath(3pm)>. + +=head1 SEE ALSO + +L<XML::XPath(3pm)>. + +=head1 HISTORY + +This module is copyright 2000 Fastnet Software Ltd. This is free +software, and as such comes with NO WARRANTY. No dates are used in this +module. You may distribute this module under the terms of either the +Gnu GPL, or under specific licencing from Fastnet Software Ltd. +Special free licencing consideration will be given to similarly free +software. Please don't flame me for this licence - I've put a lot of +hours into this code, and if someone uses my software in their product +I expect them to have the courtesy to contact me first. + +Full support for this module is available from Fastnet Software Ltd on +a pay per incident basis. Alternatively subscribe to the Perl-XML +mailing list by mailing lyris@activestate.com with the text: + + SUBSCRIBE Perl-XML + +in the body of the message. There are lots of friendly people on the +list, including myself, and we'll be glad to get you started. + +Matt Sergeant, matt@sergeant.org + +This man page was added as well as some serious modifications to the script +by Fabien Ninoles <fabien@debian.org> for the Debian Project. + +=cut +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xpath.xml Fri Jun 10 15:08:32 2016 -0400 @@ -0,0 +1,111 @@ +<?xml version="1.0"?> +<tool id="xpath" name="XPath" version="@WRAPPER_VERSION@.0"> + <description>compute xpath expressions on XML data</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <expand macro="stdio"/> + <command interpreter="bash"><![CDATA[xpath + -q + -e '$expression' $input > $output + ]]></command> + <inputs> + <param name="input" type="data" format="xml" label="Input XML data"/> + <param name="expression" type="text" label="XPath Query"> + <sanitizer> + <valid> + <add value="""/> + <add value="["/> + <add value="]"/> + <add value="@"/> + </valid> + </sanitizer> + </param> + </inputs> + <outputs> + <!-- TODO: sometimes there's text output (e.g. text() queries) --> + <data format="xml" name="output" label="XPath expression on $input.name"/> + </outputs> + <tests> + <test> + <param name="input" value="test.xml"/> + <param name="expression" value="//b[@attr="value"]" /> + <output name="output" file="1.xml"/> + </test> + <test> + <param name="input" value="test.xml"/> + <param name="expression" value="//b[@attr]" /> + <output name="output" file="2.xml"/> + </test> + <test> + <param name="input" value="test.xml"/> + <param name="expression" value="//b/text()" /> + <output name="output" file="3.xml"/> + </test> + </tests> + <help><![CDATA[ +**What it does** + +Query XML files with XPath expressions. + +For an example input file:: + + <root> + <IdList> + <Id>1234</Id> + <Id>1235</Id> + <Id>1236</Id> + <Id>1237</Id> + </IdList> + </root> + +One could query out the IDs specifically with a query like:: + + //Id/text() + +**XPath Expressions** + +There are many helpful tutorials on the internet for XPath expressions. +`Wikipedia <https://en.wikipedia.org/wiki/XPath>`__ has a number of good +examples. + +**Some More Examples** + +For an XML document like the following:: + + <root> + <a> + <b attr="value">1</b> + <b attr="none">2</b> + <b>3</b> + </a> + <c> + <d>4</d> + <e>5</e> + </c> + </root> + +Here are some example queries and their outputs: + ++====================+===================================================+ +| Query | Result | ++--------------------+---------------------------------------------------+ +| //b[@attr="value"] | <b attr="value">1</b> | ++--------------------+---------------------------------------------------+ +| //b[@attr] | <b attr="value">1</b><b attr="none">2</b> | ++--------------------+---------------------------------------------------+ +| //b | <b attr="value">1</b><b attr="none">2</b><b>3</b> | ++--------------------+---------------------------------------------------+ +| //b/text() | 1\n2\n3 | ++--------------------+---------------------------------------------------+ +| /root/*/*/text() | 1\n2\n3\n4\n5 | ++--------------------+---------------------------------------------------+ + + + +@ATTRIBUTION@ +]]></help> + <citations> + </citations> +</tool>