diff sappDocker/matrix.xml @ 31:957156367442 draft

Uploaded
author jjkoehorst
date Wed, 29 Jun 2016 01:36:58 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sappDocker/matrix.xml	Wed Jun 29 01:36:58 2016 -0400
@@ -0,0 +1,63 @@
+<tool id="DMatrix" name="MATSPARQL" version="1.0.1">
+  <description/>
+  <requirements>
+    <container type="docker">jjkoehorst/sappdocker:MATRIX</container> <!-- Docker container image declared as this tool's requirement -->
+  </requirements>
+  <command interpreter="docker">java -jar /sparql/target/sparqljava-0.0.1-SNAPSHOT-jar-with-dependencies.jar $separate '-rdf' '$input' '-format' 'TURTLE' '-query' '$query' '-output' '$output' &amp;&amp; Rscript '$__tool_directory__/matrix.R' '$output' '$output' </command> <!-- Runs the SPARQL jar, then matrix.R rewrites the output file in place. The separate flag is intentionally unquoted: its falsevalue is empty, and quoting it would pass an empty-string argument to the jar. -->
+  <inputs> <!-- Parameters substituted into the command line: input, query and separate -->
+    <param name="input" type="data" format="ttl" multiple="True" label="Genome Database"/>
+    <param name="query" type="text" area="True" value="YOUR QUERY HERE" label="SPARQL query"/>
+    <param name="separate" type="boolean" checked="False" truevalue="-separate" falsevalue="" label="Treat genomes separately" help="Use this option if you run into memory or performance problems. Each genome will be queried independently of the others, and therefore advanced comparison SPARQL queries will not work"/>
+  </inputs>
+  <outputs> <!-- Single dataset written via the output variable in the command -->
+    <data name="output" format="tsv" label="matrix.tsv"/>
+  </outputs>
+  <help>Creates a matrix from the results of a SPARQL query. The query should return three columns: the first two give the X and Y coordinates and the third (Z) gives the value.
+
+    A header for a SPARQL query would look like SELECT ?genome ?protein ?value or SELECT ?genome ?domain (COUNT(?domain) AS ?domainC)
+
+-----------------------------
+Genome InterPro Matrix
+-----------------------------
+The following query results in a matrix of genomes by Pfam accessions ::
+
+    PREFIX biopax:&lt;http://www.biopax.org/release/bp-level3.owl#&gt;
+    PREFIX ssb:&lt;http://csb.wur.nl/genome/&gt;
+    SELECT DISTINCT ?genome ?id (COUNT(?id) AS ?value)
+    WHERE { 
+      ?genome a ssb:Genome .
+      ?genome ssb:dnaobject ?dna .
+      ?dna ssb:feature ?feature .
+      ?feature ssb:tool ?tool .
+      ?feature ssb:protein ?protein .
+      ?protein ssb:feature ?domain .
+      ?domain ssb:signature ?signature .
+      ?signature biopax:xref ?xref .
+      ?xref biopax:db 'pfam' .
+      ?xref biopax:id ?id .
+      } GROUP BY ?genome ?id
+
+-------------------
+Enzyme based matrix
+-------------------
+
+The following query results in a matrix of genomes by EC numbers ::
+
+   PREFIX ssb:&lt;http://csb.wur.nl/genome/&gt;
+   SELECT  ?genome ?ec (COUNT(?ec) AS ?ecCount)
+   WHERE {
+        ?gene a ssb:Cds .
+        ?gene ssb:locus_tag ?locus .
+        ?gene ssb:source ?source .
+        ?gene ssb:protein ?protein .
+        ?protein ssb:feature ?feature .
+        {
+            ?feature ssb:kegg ?ec .
+        } UNION {
+            ?feature ssb:ec_number ?ec .
+        }
+   } GROUP BY ?genome ?ec
+
+**If you have specific questions about the database that you created, feel free to contact us.**
+  </help>
+</tool>
\ No newline at end of file