diff tools/plotting/venn_list.xml @ 2:c96bef0643dc draft

Uploaded v0.0.3 again to revert upload of wrong file. Sigh.
author peterjc
date Mon, 06 May 2013 14:05:13 -0400
parents baf7031d470e
children 6aae6bc0802d
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/plotting/venn_list.xml	Mon May 06 14:05:13 2013 -0400
@@ -0,0 +1,113 @@
+<tool id="venn_list" name="Venn Diagram" version="0.0.3">
+  <description>from lists</description>
+  <command interpreter="python">
+venn_list.py
+#if $universe.type_select=="implicit":
+  - -
+#else:
+  $main $main.ext
+#end if
+"$main_lab"
+#for $s in $sets:
+  $s.set $s.set.ext "$s.lab"
+#end for
+$PDF</command>
+  <inputs>
+    <param name="main_lab" size="30" type="text" value="Venn Diagram" label="Plot title"/>
+    <conditional name="universe">
+       <param name="type_select" type="select" label="Implicit or explicit full ID list?">
+         <option value="explicit">Explicit</option>
+         <option value="implicit">Implicit (use union of sets below)</option>
+       </param>
+       <when value="explicit">
+           <param name="main" type="data" format="tabular,fasta,fastq,sff" label="Full dataset (with all identifiers)" help="Tabular file (uses column one), FASTA, FASTQ or SFF file."/>
+       </when>
+       <when value="implicit"/>
+    </conditional>
+    <repeat name="sets" min="1" max="3" title="Sets">
+      <param name="set" type="data" format="tabular,fasta,fastq,sff" label="Members of set" help="Tabular file (uses column one), FASTA, FASTQ or SFF file."/>
+      <param name="lab" size="30" type="text" value="Group" label="Caption for set"/>
+    </repeat>
+  </inputs>
+  <outputs>
+    <data format="pdf" name="PDF" />
+  </outputs>
+  <requirements>
+    <requirement type="python-module">rpy</requirement>
+    <requirement type="python-module">Bio</requirement>
+  </requirements>
+  <tests>
+    <!-- Doesn't seem to work properly, manages to get two sets, both
+         with same FASTA file, but second with default "Group" label.
+    <test>
+      <param name="type_select" value="explicit"/>
+      <param name="main" value="venn_list.tabular" ftype="tabular"/>
+      <param name="main_lab" value="Some Proteins"/>
+      <param name="set" value="rhodopsin_proteins.fasta"/>
+      <param name="lab" value="Rhodopsins"/>
+      <output name="PDF" file="venn_list1.pdf" ftype="pdf"/>
+    </test>
+    -->
+    <!-- Can't use more than one repeat value in tests (yet)
+    <test>
+      <param name="type_select" value="explicit"/>
+      <param name="main" value="venn_list.tabular" ftype="tabular"/>
+      <param name="main_lab" value="Some Proteins"/>
+      <param name="count" value="3"/>
+      <param name="set" value="rhodopsin_proteins.fasta"/>
+      <param name="lab" value="Rhodopsins"/>
+      <param name="set" value="four_human_proteins.fasta"/>
+      <param name="lab" value="Human"/>
+      <param name="set" value="blastp_four_human_vs_rhodopsin.tabular"/>
+      <param name="lab" value="Human vs Rhodopsin BLAST"/>
+      <output name="PDF" file="venn_list3.pdf" ftype="pdf"/>
+    </test>
+    -->
+  </tests>
+  <help>
+
+.. class:: infomark
+
+**TIP:** If your data is in tabular files, the identifier is assumed to be in column one.
+
+**What it does**
+
+Draws Venn Diagram for one, two or three sets (as a PDF file).
+
+You must supply one, two or three sets of identifiers -- corresponding
+to one, two or three circles on the Venn Diagram.
+
+In general you should also give the full list of all the identifiers
+explicitly. This is used to calculate the number of identifers outside
+the circles (and check the identifiers in the other files match up).
+The full list can be omitted by implicitly taking the union of the
+category sets. In this case, the count outside the categories (circles)
+will always be zero.
+
+The identifiers can be taken from the first column of a tabular file
+(e.g. query names in BLAST tabular output, or signal peptide predictions
+after filtering, etc), or from a sequence file (FASTA, FASTQ, SFF).
+
+For example, you may have a set of NGS reads (as a FASTA, FASTQ or SFF
+file), and the results of several different read mappings (e.g. to
+different references) as tabular files (filtered to have just the mapped
+reads). You could then show the different mappings (and their overlaps)
+as a Venn Diagram, and the outside count would be the unmapped reads.
+
+**Citations**
+
+The Venn Diagrams are drawn using Gordon Smyth's limma package from
+R/Bioconductor, http://www.bioconductor.org/
+
+The R library is called from Python via rpy, http://rpy.sourceforge.net/
+
+This tool uses Biopython to read SFF files. If you use this tool with
+SFF files in scientific work leading to a publication, please cite the
+Biopython application note:
+
+Cock et al 2009. Biopython: freely available Python tools for computational
+molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
+http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+
+  </help>
+</tool>