Mercurial > repos > peterjc > venn_list
changeset 3:6aae6bc0802d draft
Uploaded v0.0.6, basic unit test, MIT licence, RST README, citation information, development moved to GitHub
author | peterjc |
---|---|
date | Wed, 18 Sep 2013 06:19:51 -0400 |
parents | c96bef0643dc |
children | 991342eca214 |
files | test-data/rhodopsin_proteins.fasta test-data/venn_list.tabular test-data/venn_list1.pdf tools/plotting/venn_list.py tools/plotting/venn_list.rst tools/plotting/venn_list.txt tools/plotting/venn_list.xml |
diffstat | 7 files changed, 175 insertions(+), 88 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/rhodopsin_proteins.fasta Wed Sep 18 06:19:51 2013 -0400 @@ -0,0 +1,43 @@ +>gi|57163783|ref|NP_001009242.1| rhodopsin [Felis catus] +MNGTEGPNFYVPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRT +PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC +KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVV +HFTIPMIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQG +SNFGPIFMTLPAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTGSKTETSQVAPA + +>gi|3024260|sp|P56514.1|OPSD_BUFBU RecName: Full=Rhodopsin +MNGTEGPNFYIPMSNKTGVVRSPFEYPQYYLAEPWQYSILCAYMFLLILLGFPINFMTLYVTIQHKKLRT +PLNYILLNLAFANHFMVLCGFTVTMYSSMNGYFILGATGCYVEGFFATLGGEIALWSLVVLAIERYVVVC +KPMSNFRFSENHAVMGVAFTWIMALSCAVPPLLGWSRYIPEGMQCSCGVDYYTLKPEVNNESFVIYMFVV +HFTIPLIIIFFCYGRLVCTVKEAAAQQQESATTQKAEKEVTRMVIIMVVFFLICWVPYASVAFFIFSNQG +SEFGPIFMTVPAFFAKSSSIYNPVIYIMLNKQFRNCMITTLCCGKNPFGEDDASSAATSKTEASSVSSSQ +VSPA + +>gi|283855846|gb|ADB45242.1| rhodopsin [Cynopterus brachyotis] +VPFSNKTGVVRSPFEHPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA +VADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE +NHAIMGLALTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF +FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGSNFGPIFMTL +PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS + +>gi|283855823|gb|ADB45229.1| rhodopsin [Myotis pilosus] +VPFSNKTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPINFLTLYVTVQHKKLRTPLNYILLNLA +VANLFMVFGGFTTTLYTSMHGYFVFGATGCNLEGFFATLGGEIALWSLVVLAIERYVVVCKPMSNFRFGE +NHAIMGLAFTWVMALACAAPPLAGWSRYIPEGMQCSCGIDYYTLKPEVNNESFVIYMFVVHFTIPMIVIF +FCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVVAFLICWLPYASVAFYIFTHQGSNFGPVFMTI +PAFFAKSSSIYNPVIYIMMNKQFRNCMLTTLCCGKNPLGDDEASTTAS + +>gi|223523|prf||0811197A rhodopsin [Bos taurus] +MNGTEGPNFYVPFSNKTGVVRSPFEAPQYYLAEPWQFSMLAAYMFLLIMLGFPINFLTLYVTVQHKKLRT +PLNYILLNLAVADLFMVFGGFTTTLYTSLHGYFVFGPTGCNLEGFFATLGGEIALWSLVVLAIERYVVVC 
+KPMSNFRFGENHAIMGVAFTWVMALACAAPPLVGWSRYIPEGMQCSCGIDYTPHEETNNESFVIYMFVVH +FIIPLIVIFFCYGQLVFTVKEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWLPYAGVAFYIFTHQGS +DFGPIFMTIPAFFAKTSAVYNPVIYIMMNKQFRNCMVTTLCCGKNPLGDDEASTTVSKTETSQVAPA + +>gi|12583665|dbj|BAB21486.1| fresh water form rod opsin [Conger myriaster] +MNGTEGPNFYIPMSNATGVVRSPFEYPQYYLAEPWAFSALSAYMFFLIIAGFPINFLTLYVTIEHKKLRT +PLNYILLNLAVADLFMVFGGFTTTMYTSMHGYFVFGPTGCNIEGFFATLGGEIALWCLVVLAIERWMVVC +KPVTNFRFGESHAIMGVMVTWTMALACALPPLFGWSRYIPEGLQCSCGIDYYTRAPGINNESFVIYMFTC +HFSIPLAVISFCYGRLVCTVKEAAAQQQESETTQRAEREVTRMVVIMVISFLVCWVPYASVAWYIFTHQG +STFGPIFMTIPSFFAKSSALYNPMIYICMNKQFRHCMITTLCCGKNPFEEEDGASATSSKTEASSVSSSS +VSPA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/venn_list.tabular Wed Sep 18 06:19:51 2013 -0400 @@ -0,0 +1,10 @@ +gi|57163783|ref|NP_001009242.1| +gi|3024260|sp|P56514.1|OPSD_BUFBU +gi|283855846|gb|ADB45242.1| +gi|283855823|gb|ADB45229.1| +gi|223523|prf||0811197A +gi|12583665|dbj|BAB21486.1| +gi|155008482|gb|ABS89278.1| +gi|4887221|gb|AAD32241.1| +gi|4506527|ref|NP_000530.1| +gi|20465209|gb|AAM11432.1|
--- a/tools/plotting/venn_list.py Mon May 06 14:05:13 2013 -0400 +++ b/tools/plotting/venn_list.py Wed Sep 18 06:19:51 2013 -0400 @@ -5,7 +5,7 @@ (formerly SCRI), UK. All rights reserved. See accompanying text file for licence details (MIT/BSD style). -This is version 0.0.3 of the script. +This is version 0.0.4 of the script. """ @@ -46,8 +46,9 @@ def load_ids(filename, filetype): if filetype=="tabular": for line in open(filename): - if not line.startswith("#"): - yield line.rstrip("\n").split("\t",1)[0] + line = line.rstrip("\n") + if line and not line.startswith("#"): + yield line.split("\t",1)[0] elif filetype=="fasta": for line in open(filename): if line.startswith(">"):
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/plotting/venn_list.rst Wed Sep 18 06:19:51 2013 -0400 @@ -0,0 +1,97 @@ +Galaxy tool to draw a Venn Diagram with up to 3 sets +==================================================== + +This tool is copyright 2011 by Peter Cock, The James Hutton Institute +(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. +See the licence text below. + +This tool is a short Python script (using both the Galaxy and Biopython library +functions) to extract ID lists from tabular, FASTA, FASTQ or SFF files to build +sets, which are then drawn using the R limma package function vennDiagram +(called from Python using rpy). + +There are just two files to install: + +* venn_list.py (the Python script) +* venn_list.xml (the Galaxy tool definition) + +The suggested location is in the Galaxy folder tools/plotting next to other +graph drawing tools. + +You will also need to install Biopython 1.54 or later, and the R/Bioconductor +package limma. You should already have rpy installed for other Galaxy tools. + +You will also need to modify the tools_conf.xml file to tell Galaxy to offer the +tool. The suggested location is in the "Graph/Display Data" section. Simply add +the line:: + + <tool file="plotting/venn_list.xml" /> + +If you wish to run the unit tests, also add this to tools_conf.xml.sample and +move/copy the test-data files under Galaxy's test-data folder. Then:: + + ./run_functional_tests.sh -id venn_list + + +History +======= + +======= ====================================================================== +Version Changes +------- ---------------------------------------------------------------------- +v0.0.3 - Initial public release. +v0.0.4 - Ignore blank lines when loading IDs from tabular files +v0.0.5 - Explicit Galaxy error handling of return codes +v0.0.6 - Added unit tests. + - Use reStructuredText for this README file. + - Adopt standard MIT licence. + - Updated citation information (Cock et al. 
 2013). + - Development moved to GitHub, https://github.com/peterjc/pico_galaxy +======= ====================================================================== + + +Developers +========== + +This script and related tools were originally developed on the following hg branch: +http://bitbucket.org/peterjc/galaxy-central/src/tools + +Development has now moved to a dedicated GitHub repository: +https://github.com/peterjc/pico_galaxy/tree/master/tools + +For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use +the following command from the Galaxy root folder:: + + $ tar -czf venn_list.tar.gz tools/plotting/venn_list.* test-data/venn_list1.pdf test-data/venn_list.tabular test-data/rhodopsin_proteins.fasta + +Check this worked:: + + $ tar -tzf venn_list.tar.gz + tools/plotting/venn_list.py + tools/plotting/venn_list.rst + tools/plotting/venn_list.xml + test-data/venn_list1.pdf + test-data/venn_list.tabular + test-data/rhodopsin_proteins.fasta + + +Licence (MIT) +============= + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
--- a/tools/plotting/venn_list.txt Mon May 06 14:05:13 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -Galaxy tool to draw a Venn Diagram with up to 3 sets -==================================================== - -This tool is copyright 2011 by Peter Cock, The James Hutton Institute -(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. -See the licence text below. - -This tool is a short Python script (using both the Galaxy and Biopython library -functions) to extract ID lists from tabular, FASTA, FASTQ or SFF files to build -sets, which are then drawn using the R limma package function vennDiagram -(called from Python using rpy). - -There are just two files to install: - -* venn_list.py (the Python script) -* venn_list.xml (the Galaxy tool definition) - -The suggested location is in the Galaxy folder tools/plotting next to other -graph drawing tools. - -You will also need to modify the tools_conf.xml file to tell Galaxy to offer the -tool. The suggested location is in the "Graph/Display Data" section. Simply add -the line: - -<tool file="plotting/venn_list.xml" /> - -You will also need to install Biopython 1.54 or later, and the R/Bioconductor -pacakge limma. You should already have rpy installed for other Galaxy tools. - - -History -======= - -v0.0.3 - Initial public release. 
- - -Developers -========== - -This script and related tools are being developed on the following hg branch: -http://bitbucket.org/peterjc/galaxy-central/src/tools - -For making the "Galaxy Tool Shed" http://community.g2.bx.psu.edu/ tarball use -the following command from the Galaxy root folder: - -tar -czf venn_list.tar.gz tools/plotting/venn_list.* - -Check this worked: - -$ tar -tzf venn_list.tar.gz -tools/plotting/venn_list.py -tools/plotting/venn_list.txt -tools/plotting/venn_list.xml - - -Licence (MIT/BSD style) -======================= - -Permission to use, copy, modify, and distribute this software and its -documentation with or without modifications and for any purpose and -without fee is hereby granted, provided that any copyright notices -appear in all copies and that both those copyright notices and this -permission notice appear in supporting documentation, and that the -names of the contributors or copyright holders not be used in -advertising or publicity pertaining to distribution of the software -without specific prior permission. - -THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL -WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT -OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE -OR PERFORMANCE OF THIS SOFTWARE.
--- a/tools/plotting/venn_list.xml Mon May 06 14:05:13 2013 -0400 +++ b/tools/plotting/venn_list.xml Wed Sep 18 06:19:51 2013 -0400 @@ -1,5 +1,9 @@ -<tool id="venn_list" name="Venn Diagram" version="0.0.3"> +<tool id="venn_list" name="Venn Diagram" version="0.0.6"> <description>from lists</description> + <requirements> + <requirement type="python-module">rpy</requirement> + <requirement type="python-module">Bio</requirement> + </requirements> <command interpreter="python"> venn_list.py #if $universe.type_select=="implicit": @@ -12,6 +16,11 @@ $s.set $s.set.ext "$s.lab" #end for $PDF</command> + <stdio> + <!-- Anything other than zero is an error --> + <exit_code range="1:" /> + <exit_code range=":-1" /> + </stdio> <inputs> <param name="main_lab" size="30" type="text" value="Venn Diagram" label="Plot title"/> <conditional name="universe"> @@ -32,13 +41,9 @@ <outputs> <data format="pdf" name="PDF" /> </outputs> - <requirements> - <requirement type="python-module">rpy</requirement> - <requirement type="python-module">Bio</requirement> - </requirements> <tests> <!-- Doesn't seem to work properly, manages to get two sets, both - with same FASTA file, but second with default "Group" label. + with same FASTA file, but second with default "Group" label. --> <test> <param name="type_select" value="explicit"/> <param name="main" value="venn_list.tabular" ftype="tabular"/> @@ -47,7 +52,6 @@ <param name="lab" value="Rhodopsins"/> <output name="PDF" file="venn_list1.pdf" ftype="pdf"/> </test> - --> <!-- Can't use more than one repeat value in tests (yet) <test> <param name="type_select" value="explicit"/> @@ -101,9 +105,16 @@ The R library is called from Python via rpy, http://rpy.sourceforge.net/ -This tool uses Biopython to read SFF files. If you use this tool with -SFF files in scientific work leading to a publication, please cite the -Biopython application note: +If you use this Galaxy tool in work leading to a scientific publication please +cite: + +Peter J.A. 
Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013). +Galaxy tools and workflows for sequence analysis with applications +in molecular plant pathology. PeerJ 1:e167 +http://dx.doi.org/10.7717/peerj.167 + +This tool uses Biopython to read and write SFF files, so you may also wish to +cite the Biopython application note (and Galaxy too of course): Cock et al 2009. Biopython: freely available Python tools for computational molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.