# HG changeset patch # User crs4 # Date 1378743377 14400 # Node ID 9c8ffce71f7c15f2e71a56ef9772a87bd05a2bf3 Uploaded diff -r 000000000000 -r 9c8ffce71f7c COPYING --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/COPYING Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,23 @@ +Copyright © 2013 CRS4 Srl. http://www.crs4.it/ +Created by: +Andrea Pinna +Nicola Soranzo + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff -r 000000000000 -r 9c8ffce71f7c Galaxy-Workflow-Glimmer.ga --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Galaxy-Workflow-Glimmer.ga Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,175 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "glimmer-from-scratch", + "format-version": "0.1", + "name": "Glimmer", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "Genome sequence" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 158, + "top": 200 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"Genome sequence\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": { + "loSequence": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "Long-ORFs", + "outputs": [ + { + "name": "logfile", + "type": "txt" + }, + { + "name": "loOutput", + "type": "glimmer_coords" + } + ], + "position": { + "left": 248, + "top": 336 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "long-orfs_wrapper", + "tool_state": "{\"loFixed\": \"\\\"False\\\"\", \"__page__\": 0, \"loMaxOverlap\": \"\\\"\\\"\", \"loMinLen\": \"\\\"\\\"\", \"loStartCodons\": \"\\\"\\\"\", \"loTransTable\": \"\\\"\\\"\", \"loNoHeader\": \"\\\"False\\\"\", \"loLengthOpt\": \"\\\"False\\\"\", \"loCutoff\": \"\\\"\\\"\", \"loStopCodons\": \"\\\"\\\"\", \"loLinear\": \"\\\"False\\\"\", \"loWithoutStops\": \"\\\"False\\\"\", \"loEntropy\": \"null\", \"loSequence\": \"null\"}", + "tool_version": "0.2", + "type": "tool", + "user_outputs": [] + }, + "2": { + "annotation": "", + "id": 2, + "input_connections": { + "exCoords": { + "id": 1, + "output_name": "loOutput" + }, + "exSequence": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "Extract", + "outputs": [ + { + "name": "logfile", + "type": "txt" + }, + { + "name": "exOutput", + "type": "fasta" 
+ } + ], + "position": { + "left": 659, + "top": 231 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "extract_wrapper", + "tool_state": "{\"__page__\": 0, \"exSequence\": \"null\", \"exCoords\": \"null\", \"exDir\": \"\\\"False\\\"\", \"exNoStop\": \"\\\"False\\\"\", \"exNoStart\": \"\\\"False\\\"\", \"exNoWrap\": \"\\\"False\\\"\", \"exMinLen\": \"\\\"\\\"\", \"ex2Fields\": \"\\\"False\\\"\"}", + "tool_version": "0.2", + "type": "tool", + "user_outputs": [] + }, + "3": { + "annotation": "", + "id": 3, + "input_connections": { + "biSequence": { + "id": 2, + "output_name": "exOutput" + } + }, + "inputs": [], + "name": "Build-ICM", + "outputs": [ + { + "name": "logfile", + "type": "txt" + }, + { + "name": "biIcm", + "type": "glimmer_icm" + } + ], + "position": { + "left": 177, + "top": 715 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "build-icm_wrapper", + "tool_state": "{\"__page__\": 0, \"biStopCodons\": \"\\\"\\\"\", \"biPeriod\": \"\\\"3\\\"\", \"biTransTable\": \"\\\"\\\"\", \"biNoStops\": \"\\\"False\\\"\", \"biDepth\": \"\\\"7\\\"\", \"biReverse\": \"\\\"False\\\"\", \"biSequence\": \"null\", \"biWidth\": \"\\\"12\\\"\"}", + "tool_version": "0.2", + "type": "tool", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "glIcm": { + "id": 3, + "output_name": "biIcm" + }, + "glSequence": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "Glimmer3", + "outputs": [ + { + "name": "logfile", + "type": "txt" + }, + { + "name": "glDetail", + "type": "txt" + }, + { + "name": "glPredict", + "type": "txt" + } + ], + "position": { + "left": 508, + "top": 715 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "glimmer3_wrapper", + "tool_state": "{\"__page__\": 0, \"glFirstCodon\": \"\\\"False\\\"\", \"glNoIndep\": \"\\\"False\\\"\", \"glIcm\": \"null\", \"glRbsPwm\": \"null\", \"glEntropy\": \"null\", \"glSeparateGenes\": \"\\\"False\\\"\", 
def build_anomaly_command(options):
    """Return the ``anomaly`` argument vector for the parsed *options*.

    The result is a list suitable for ``subprocess`` with ``shell=False``:
    every user-supplied value is a single list element, so spaces or shell
    metacharacters in a path or codon list can neither split the argument
    nor be interpreted by a shell.

    Element order matches the command line the wrapper has always produced:
    ``-A <start codons>``, ``-s``, ``-t``, ``-Z <stop codons>``, then the
    sequence file and the coordinates file.
    """
    argv = ['anomaly']
    if options.start_codons:
        argv.extend(['-A', options.start_codons])
    if options.check_first_codon:
        argv.append('-s')
    if options.check_stop_codon:
        argv.append('-t')
    if options.stop_codons:
        argv.extend(['-Z', options.stop_codons])
    argv.extend([options.sequence, options.coords])
    return argv


def __main__():
    """Parse the Galaxy-supplied options and run Glimmer's ``anomaly``.

    The program's standard output is written to the ``--anOutput`` file and
    its standard error to ``--logfile`` (or to this process's stdout when no
    log file is given).

    Exits through ``parser.error`` (status 2) on stray positional arguments
    or when a required option is missing; raises
    ``subprocess.CalledProcessError`` when ``anomaly`` exits non-zero.
    """
    # load arguments
    print('Parsing Anomaly input options...')
    parser = optparse.OptionParser()
    parser.add_option('--anSequence', dest='sequence', help='')
    parser.add_option('--anCoords', dest='coords', help='')
    parser.add_option('--anCheckFirstCodon', action='store_true', dest='check_first_codon', help='')
    parser.add_option('--anCheckStopCodon', action='store_true', dest='check_stop_codon', help='')
    parser.add_option('--anStartCodons', dest='start_codons', help='')
    parser.add_option('--anStopCodons', dest='stop_codons', help='')
    parser.add_option('--anOutput', dest='output', help='')
    parser.add_option('--logfile', dest='logfile', help='')
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')
    # Fail early instead of passing the literal string "None" to the tool.
    required = (
        ('--anSequence', options.sequence),
        ('--anCoords', options.coords),
        ('--anOutput', options.output),
    )
    for flag, value in required:
        if not value:
            parser.error('Missing required option %s' % flag)

    # Build Anomaly command
    argv = build_anomaly_command(options)
    print('\nAnomaly command to be executed: \n %s > %s' % (' '.join(argv), options.output))

    print('Executing Anomaly...')
    if options.logfile:
        log = open(options.logfile, 'w')
    else:
        log = sys.stdout
    try:
        # Push our own buffered messages out before the child writes to the
        # same descriptor, so the log stays in chronological order.
        sys.stdout.flush()
        with open(options.output, 'w') as out:
            subprocess.check_call(argv, stdout=out, stderr=log)
    finally:
        if log is not sys.stdout:
            log.close()
    print('Anomaly executed!')
def build_icm_command(options):
    """Return the ``build-icm`` argument vector for the parsed *options*.

    The list is meant for ``subprocess`` with ``shell=False``; each value is
    one element, so nothing is re-split or interpreted by a shell.  Numeric
    options are emitted when they are not ``None`` (so an explicit ``0`` is
    kept); boolean and string options are emitted when truthy.  The ICM
    output path is the final positional argument.  The training sequences
    are NOT part of the argument vector: the caller feeds them on stdin.
    """
    argv = ['build-icm']
    if options.depth is not None:
        argv.extend(['--depth', '%d' % options.depth])
    if options.no_stops:
        argv.append('--no_stops')
    if options.period is not None:
        argv.extend(['--period', '%d' % options.period])
    if options.reverse:
        argv.append('--reverse')
    if options.width is not None:
        argv.extend(['--width', '%d' % options.width])
    if options.trans_table is not None:
        argv.extend(['--trans_table', '%d' % options.trans_table])
    if options.stop_codons:
        argv.extend(['--stop_codons', options.stop_codons])
    argv.append(options.output)
    return argv


def __main__():
    """Parse the Galaxy-supplied options and run Glimmer's ``build-icm``.

    The ``--biSequence`` file is connected to the program's standard input;
    its standard output and standard error both go to ``--logfile`` (or to
    this process's stdout when no log file is given).  The model itself is
    written by ``build-icm`` to the ``--biIcm`` path.

    Exits through ``parser.error`` (status 2) on stray positional arguments
    or when a required option is missing; raises
    ``subprocess.CalledProcessError`` when ``build-icm`` exits non-zero.
    """
    # load arguments
    print('Parsing Build-ICM input options...')
    parser = optparse.OptionParser()
    parser.add_option('--biSequence', dest='sequence', help='')
    parser.add_option('--biDepth', dest='depth', type='int', help='')
    parser.add_option('--biNoStops', action='store_true', dest='no_stops', help='')
    parser.add_option('--biPeriod', dest='period', type='int', help='')
    parser.add_option('--biReverse', action='store_true', dest='reverse', help='')
    parser.add_option('--biWidth', dest='width', type='int', help='')
    parser.add_option('--biTransTable', dest='trans_table', type='int', help='')
    parser.add_option('--biStopCodons', dest='stop_codons', help='')
    parser.add_option('--biIcm', dest='output', help='')
    parser.add_option('--logfile', dest='logfile', help='')
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')
    # Fail early instead of passing the literal string "None" to the tool.
    required = (
        ('--biSequence', options.sequence),
        ('--biIcm', options.output),
    )
    for flag, value in required:
        if not value:
            parser.error('Missing required option %s' % flag)

    # Build Build-ICM command
    argv = build_icm_command(options)
    print('\nBuild-ICM command to be executed: \n %s < %s' % (' '.join(argv), options.sequence))

    print('Executing Build-ICM...')
    if options.logfile:
        log = open(options.logfile, 'w')
    else:
        log = sys.stdout
    try:
        # Push our own buffered messages out before the child writes to the
        # same descriptor, so the log stays in chronological order.
        sys.stdout.flush()
        with open(options.sequence, 'rb') as training:
            subprocess.check_call(argv, stdin=training, stdout=log, stderr=subprocess.STDOUT)
    finally:
        if log is not sys.stdout:
            log.close()
    print('Build-ICM executed!')
def build_extract_command(options):
    """Return the ``extract`` argument vector for the parsed *options*.

    The list is meant for ``subprocess`` with ``shell=False``; each value is
    one element, so paths containing spaces or shell metacharacters are
    passed through verbatim.  Flags come first, in the order the wrapper has
    always used, followed by the sequence file and the coordinates file.
    ``--minlen`` is emitted whenever a value was given, including ``0``.
    """
    argv = ['extract']
    if options.twofields:
        argv.append('--2_fields')
    if options.dir:
        argv.append('--dir')
    if options.minlen is not None:
        argv.extend(['--minlen', '%d' % options.minlen])
    if options.nostart:
        argv.append('--nostart')
    if options.nostop:
        argv.append('--nostop')
    if options.nowrap:
        argv.append('--nowrap')
    argv.extend([options.sequence, options.coords])
    return argv


def __main__():
    """Parse the Galaxy-supplied options and run Glimmer's ``extract``.

    The program's standard output (the extracted sequences) is written to
    the ``--exOutput`` file and its standard error to ``--logfile`` (or to
    this process's stdout when no log file is given).

    Exits through ``parser.error`` (status 2) on stray positional arguments
    or when a required option is missing; raises
    ``subprocess.CalledProcessError`` when ``extract`` exits non-zero.
    """
    # load arguments
    print('Parsing Extract input options...')
    parser = optparse.OptionParser()
    parser.add_option('--exSequence', dest='sequence', help='')
    parser.add_option('--exCoords', dest='coords', help='')
    parser.add_option('--ex2Fields', action='store_true', dest='twofields', help='')
    parser.add_option('--exDir', action='store_true', dest='dir', help='')
    parser.add_option('--exMinLen', dest='minlen', type='int', help='')
    parser.add_option('--exNoStart', action='store_true', dest='nostart', help='')
    parser.add_option('--exNoStop', action='store_true', dest='nostop', help='')
    parser.add_option('--exNoWrap', action='store_true', dest='nowrap', help='')
    parser.add_option('--exOutput', dest='output', help='')
    parser.add_option('--logfile', dest='logfile', help='')
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')
    # Fail early instead of passing the literal string "None" to the tool.
    required = (
        ('--exSequence', options.sequence),
        ('--exCoords', options.coords),
        ('--exOutput', options.output),
    )
    for flag, value in required:
        if not value:
            parser.error('Missing required option %s' % flag)

    # Build Extract command
    argv = build_extract_command(options)
    print('\nExtract command to be executed: \n %s > %s' % (' '.join(argv), options.output))

    print('Executing Extract...')
    if options.logfile:
        log = open(options.logfile, 'w')
    else:
        log = sys.stdout
    try:
        # Push our own buffered messages out before the child writes to the
        # same descriptor, so the log stays in chronological order.
        sys.stdout.flush()
        with open(options.output, 'w') as out:
            subprocess.check_call(argv, stdout=out, stderr=log)
    finally:
        if log is not sys.stdout:
            log.close()
    print('Extract executed!')
def build_glimmer3_command(options, prefix='prefix'):
    """Return the ``glimmer3`` argument vector for the parsed *options*.

    The list is meant for ``subprocess`` with ``shell=False``.  Because no
    shell is involved, values such as the ``#`` entropy sentinel or paths
    with spaces need no quoting and cannot be re-split or interpreted.

    Three kinds of option are handled:

    * ``text``   - emitted as ``flag value`` when the value is non-empty;
    * ``number`` - emitted as ``flag value`` when the value is not ``None``
      (so an explicit ``0`` is kept);
    * ``switch`` - emitted as a bare ``flag`` when truthy.

    Options appear in the order the wrapper has always used, followed by the
    sequence file, the ICM file and the output *prefix* from which
    ``glimmer3`` derives its ``.detail`` and ``.predict`` file names.
    """
    layout = (
        ('text', '--start_codons', options.start_codons),
        ('text', '--rbs_pwm', options.rbs_pwm),
        ('number', '--gc_percent', options.gc_percent),
        ('text', '--entropy', options.entropy),
        ('switch', '--first_codon', options.first_codon),
        ('number', '--gene_len', options.gene_len),
        ('text', '--ignore', options.ignore),
        ('switch', '--linear', options.linear),
        ('text', '--orf_coords', options.orf_coords),
        ('switch', '--separate_genes', options.separate_genes),
        ('number', '--max_olap', options.max_olap),
        ('text', '--start_probs', options.start_probs),
        ('number', '--ignore_score_len', options.ignore_score_len),
        ('switch', '--no_indep', options.no_indep),
        ('number', '--threshold', options.threshold),
        ('switch', '--extend', options.extend),
        ('number', '--trans_table', options.trans_table),
        ('text', '--stop_codons', options.stop_codons),
    )
    argv = ['glimmer3']
    for kind, flag, value in layout:
        if kind == 'number':
            if value is not None:
                argv.extend([flag, str(value)])
        elif value:
            argv.append(flag)
            if kind == 'text':
                argv.append(value)
    argv.extend([options.sequence, options.icm, prefix])
    return argv


def __main__():
    """Parse the Galaxy-supplied options and run ``glimmer3``.

    ``glimmer3`` writes ``prefix.detail`` and ``prefix.predict`` in the
    current working directory; after a successful run they are moved to the
    ``--glDetail`` and ``--glPredict`` paths.  The program's standard output
    and standard error both go to ``--logfile`` (or to this process's stdout
    when no log file is given).

    Exits through ``parser.error`` (status 2) on stray positional arguments
    or when a required option is missing; raises
    ``subprocess.CalledProcessError`` when ``glimmer3`` exits non-zero.
    """
    # load arguments
    print('Parsing input options...')
    parser = optparse.OptionParser()
    parser.add_option('--glSequence', dest='sequence', help='')
    parser.add_option('--glIcm', dest='icm', help='')
    parser.add_option('--glStartCodons', dest='start_codons', help='')
    parser.add_option('--glRbsPwm', dest='rbs_pwm', help='')
    parser.add_option('--glGcPercent', dest='gc_percent', type='float', help='')
    parser.add_option('--glEntropy', dest='entropy', help='')
    parser.add_option('--glFirstCodon', action='store_true', dest='first_codon', help='')
    parser.add_option('--glGeneLen', dest='gene_len', type='int', help='')
    parser.add_option('--glIgnore', dest='ignore', help='')
    parser.add_option('--glLinear', action='store_true', dest='linear', help='')
    parser.add_option('--glOrfCoords', dest='orf_coords', help='')
    parser.add_option('--glSeparateGenes', action='store_true', dest='separate_genes', help='')
    parser.add_option('--glMaxOverlap', dest='max_olap', type='int', help='')
    parser.add_option('--glStartProbs', dest='start_probs', help='')
    parser.add_option('--glIgnoreScoreLen', dest='ignore_score_len', type='int', help='')
    parser.add_option('--glNoIndep', action='store_true', dest='no_indep', help='')
    parser.add_option('--glThreshold', dest='threshold', type='int', help='')
    parser.add_option('--glExtend', action='store_true', dest='extend', help='')
    parser.add_option('--glTransTable', dest='trans_table', type='int', help='')
    parser.add_option('--glStopCodons', dest='stop_codons', help='')
    parser.add_option('--glDetail', dest='detail', help='')
    parser.add_option('--glPredict', dest='predict', help='')
    parser.add_option('--logfile', dest='logfile', help='')
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')
    # Check the destinations up front: without this, a missing --glDetail or
    # --glPredict is only noticed after the whole run, when the move fails.
    required = (
        ('--glSequence', options.sequence),
        ('--glIcm', options.icm),
        ('--glDetail', options.detail),
        ('--glPredict', options.predict),
    )
    for flag, value in required:
        if not value:
            parser.error('Missing required option %s' % flag)

    # The prefix only names the two temporary result files in the working
    # directory; users never see it.
    prefix = 'prefix'

    # Build Glimmer3 command
    argv = build_glimmer3_command(options, prefix)
    print('\nGlimmer3 command to be executed: \n %s' % (' '.join(argv)))

    print('Executing Glimmer3...')
    if options.logfile:
        log = open(options.logfile, 'w')
    else:
        log = sys.stdout
    try:
        # Push our own buffered messages out before the child writes to the
        # same descriptor, so the log stays in chronological order.
        sys.stdout.flush()
        subprocess.check_call(argv, stdout=log, stderr=subprocess.STDOUT)
    finally:
        if log is not sys.stdout:
            log.close()
    print('Glimmer3 executed!')

    shutil.move(prefix + ".detail", options.detail)
    shutil.move(prefix + ".predict", options.predict)
def build_long_orfs_command(options):
    """Return the ``long-orfs`` argument vector for the parsed *options*.

    The list is meant for ``subprocess`` with ``shell=False``; each value is
    one element, so paths or codon lists containing spaces or shell
    metacharacters are passed through verbatim.

    Three kinds of option are handled:

    * ``text``   - emitted as ``flag value`` when the value is non-empty;
    * ``number`` - emitted as ``flag value`` when the value is not ``None``
      (so an explicit ``0`` is kept);
    * ``switch`` - emitted as a bare ``flag`` when truthy.

    The sequence file comes first and the output file last, with the
    options in between, exactly as the wrapper has always ordered them.
    """
    layout = (
        ('text', '--start_codons', options.start_codons),
        ('text', '--entropy', options.entropy),
        ('switch', '--fixed', options.fixed),
        ('number', '--min_len', options.min_len),
        ('text', '--ignore', options.ignore),
        ('switch', '--linear', options.linear),
        ('switch', '--length_opt', options.length_opt),
        ('switch', '--no_header', options.no_header),
        ('number', '--max_olap', options.max_olap),
        ('number', '--cutoff', options.cutoff),
        ('switch', '--without_stops', options.without_stops),
        ('number', '--trans_table', options.trans_table),
        ('text', '--stop_codons', options.stop_codons),
    )
    argv = ['long-orfs', options.sequence]
    for kind, flag, value in layout:
        if kind == 'number':
            if value is not None:
                argv.extend([flag, str(value)])
        elif value:
            argv.append(flag)
            if kind == 'text':
                argv.append(value)
    argv.append(options.output)
    return argv


def __main__():
    """Parse the Galaxy-supplied options and run Glimmer's ``long-orfs``.

    ``long-orfs`` itself writes the coordinates to the ``--loOutput`` path;
    its standard output and standard error both go to ``--logfile`` (or to
    this process's stdout when no log file is given).

    Exits through ``parser.error`` (status 2) on stray positional arguments
    or when a required option is missing; raises
    ``subprocess.CalledProcessError`` when ``long-orfs`` exits non-zero.
    """
    # load arguments
    print('Parsing Long-ORFs input options...')
    parser = optparse.OptionParser()
    parser.add_option('--loSequence', dest='sequence', help='')
    parser.add_option('--loStartCodons', dest='start_codons', help='')
    parser.add_option('--loEntropy', dest='entropy', help='')
    parser.add_option('--loFixed', action='store_true', dest='fixed', help='')
    parser.add_option('--loMinLen', dest='min_len', type='int', help='')
    parser.add_option('--loIgnore', dest='ignore', help='')
    parser.add_option('--loLinear', action='store_true', dest='linear', help='')
    parser.add_option('--loLengthOpt', action='store_true', dest='length_opt', help='')
    parser.add_option('--loNoHeader', action='store_true', dest='no_header', help='')
    parser.add_option('--loMaxOverlap', dest='max_olap', type='int', help='')
    parser.add_option('--loCutoff', dest='cutoff', type='float', help='')
    parser.add_option('--loWithoutStops', action='store_true', dest='without_stops', help='')
    parser.add_option('--loTransTable', dest='trans_table', type='int', help='')
    parser.add_option('--loStopCodons', dest='stop_codons', help='')
    parser.add_option('--loOutput', dest='output', help='')
    parser.add_option('--logfile', dest='logfile', help='')
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')
    # Fail early instead of passing the literal string "None" to the tool.
    required = (
        ('--loSequence', options.sequence),
        ('--loOutput', options.output),
    )
    for flag, value in required:
        if not value:
            parser.error('Missing required option %s' % flag)

    # Build Long-ORFs command
    argv = build_long_orfs_command(options)
    print('\nLong-ORFs command to be executed: \n %s' % (' '.join(argv)))

    print('Executing Long-ORFs...')
    if options.logfile:
        log = open(options.logfile, 'w')
    else:
        log = sys.stdout
    try:
        # Push our own buffered messages out before the child writes to the
        # same descriptor, so the log stays in chronological order.
        sys.stdout.flush()
        subprocess.check_call(argv, stdout=log, stderr=subprocess.STDOUT)
    finally:
        if log is not sys.stdout:
            log.close()
    print('Long-ORFs executed!')
_MIT license: http://opensource.org/licenses/MIT + +If you use this tool in Galaxy, please cite |Cuccuru2013|_. + +.. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted* +.. _Cuccuru2013: http://orione.crs4.it/ + +This tool uses `Glimmer`_, which is licensed separately. Please cite |Delcher2007|_. + +.. _Glimmer: http://ccb.jhu.edu/software/glimmer/index.shtml +.. |Delcher2007| replace:: Delcher, A. L., Bratke, K. A., Powers, E. C., Salzberg, S. L. (2007) Identifying bacterial genes and endosymbiont DNA with Glimmer. *Bioinformatics* 23(6), 673-679 +.. _Delcher2007: http://bioinformatics.oxfordjournals.org/content/23/6/673 + + diff -r 000000000000 -r 9c8ffce71f7c tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Sep 09 12:16:17 2013 -0400 @@ -0,0 +1,20 @@ + + + + + + http://ccb.jhu.edu/software/glimmer/glimmer302b.tar.gz + make -C src/ + + rm -f bin/test + + bin + $INSTALL_DIR/bin + + + $INSTALL_DIR/bin + + + + +