Mercurial > repos > cmonjeau > commet
changeset 0:a6beb4d4c417
Imported from capsule None
author | cmonjeau |
---|---|
date | Fri, 05 Jun 2015 11:41:26 -0400 |
parents | |
children | 0a09dd575d91 |
files | commet.py commet.xml commet_datatype.py datatypes_conf.xml prepare_commet.py prepare_commet.xml tool_dependencies.xml |
diffstat | 7 files changed, 379 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# File: commet.py (added in changeset 0:a6beb4d4c417)
"""
Galaxy wrapper around the COMMET (COmpare Multiple METagenomes) pipeline.

WARNING:

This wrapper launches the ``Commet.py`` executable, which therefore has to be
reachable through your $PATH.  It also reads two environment variables:

* ``RSCRIPTS``: directory holding ``heatmap.r`` and ``dendro.R``, which are
  copied into the working directory before the run;
* ``BINARIES``: value handed to ``Commet.py`` through its ``-b`` option
  (presumably the directory of the compiled commet binaries -- confirm
  against the Commet.py documentation).

commet is available after compiling sources:

http://github.com/pierrepeterlongo/commet

or with the galaxy_commet package in the GenOuest toolshed (coming soon)

NOTE:

please add the line #!/usr/bin/env python at the top of the Commet.py file
if you get a bash error.
"""
import sys
import tempfile
import subprocess
import glob
import os
import re
import shutil
import optparse
import zipfile
import tarfile
import gzip
from os.path import basename

# Name of the header-less copy of the input handed to Commet.py.
_CLEAN_INPUT = "commet_clean_file"

# Sub-directory of the working directory where the results are collected.
_OUTPUT_SUBDIR = "output_commet"

# (command line flag, optparse destination) of the mandatory analysis options.
_REQUIRED_PARAMETERS = (
    ("--input", "input"),
    ("-k", "kmer"),
    ("-t", "minsharedkmer"),
    ("-l", "minlengthread"),
    ("-e", "minshannonindex"),
)

# Destinations of the output path options; each is passed as ``--<name>``.
_OUTPUT_OPTIONS = (
    "output",
    "output_vectors",
    "output_dendro",
    "output_logs",
    "output_matrix",
    "output_heatmap1",
    "output_heatmap2",
    "output_heatmap3",
)


def _parse_options():
    """Parse the command line and reject it early when an option is missing.

    Every option listed in ``_REQUIRED_PARAMETERS`` and ``_OUTPUT_OPTIONS`` is
    used unconditionally later on, so a missing one is reported here through
    ``parser.error`` (usage message on stderr, exit status 2) instead of
    failing later with an unrelated ``TypeError``.

    Returns the optparse ``Values`` object.
    """
    parser = optparse.OptionParser()
    for flag, dest in _REQUIRED_PARAMETERS:
        parser.add_option(flag, dest=dest)
    # Optional read filters.
    parser.add_option("-n", dest="maxn")
    parser.add_option("-m", dest="maxreads")
    for dest in _OUTPUT_OPTIONS:
        parser.add_option("--" + dest)

    options, _ = parser.parse_args()

    expected = list(_REQUIRED_PARAMETERS)
    expected.extend(("--" + dest, dest) for dest in _OUTPUT_OPTIONS)
    missing = [flag for flag, dest in expected
               if getattr(options, dest) is None]
    if missing:
        parser.error("missing required option(s): " + ", ".join(missing))
    return options


def _strip_first_line(source_path, target_path):
    """Copy ``source_path`` to ``target_path`` without its first line.

    The first line of the Galaxy dataset is the ``//commet input file//``
    marker written by prepare_commet.py; it is not part of the read set list.
    """
    with open(source_path, "r") as source:
        with open(target_path, "w") as target:
            source.readline()  # drop the marker line
            target.writelines(source)


def _build_command(options, binaries):
    """Return the ``Commet.py`` argument list for the parsed ``options``."""
    command = [
        "Commet.py", _CLEAN_INPUT,
        "-b", binaries,
        "-k", options.kmer,
        "-t", options.minsharedkmer,
        "-l", options.minlengthread,
        "-e", options.minshannonindex,
    ]
    # Each filter is added on its own so that giving only one of them can no
    # longer put ``None`` into the argument list.
    if options.maxn is not None:
        command.extend(["-n", options.maxn])
    if options.maxreads is not None:
        command.extend(["-m", options.maxreads])
    return command


def _run_and_log(command, log_path):
    """Run ``command`` and write its command line, stdout and stderr to a log.

    The log is written whatever the outcome.  When the command exits with a
    non-zero status the wrapper stops with a non-zero status as well, rather
    than going on to look for results that were never produced.
    """
    # universal_newlines=True makes communicate() return text, so the log does
    # not contain a bytes repr (b'...') when run under Python 3.
    process = subprocess.Popen(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               universal_newlines=True)
    stdoutput, stderror = process.communicate()

    with open(log_path, "w") as logfile:
        logfile.write("[COMMAND LINE]" + ' '.join(command) + "\n\n")
        logfile.write(stdoutput)
        logfile.write(stderror)

    if process.returncode != 0:
        sys.stderr.write(stderror)
        sys.stderr.write("Commet.py exited with status %d\n"
                         % process.returncode)
        # A negative status means "killed by a signal"; report plain failure.
        sys.exit(process.returncode if process.returncode > 0 else 1)


def _archive_by_suffix(directory, archives):
    """Zip the entries of ``directory`` according to their file name suffix.

    ``archives`` is a sequence of ``(suffix, zip_path)`` pairs.  Each zip file
    is always created, even when no entry carries its suffix, and stores its
    members under their base name.
    """
    entries = sorted(glob.glob(os.path.join(directory, "*")))
    for suffix, zip_path in archives:
        archive = zipfile.ZipFile(zip_path, 'w')
        try:
            for path in entries:
                # Literal suffix comparison: the former pattern ".csv$" had an
                # unescaped dot and also matched names such as "foo_csv".
                if path.endswith(suffix):
                    archive.write(path, os.path.basename(path))
        finally:
            archive.close()


def __main__():
    """Run Commet.py on a Galaxy read set file and collect its results.

    Steps: copy the R scripts next to the job, strip the marker line from the
    input, run ``Commet.py``, write the log, zip the ``.bv``, ``.log`` and
    ``.csv`` results and move the four PNG plots to the paths Galaxy asked
    for.  Exits with a non-zero status when an option is missing, when
    ``Commet.py`` fails or when the result directory is absent.
    """
    options = _parse_options()
    work_dir = os.getcwd()

    # Copy the R scripts into the current dir.
    rscripts_dir = os.environ['RSCRIPTS']
    for script in ("heatmap.r", "dendro.R"):
        shutil.copy(os.path.join(rscripts_dir, script), work_dir)

    _strip_first_line(options.input, _CLEAN_INPUT)

    cmd_line = _build_command(options, os.environ['BINARIES'])
    _run_and_log(cmd_line, options.output)

    output_dir = os.path.join(work_dir, _OUTPUT_SUBDIR)
    if not os.path.isdir(output_dir):
        sys.exit("Commet.py did not create the %s directory" % output_dir)

    # The archives are written straight to their final location.
    _archive_by_suffix(output_dir, (
        (".bv", options.output_vectors),
        (".log", options.output_logs),
        (".csv", options.output_matrix),
    ))

    # Plots
    plots = (
        ("dendrogram_normalized.png", options.output_dendro),
        ("heatmap_normalized.png", options.output_heatmap1),
        ("heatmap_percentage.png", options.output_heatmap2),
        ("heatmap_plain.png", options.output_heatmap3),
    )
    for png_name, destination in plots:
        shutil.move(os.path.join(output_dir, png_name), destination)


if __name__ == "__main__":
    __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/commet.xml Fri Jun 05 11:41:26 2015 -0400 @@ -0,0 +1,103 @@ +<tool id="commet" name="commet" version="24.7.14"> + <description>COmpare Multiple METagenomes</description> + <requirements> + <requirement type="package" version="24.7.14">commet</requirement> + </requirements> +<command interpreter="python"> +commet.py +--input $input +-k $kmer +-t $minsharedkmer +-l $minlengthread +-e $minshannonindex +#if str( $options_advanced.options_advanced_selector ) == "advanced" +-m $options_advanced.maxreads +-n $options_advanced.maxn +#end if +--output $output +--output_vectors $output_vectors +--output_dendro $output_dendro +--output_logs $output_logs +--output_matrix $output_matrix +--output_heatmap1 $output_heatmap1 +--output_heatmap2 $output_heatmap2 +--output_heatmap3 $output_heatmap3 +</command> + + <inputs> + <!-- Input data files --> + <param name="input" type="data" format="commet" label="Read sets" help="input read sets a line = a set composed by “set_name: read_file; read_file...“. Generate with Prepare commet tool" /> + <param name="kmer" type="integer" label="Size of kmers" value="33" help="Set the length of used kmers." /> + <param name="minsharedkmer" type="integer" label="Mini shared kmers" value="2" help="Minimal number of shared kmers." /> + <param name="minlengthread" type="integer" label="Read mini length" value="0" help="Minimal length a read should have to be kept." /> + <param name="minshannonindex" type="float" label="Mini Shannon index" value="0" help="Minimal Shannon index a read should have to be kept. 
Float in [0,2.32]" /> + <conditional name="options_advanced"> + <param name="options_advanced_selector" type="select" label="Reads filter options" help="by default, all reads are kept with no Ns limitation"> + <option value="default" selected="true">default</option> + <option value="advanced">advanced</option> + </param> + <when value="advanced"> + <param name="maxreads" type="integer" value="600" label="Maximum number of selected reads in sets" help="Maximum number of selected reads in sets. If a set is composed by 3 read files, and this option = 600, then the first 200 reads from each read file will be treated" /> + <param name="maxn" type="integer" value="5" label="Read maxi number of Ns" help="Maximal number of Ns a read should contain to be kept." /> + </when> + </conditional> + </inputs> + + <outputs> + <data format="txt" name="output" label="${tool.name} on ${on_string}: commet.log" /> + <data format="zip" name="output_vectors" label="${tool.name} on ${on_string}: vector.zip" /> + <data format="zip" name="output_logs" label="${tool.name} on ${on_string}: logs.zip" /> + <data format="png" name="output_dendro" label="${tool.name} on ${on_string}: dendrogram.png" /> + <data format="zip" name="output_matrix" label="${tool.name} on ${on_string}: matrix.zip" /> + <data format="png" name="output_heatmap1" label="${tool.name} on ${on_string}: heatmap_normalized.png" /> + <data format="png" name="output_heatmap2" label="${tool.name} on ${on_string}: heatmap_percentage.png" /> + <data format="png" name="output_heatmap3" label="${tool.name} on ${on_string}: heatmap_plain.png" /> + </outputs> + <help> + +**Description** + +COMMET (COmpare Multiple METagenomes”) provides a global similarity overview between all datasets of a large metagenomic project. + +Directly from non-assembled reads, all against all comparisons are performed through an efficient indexing strategy. 
Then, results are stored as bit vectors, a compressed representation of read files, that can be used to further combine read subsets by common logical operations. Finally, COMMET computes a clusterization of metagenomic datasets, which is visualized by dendrogram and heatmaps. + + +------- + +**Web site** + +http://colibread.inria.fr/commet/ + +------- + +**Integrated by** + +Yvan Le Bras and Cyril Monjeaud + +GenOuest Bio-informatics Core Facility + +UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France) + +support@genouest.org + +If you use this tool in Galaxy, please cite : + +`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. <https://www.e-biogenouest.org/resources/128>`_ + + + </help> +<citations> +<citation type="doi">10.1186/1471-2105-13-S19-S10</citation> +<citation type="bibtex">@INPROCEEDINGS{JOBIM2013, + author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.}, + title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France}, + booktitle = {JOBIM 2013 Proceedings}, + year = {2013}, + url = {https://www.e-biogenouest.org/resources/128}, + pages = {97-106} + } +</citation> +</citations> + +</tool> +
# File: commet_datatype.py (added in changeset 0:a6beb4d4c417)
"""
Commet input file datatype
"""


import logging, os, os.path, sys, time, tempfile, shutil, string, glob, re

from galaxy.datatypes.sniff import *
from galaxy.datatypes import data
from galaxy.datatypes.metadata import MetadataElement
from galaxy.datatypes.xml import GenericXml

log = logging.getLogger(__name__)


class Commet( data.Text ):
    """
    Commet read set description file.

    Text file whose first line is the marker ``//commet input file//``; the
    remaining lines are the read sets handed to the commet tool.
    """
    file_ext = "commet"

    # Marker expected at the very beginning of a commet input file.
    _HEADER = b'//commet input file//'

    def sniff( self, filename ):
        """
        Return True when the file starts with the commet marker, else False.

        Only as many bytes as the marker is long are read, in binary mode, so
        sniffing an arbitrary (possibly huge or binary) file neither loads it
        into memory nor fails on undecodable bytes.  An unreadable file is
        reported as "not commet".
        """
        try:
            with open( filename, 'rb' ) as handle:
                return handle.read( len( self._HEADER ) ) == self._HEADER
        except ( IOError, OSError ):
            return False

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'Commet input data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/datatypes_conf.xml Fri Jun 05 11:41:26 2015 -0400 @@ -0,0 +1,12 @@ +<?xml version="1.0"?> +<datatypes> + <datatype_files> + <datatype_file name="commet_datatype.py"/> + </datatype_files> + <registration> + <datatype extension="commet" type="galaxy.datatypes.commet_datatype:Commet" display_in_upload="true"/> + </registration> + <sniffers> + <sniffer type="galaxy.datatypes.commet_datatype:Commet"/> + </sniffers> +</datatypes>
# File: prepare_commet.py (added in changeset 0:a6beb4d4c417)
"""
Write the read set description file consumed by the commet tool.

Usage: prepare_commet.py OUTPUT READS SET_NAME [READS SET_NAME ...]

Created by Cyril Monjeaud
Cyril.Monjeaud@irisa.fr

"""
import sys, os, re


def __main__():
    """Build the commet input file from the command line arguments.

    ``sys.argv[1]`` is the output path.  The remaining arguments come in
    pairs: a comma separated list of read files followed by the name of the
    set they belong to.  The output starts with the ``//commet input file//``
    marker line, followed by one ``set_name:file;file...`` line per pair.

    The arguments are checked before the output is opened, so an incomplete
    pair ends the script with a usage message (exit status 1) and writes
    nothing, instead of raising ``IndexError`` half-way through the file.
    """
    arguments = sys.argv[1:]
    if not arguments or len(arguments) % 2 != 1:
        sys.exit("usage: prepare_commet.py OUTPUT READS SET_NAME "
                 "[READS SET_NAME ...]")

    output_path = arguments[0]
    reads_lists = arguments[1::2]
    set_names = arguments[2::2]

    # open the output file
    with open(output_path, 'w') as read_set:
        read_set.write("//commet input file//\n")

        # write the files path; commet separates the files of a set with ';'
        for reads, set_name in zip(reads_lists, set_names):
            read_set.write(set_name + ":")
            read_set.write(reads.replace(",", ";") + "\n")


if __name__ == "__main__":
    __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/prepare_commet.xml Fri Jun 05 11:41:26 2015 -0400 @@ -0,0 +1,61 @@ +<tool id="preparecommet" name="Prepare commet" version="1.0"> + <description>input sets file</description> +<command interpreter="python"> +prepare_commet.py +${read_sets} +#for $set in $sets + ${set.reads} + "${set.setname}" +#end for + +</command> + + <inputs> + <!-- Input data files --> + <repeat name="sets" title="Read sets" min="1"> + <param name="reads" type="data" multiple="true" format="fasta, fasta.gz, fastq, fastq.gz" label="Dataset"/> + <param name="setname" type="text" label="Set name" value="set_name"/> + </repeat> + </inputs> + + <outputs> + <data format="commet" name="read_sets" label="read_sets : ${tool.name} on ${on_string}" /> + </outputs> + <help> + +**Description** + +Prepare a configuration file with sets for commet tool + +------- + +**Created and integrated by** + +Yvan Le Bras and Cyril Monjeaud + +GenOuest Bio-informatics Core Facility + +UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France) + +support@genouest.org + +If you use this tool in Galaxy, please cite : + +`Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. <https://www.e-biogenouest.org/resources/128>`_ + + </help> + +<citations> +<citation type="bibtex">@INPROCEEDINGS{JOBIM2013, + author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.}, + title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France}, + booktitle = {JOBIM 2013 Proceedings}, + year = {2013}, + url = {https://www.e-biogenouest.org/resources/128}, + pages = {97-106} + } +</citation> +</citations> + +</tool> +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Fri Jun 05 11:41:26 2015 -0400 @@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <package name="commet" version="24.7.14"> + <repository changeset_revision="73a820a5e919" name="package_commet" owner="cmonjeau" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>