Mercurial > repos > greg > plant_tribes_gene_family_integrator
changeset 0:fa38de0b1f1a draft
Uploaded
author | greg |
---|---|
date | Thu, 08 Jun 2017 12:50:11 -0400 |
parents | |
children | 274f9159ffab |
files | .shed.yml gene_family_integrator.py gene_family_integrator.xml macros.xml plant_tribes_scaffolds.loc plant_tribes_scaffolds.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test utils.py |
diffstat | 9 files changed, 404 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,14 @@ +name: plant_tribes_gene_family_integrator +owner: greg +description: | + Contains a tool that integrates de novo assembly sequences with scaffold gene family sequences. +homepage_url: https://github.com/dePamphilis/PlantTribes +long_description: | + Contains a tool that is one of the PlantTribes collection of automated modular analysis pipelines that + utilize objective classifications of complete protein sequences from sequenced plant genomes to perform + comparative evolutionary studies. This tool integrates classified post processed de novo transcriptome + assembly sequences with the scaffold gene family sequences. +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/gene_family_integrator +type: unrestricted +categories: +- Phylogenetics
#!/usr/bin/env python
"""Command line wrapper around the ``GeneFamilyIntegrator`` executable.

This is the content of ``gene_family_integrator.py``.  The script builds a
``GeneFamilyIntegrator`` command line from its own options, runs it through
``utils.run_command``, moves every entry found in
``integratedGeneFamilies_dir/orthogroups_fasta`` into ``--output_dir`` and
finally writes an HTML index of that directory to ``--output``.
"""
import argparse
import os

try:
    from shlex import quote  # Python 3.3+
except ImportError:  # pragma: no cover - Python 2 fallback
    from pipes import quote

import utils

OUTPUT_DIR = 'integratedGeneFamilies_dir'


def parse_args(argv=None):
    """Parse the wrapper's command line options.

    ``argv`` defaults to ``sys.argv[1:]`` (argparse behaviour) when None.
    Every option except ``--orthogroup_fna`` is required: without them the
    literal string ``None`` would be formatted into the command line or
    passed on as a path.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--orthogroup_faa', dest='orthogroup_faa', required=True, help='Directory of input fasta datasets')
    parser.add_argument('--scaffold', dest='scaffold', required=True, help='Orthogroups or gene families proteins scaffold')
    parser.add_argument('--method', dest='method', required=True, help='Protein clustering method')
    parser.add_argument('--orthogroup_fna', dest='orthogroup_fna', default=None, help='Use corresponding coding sequences')
    parser.add_argument('--output', dest='output', required=True, help='Output dataset')
    parser.add_argument('--output_dir', dest='output_dir', required=True, help='Output dataset file_path directory')
    return parser.parse_args(argv)


def build_command(args):
    """Return the shell command string that runs GeneFamilyIntegrator.

    The command is executed with ``shell=True`` by ``utils.run_command``, so
    every user supplied value is shell-quoted; this keeps paths containing
    spaces or shell metacharacters intact.
    """
    parts = [
        'GeneFamilyIntegrator',
        '--orthogroup_faa', quote(args.orthogroup_faa),
        '--scaffold', quote(args.scaffold),
        '--method', quote(args.method),
    ]
    # The wrapper option carries a value, but it is forwarded to the
    # executable as a bare flag whenever it was supplied at all.
    if args.orthogroup_fna is not None:
        parts.append('--orthogroup_fna')
    return ' '.join(parts)


def main(argv=None):
    """Run the integrator and publish its results to the output locations."""
    args = parse_args(argv)

    # Run the command.
    utils.run_command(build_command(args))

    # Handle outputs.
    utils.move_directory_files(os.path.join(OUTPUT_DIR, 'orthogroups_fasta'), args.output_dir)
    utils.write_html_output(args.output, 'Integrated gene family sequences', args.output_dir)


if __name__ == '__main__':
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gene_family_integrator.xml Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,129 @@ +<tool id="plant_tribes_gene_family_integrator" name="GeneFamilyIntegrator" version="@WRAPPER_VERSION@.0"> + <description>integrates gene models in pre-computed orthologous gene family clusters with classified gene coding sequences</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements_gene_family_integrator" /> + <command detect_errors="exit_code"><![CDATA[ +#set input_format = $input_format_cond.input_format +#set scaffold = $input_format_cond.scaffold +#set method = $input_format_cond.method + +python $__tool_directory__/gene_family_integrator.py +--scaffold '$scaffold.fields.path' +--method $method +#if str($input_format) == 'ptortho': + --orthogroup_faa '$input_format_cond.input_ptortho.extra_files_path' + --output '$output_ptortho' + --output_dir '$output_ptortho.files_path' +#else: + ## str($input_format) == 'ptorthocs' + --orthogroup_faa '$input_format_cond.input_ptorthocs.extra_files_path' + #if str($input_format_cond.orthogroup_fna) == 'yes': + --orthogroup_fna 'true' + --output '$output_ptorthocs' + --output_dir '$output_ptorthocs.files_path' + #else: + --output '$output_ptortho' + --output_dir '$output_ptortho.files_path' + #end if +#end if + ]]></command> + <inputs> + <conditional name="input_format_cond"> + <param name="input_format" type="select" label="Classified orthogroup fasta files"> + <option value="ptortho">Proteins orthogroup fasta files</option> + <option value="ptorthocs">Protein and coding sequences orthogroup fasta files</option> + </param> + <when value="ptortho"> + <param name="input_ptortho" format="ptortho" type="data" label="Proteins orthogroup fasta files"> + <validator type="empty_extra_files_path" /> + </param> + <expand macro="param_scaffold" /> + <expand macro="param_method" /> + </when> + <when value="ptorthocs"> + <param name="input_ptorthocs" 
format="ptorthocs" type="data" label="Protein and coding sequences orthogroup fasta files"> + <validator type="empty_extra_files_path" /> + </param> + <expand macro="param_scaffold" /> + <expand macro="param_method" /> + <expand macro="param_orthogroup_fna" /> + </when> + </conditional> + </inputs> + <outputs> + <data name="output_ptortho" format="ptortho" label="${tool.name} (integrated gene family clusters) on ${on_string}"> + <filter>input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['orthogroup_fna'] == 'no')</filter> + </data> + <data name="output_ptorthocs" format="ptorthocs" label="${tool.name} (integrated gene family clusters) on ${on_string}"> + <filter>input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['orthogroup_fna'] == 'yes'</filter> + </data> + </outputs> + <tests> + <!-- Test framework does not currently support inputs whose associated extra_files_path contains files to be analyzed. + <test> + </test> + --> + </tests> + <help> +This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary +analyses of genome-scale gene families and transcriptomes. This tool integrates PlantTribes scaffold orthogroup backbone +gene models with gene coding sequences classified into the scaffold by the GeneFamilyClassifier tool. + +----- + +**Required options** + + * **Classified orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyClassifier tool selected from your history. Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences. + + * **Gene family scaffold** - one of the PlantTribes gene family scaffolds installed into Galaxy by the PlantTribes Scaffold Data Manager tool. + * **Protein clustering method** - gene family scaffold protein clustering method as described in the AssemblyPostProcessor tool. 
+ +**Other options** + + * **Orthogroups coding sequences** - Select 'Yes' to create corresponding coding sequences orthogroup fasta files for the classified protein sequences. + + </help> + <citations> + <expand macro="citation1" /> + <citation type="bibtex"> + @article{Wall2008, + journal = {Nucleic Acids Research}, + author = {2. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS}, + title = {PlantTribes: a gene and gene family resource for comparative genomics in plants}, + year = {2008}, + volume = {36}, + number = {suppl 1}, + pages = {D970-D976},} + </citation> + <citation type="bibtex"> + @article{Sasidharan2012, + journal = {Nucleic Acids Research}, + author = {3. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A}, + title = {GFam: a platform for automatic annotation of gene families}, + year = {2012}, + pages = {gks631},} + </citation> + <citation type="bibtex"> + @article{Li2003, + journal = {Genome Research}, + author = {4. Li L, Stoeckert CJ, Roos DS}, + title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes}, + year = {2003}, + volume = {13}, + number = {9}, + pages = {2178-2189},} + </citation> + <citation type="bibtex"> + @article{Emms2015, + journal = {Genome Biology}, + author = {5. Emms DM, Kelly S}, + title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy}, + year = {2015}, + volume = {16}, + number = {1}, + pages = {157},} + </citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,130 @@ +<?xml version='1.0' encoding='UTF-8'?> +<macros> + <token name="@WRAPPER_VERSION@">1.0</token> + <xml name="requirements_assembly_post_processor"> + <requirements> + <requirement type="package" version="1.0.0">plant_tribes_assembly_post_processor</requirement> + </requirements> + </xml> + <xml name="requirements_gene_family_aligner"> + <requirements> + <requirement type="package" version="1.0.0">plant_tribes_gene_family_aligner</requirement> + </requirements> + </xml> + <xml name="requirements_gene_family_classifier"> + <requirements> + <requirement type="package" version="1.0.0">plant_tribes_gene_family_classifier</requirement> + </requirements> + </xml> + <xml name="requirements_gene_family_integrator"> + <requirements> + <requirement type="package" version="1.0.0">plant_tribes_gene_family_integrator</requirement> + </requirements> + </xml> + <xml name="requirements_kaks_analysis"> + <requirements> + <requirement type="package" version="1.0.0">plant_tribes_kaks_analysis</requirement> + </requirements> + </xml> + <xml name="requirements_ks_distribution"> + <requirements> + <requirement type="package" version="1.3.0">r-optparse</requirement> + </requirements> + </xml> + <xml name="requirements_gene_family_phylogeny_builder"> + <requirements> + <requirement type="package" version="1.0.0">plant_tribes_gene_family_phylogeny_builder</requirement> + </requirements> + </xml> + <xml name="param_codon_alignments"> + <param name="codon_alignments" type="select" label="Codon alignments"> + <option value="yes" selected="true">Yes</option> + <option value="no">No</option> + </param> + </xml> + <xml name="param_method"> + <param name="method" type="select" label="Protein clustering method"> + <option value="gfam" selected="true">GFam</option> + <option value="orthofinder">OrthoFinder</option> + <option value="orthomcl">OrthoMCL</option> + </param> + </xml> + <xml 
name="param_options_type"> + <param name="options_type" type="select" label="Options Configuration"> + <option value="basic" selected="true">Basic</option> + <option value="advanced">Advanced</option> + </param> + </xml> + <xml name="param_orthogroup_fna"> + <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences"> + <option value="yes" selected="true">Yes</option> + <option value="no">No</option> + </param> + </xml> + <xml name="param_scaffold"> + <param name="scaffold" type="select" label="Gene family scaffold"> + <options from_data_table="plant_tribes_scaffolds" /> + <validator type="no_options" message="No PlantTribes scaffolds are available. Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." /> + </param> + </xml> + <xml name="param_sequence_type"> + <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference (dna)"> + <option value="protein" selected="true">Amino acid based</option> + <option value="dna">Nucleotide based</option> + </param> + </xml> + <xml name="cond_alignment_method"> + <conditional name="alignment_method_cond"> + <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method"> + <option value="mafft" selected="true">MAFFT</option> + <option value="pasta">PASTA</option> + </param> + <when value="mafft" /> + <when value="pasta"> + <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" /> + </when> + </conditional> + </xml> + <xml name="cond_remove_gappy_sequences"> + <conditional name="remove_gappy_sequences_cond"> + <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no" /> + <when value="yes"> + <conditional name="trim_type_cond"> + <param name="trim_type" 
type="select" label="Trimming method"> + <option value="gap_trimming" selected="true">Gap score based trimming</option> + <option value="automated_trimming">Automated heuristic trimming</option> + </param> + <when value="gap_trimming"> + <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" /> + </when> + <when value="automated_trimming" /> + </conditional> + <conditional name="remove_sequences_with_gaps_cond"> + <param name="remove_sequences_with_gaps" type="select" label="Remove sequences"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no" /> + <when value="yes"> + <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" /> + <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" /> + </when> + </conditional> + </when> + </conditional> + </xml> + <xml name="citation1"> + <citation type="bibtex"> + @misc{None, + journal = {None}, + author = {1. Wafula EK}, + title = {Manuscript in preparation}, + year = {None}, + url = {https://github.com/dePamphilis/PlantTribes},} + </citation> + </xml> +</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plant_tribes_scaffolds.loc Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,3 @@ +## Plant Tribes scaffolds +#Value Name Path Description +22Gv1.1 22Gv1.1 ${__HERE__}/test-data/tool-data/plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plant_tribes_scaffolds.loc.sample Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,4 @@ +## Plant Tribes scaffolds +#Value Name Path Description +#22Gv1.0 22Gv1.0 /plant_tribes/scaffolds/22Gv1.0 22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0) +#22Gv1.1 22Gv1.1 /plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,6 @@ +<tables> + <table name="plant_tribes_scaffolds" comment_char="#"> + <columns>value, name, path, description</columns> + <file path="tool-data/plant_tribes_scaffolds.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,6 @@ +<tables> + <table name="plant_tribes_scaffolds" comment_char="#"> + <columns>value, name, path, description</columns> + <file path="${__HERE__}/plant_tribes_scaffolds.loc" /> + </table> +</tables>
"""Helper functions (``utils.py``) used by ``gene_family_integrator.py``.

They run a shell command while capturing its output in files, move or copy
the entries of a directory, and write a small HTML index of a directory.
"""
import os
import shutil
import subprocess
import sys

try:
    from html import escape  # Python 3.2+
except ImportError:  # pragma: no cover - Python 2 fallback
    from cgi import escape

FSTDERR = 'stderr.txt'
FSTDOUT = 'stdout.txt'


def _read_text(path):
    """Return the content of ``path`` as a native ``str``.

    The file is read as bytes and decoded leniently, so that undecodable
    output of the child process can never mask the real error.
    """
    with open(path, 'rb') as fh:
        data = fh.read()
    if not isinstance(data, str):
        # Python 3: bytes must be decoded, str(bytes) would yield "b'...'".
        data = data.decode('utf-8', 'replace')
    return data


def check_execution_errors(rc, fstderr, fstdout):
    """Exit with the captured output as message when ``rc`` is non-zero.

    ``fstderr`` and ``fstdout`` are paths of the files holding the captured
    stderr and stdout.  The message is the stdout text followed by the stderr
    text, each terminated by a newline.  Nothing is read when ``rc`` is 0.
    """
    if rc != 0:
        msg = '%s\n%s\n' % (_read_text(fstdout), _read_text(fstderr))
        stop_err(msg)


def get_response_buffers():
    """Open ``stderr.txt`` and ``stdout.txt`` in the current directory.

    Returns the tuple ``(stderr_path, stderr_handle, stdout_path,
    stdout_handle)``; both handles are opened for binary writing and must be
    closed by the caller.
    """
    cwd = os.getcwd()
    fstderr = os.path.join(cwd, FSTDERR)
    fstdout = os.path.join(cwd, FSTDOUT)
    fherr = open(fstderr, 'wb')
    try:
        fhout = open(fstdout, 'wb')
    except Exception:
        # Do not leak the first handle when the second open fails.
        fherr.close()
        raise
    return fstderr, fherr, fstdout, fhout


def move_directory_files(source_dir, destination_dir, copy=False):
    """Move (or copy, when ``copy`` is true) every entry of ``source_dir``.

    ``destination_dir`` is created when it does not exist yet.
    """
    source_directory = os.path.abspath(source_dir)
    destination_directory = os.path.abspath(destination_dir)
    if not os.path.isdir(destination_directory):
        os.makedirs(destination_directory)
    transfer = shutil.copy if copy else shutil.move
    for dir_entry in os.listdir(source_directory):
        transfer(os.path.join(source_directory, dir_entry), destination_directory)


def run_command(cmd):
    """Run ``cmd`` through the shell and exit on a non-zero return code.

    stdout and stderr of the command are captured in the files returned by
    ``get_response_buffers``.  ``cmd`` is a single string executed with
    ``shell=True``, so callers are responsible for quoting its arguments.
    """
    fstderr, fherr, fstdout, fhout = get_response_buffers()
    try:
        proc = subprocess.Popen(args=cmd, stderr=fherr, stdout=fhout, shell=True)
        rc = proc.wait()
    finally:
        # Always release the capture files, even when the process could not
        # be started.
        fherr.close()
        fhout.close()
    # Check results.
    check_execution_errors(rc, fstderr, fstdout)


def stop_err(msg):
    """Terminate the program via ``sys.exit`` with ``msg`` as exit message."""
    sys.exit(msg)


def write_html_output(output, title, dir):
    """Write an HTML page to ``output`` that links every entry of ``dir``.

    The page shows ``title`` with the number of entries, a table header of
    'Directories' or 'Datasets' (decided by the first sorted entry, empty for
    an empty directory) and one row per entry with alternating background
    colours.  Entry names are HTML-escaped in both link target and link text.
    """
    with open(output, 'w') as fh:
        dir_items = sorted(os.listdir(dir))
        # Directories can only contain either files or directories,
        # but not both.
        if dir_items:
            item_path = os.path.join(dir, dir_items[0])
            header = 'Directories' if os.path.isdir(item_path) else 'Datasets'
        else:
            header = ''
        fh.write('<html><head><h3>%s: %d items</h3></head>\n' % (title, len(dir_items)))
        fh.write('<body><p/><table cellpadding="2">\n')
        # Balanced header row (the tags were previously mismatched).
        fh.write('<tr><th>%s</th></tr>\n' % header)
        for index, fname in enumerate(dir_items):
            bgcolor = '#D8D8D8' if index % 2 == 0 else '#FFFFFF'
            safe_name = escape(fname, True)
            link = '<a href="%s" type="text/plain">%s</a>\n' % (safe_name, safe_name)
            fh.write('<tr bgcolor="%s"><td>%s</td></tr>\n' % (bgcolor, link))
        fh.write('</table></body></html>\n')