# HG changeset patch # User greg # Date 1496940611 14400 # Node ID fa38de0b1f1a17346f73dd31939d28d4a3636a4a Uploaded diff -r 000000000000 -r fa38de0b1f1a .shed.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,14 @@ +name: plant_tribes_gene_family_integrator +owner: greg +description: | + Contains a tool that integrates de novo assembly sequences with scaffold gene family sequences. +homepage_url: https://github.com/dePamphilis/PlantTribes +long_description: | + Contains a tool that is one of the PlantTribes collection of automated modular analysis pipelines that + utilize objective classifications of complete protein sequences from sequenced plant genomes to perform + comparative evolutionary studies. This tool integrates classified post processed de novo transcriptome + assembly sequences with the scaffold gene family sequences. +remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/gene_family_integrator +type: unrestricted +categories: +- Phylogenetics diff -r 000000000000 -r fa38de0b1f1a gene_family_integrator.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gene_family_integrator.py Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,32 @@ +#!/usr/bin/env python +import argparse +import os + +import utils + +OUTPUT_DIR = 'integratedGeneFamilies_dir' + +parser = argparse.ArgumentParser() +parser.add_argument('--orthogroup_faa', dest='orthogroup_faa', help='Directory of input fasta datasets') +parser.add_argument('--scaffold', dest='scaffold', help='Orthogroups or gene families proteins scaffold') +parser.add_argument('--method', dest='method', help='Protein clustering method') +parser.add_argument('--orthogroup_fna', dest='orthogroup_fna', default=None, help='Use correspong coding sequences') +parser.add_argument('--output', dest='output', help='Output dataset') +parser.add_argument('--output_dir', dest='output_dir', help='Output dataset file_path directory') 
+ +args = parser.parse_args() + +# Build the command line. +cmd = 'GeneFamilyIntegrator' +cmd += ' --orthogroup_faa %s' % args.orthogroup_faa +cmd += ' --scaffold %s' % args.scaffold +cmd += ' --method %s' % args.method +if args.orthogroup_fna is not None: + cmd += ' --orthogroup_fna' + +# Run the command. +utils.run_command(cmd) + +# Handle outputs. +utils.move_directory_files(os.path.join(OUTPUT_DIR, 'orthogroups_fasta'), args.output_dir) +utils.write_html_output(args.output, 'Integrated gene family sequences', args.output_dir) diff -r 000000000000 -r fa38de0b1f1a gene_family_integrator.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gene_family_integrator.xml Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,129 @@ + + integrates gene models in pre-computed orthologous gene family clusters with classified gene coding sequences + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['orthogroup_fna'] == 'no') + + + input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['orthogroup_fna'] == 'yes' + + + + + + +This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary +analyses of genome-scale gene families and transcriptomes. This tool integrates PlantTribes scaffold orthogroup backbone +gene models with gene coding sequences classified into the scaffold by the GeneFamilyClassifier tool. + +----- + +**Required options** + + * **Classified orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyClassifier tool selected from your history. Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences. + + * **Gene family scaffold** - one of the PlantTribes gene family scaffolds installed into Galaxy by the PlantTribes Scaffold Data Manager tool. 
+ * **Protein clustering method** - gene family scaffold protein clustering method as described in the AssemblyPostProcessor tool. + +**Other options** + + * **Orthogroups coding sequences** - Select 'Yes' to create corresponding coding sequences orthogroup fasta files for the classified protein sequences. + + + + + + @article{Wall2008, + journal = {Nucleic Acids Research}, + author = {2. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS}, + title = {PlantTribes: a gene and gene family resource for comparative genomics in plants}, + year = {2008}, + volume = {36}, + number = {suppl 1}, + pages = {D970-D976},} + + + @article{Sasidharan2012, + journal = {Nucleic Acids Research}, + author = {3. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A}, + title = {GFam: a platform for automatic annotation of gene families}, + year = {2012}, + pages = {gks631},} + + + @article{Li2003, + journal = {Genome Research}, + author = {4. Li L, Stoeckert CJ, Roos DS}, + title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes}, + year = {2003}, + volume = {13}, + number = {9}, + pages = {2178-2189},} + + + @article{Emms2015, + journal = {Genome Biology}, + author = {5. 
Emms DM, Kelly S}, + title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy}, + year = {2015}, + volume = {16}, + number = {1}, + pages = {157},} + + + diff -r 000000000000 -r fa38de0b1f1a macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,130 @@ + + + 1.0 + + + plant_tribes_assembly_post_processor + + + + + plant_tribes_gene_family_aligner + + + + + plant_tribes_gene_family_classifier + + + + + plant_tribes_gene_family_integrator + + + + + plant_tribes_kaks_analysis + + + + + r-optparse + + + + + plant_tribes_gene_family_phylogeny_builder + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @misc{None, + journal = {None}, + author = {1. Wafula EK}, + title = {Manuscript in preparation}, + year = {None}, + url = {https://github.com/dePamphilis/PlantTribes},} + + + diff -r 000000000000 -r fa38de0b1f1a plant_tribes_scaffolds.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plant_tribes_scaffolds.loc Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,3 @@ +## Plant Tribes scaffolds +#Value Name Path Description +22Gv1.1 22Gv1.1 ${__HERE__}/test-data/tool-data/plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1) diff -r 000000000000 -r fa38de0b1f1a plant_tribes_scaffolds.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plant_tribes_scaffolds.loc.sample Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,4 @@ +## Plant Tribes scaffolds +#Value Name Path Description +#22Gv1.0 22Gv1.0 /plant_tribes/scaffolds/22Gv1.0 22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0) +#22Gv1.1 22Gv1.1 /plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1) diff -r 000000000000 -r fa38de0b1f1a tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 
00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,6 @@ + + + value, name, path, description + +

+ diff -r 000000000000 -r fa38de0b1f1a tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,6 @@ + + + value, name, path, description + +

+ diff -r 000000000000 -r fa38de0b1f1a utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils.py Thu Jun 08 12:50:11 2017 -0400 @@ -0,0 +1,80 @@ +import os +import shutil +import subprocess +import sys + +FSTDERR = 'stderr.txt' +FSTDOUT = 'stdout.txt' + + +def check_execution_errors(rc, fstderr, fstdout): + if rc != 0: + fh = open(fstdout, 'rb') + out_msg = fh.read() + fh.close() + fh = open(fstderr, 'rb') + err_msg = fh.read() + fh.close() + msg = '%s\n%s\n' % (str(out_msg), str(err_msg)) + stop_err(msg) + + +def get_response_buffers(): + fstderr = os.path.join(os.getcwd(), FSTDERR) + fherr = open(fstderr, 'wb') + fstdout = os.path.join(os.getcwd(), FSTDOUT) + fhout = open(fstdout, 'wb') + return fstderr, fherr, fstdout, fhout + + +def move_directory_files(source_dir, destination_dir, copy=False): + source_directory = os.path.abspath(source_dir) + destination_directory = os.path.abspath(destination_dir) + if not os.path.isdir(destination_directory): + os.makedirs(destination_directory) + for dir_entry in os.listdir(source_directory): + source_entry = os.path.join(source_directory, dir_entry) + if copy: + shutil.copy(source_entry, destination_directory) + else: + shutil.move(source_entry, destination_directory) + + +def run_command(cmd): + fstderr, fherr, fstdout, fhout = get_response_buffers() + proc = subprocess.Popen(args=cmd, stderr=fherr, stdout=fhout, shell=True) + rc = proc.wait() + # Check results. + fherr.close() + fhout.close() + check_execution_errors(rc, fstderr, fstdout) + + +def stop_err(msg): + sys.exit(msg) + + +def write_html_output(output, title, dir): + with open(output, 'w') as fh: + dir_items = sorted(os.listdir(dir)) + # Directories can only contain either files or directories, + # but not both. + if len(dir_items) > 0: + item_path = os.path.join(dir, dir_items[0]) + if os.path.isdir(item_path): + header = 'Directories' + else: + header = 'Datasets' + else: + header = '' + fh.write('

%s: %d items

\n' % (title, len(dir_items))) + fh.write('

\n') + fh.write('%s\n' % header) + for index, fname in enumerate(dir_items): + if index % 2 == 0: + bgcolor = '#D8D8D8' + else: + bgcolor = '#FFFFFF' + link = '%s\n' % (fname, fname) + fh.write('\n' % (bgcolor, link)) + fh.write('
%s
\n')