Mercurial > repos > greg > plant_tribes_gene_family_aligner
changeset 0:7ba9469800b9 draft
Uploaded
author | greg |
---|---|
date | Thu, 08 Jun 2017 12:46:20 -0400 |
parents | |
children | ab1f0bab96f3 |
files | gene_family_aligner.py gene_family_aligner.xml macros.xml plant_tribes_scaffolds.loc.sample run_pasta.py tool_data_table_conf.xml.sample utils.py |
diffstat | 7 files changed, 542 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# gene_family_aligner.py
"""Galaxy wrapper script around the ``GeneFamilyAligner`` command.

The script parses the options passed by the tool form, assembles the
``GeneFamilyAligner`` command line, runs it through ``utils.run_command``,
moves the resulting files into the output dataset's files_path directory
and writes an HTML index of them to the output dataset.
"""
import argparse
import os

try:
    # Python 3.3+
    from shlex import quote
except ImportError:
    # Python 2
    from pipes import quote

import utils

# Directory the alignment files are collected from after the run
# (presumably created by GeneFamilyAligner in the working directory).
OUTPUT_DIR = 'geneFamilyAlignments_dir'


def build_parser():
    """Return the parser for the options supplied by the Galaxy tool form."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--alignment_method', dest='alignment_method', help='Multiple sequence alignments method')
    parser.add_argument('--automated_trimming', dest='automated_trimming', default=None, help='Trims alignments using trimAls ML heuristic trimming approach')
    parser.add_argument('--codon_alignments', dest='codon_alignments', default=None, help='Flag for constructing orthogroup multiple codon alignments')
    parser.add_argument('--gap_trimming', dest='gap_trimming', default=0, type=float, help='Remove sites in alignments with gaps of')
    parser.add_argument('--iterative_realignment', dest='iterative_realignment', type=int, default=0, help='Maximum number of iterations')
    parser.add_argument('--num_threads', dest='num_threads', type=int, help='Number of threads to use for execution')
    parser.add_argument('--orthogroup_faa', dest='orthogroup_faa', help='Directory of input fasta datasets')
    parser.add_argument('--output', dest='output', help='Output dataset')
    parser.add_argument('--output_dir', dest='output_dir', help='Output dataset files_path directory')
    parser.add_argument('--pasta_iter_limit', dest='pasta_iter_limit', type=int, default=None, help='Maximum number of iteration that the PASTA algorithm will execute')
    parser.add_argument('--pasta_script_path', dest='pasta_script_path', default=None, help='Path to script for executing pasta')
    parser.add_argument('--remove_sequences', dest='remove_sequences', default=0, type=float, help='Remove sequences with gaps of')
    return parser


def build_command(args):
    """Return the ``GeneFamilyAligner`` shell command for the parsed *args*.

    The command string is handed to ``utils.run_command``, which executes it
    through the shell, so every path / free-text value is shell-quoted.
    Values made only of shell-safe characters are left unchanged by
    ``quote``, so the resulting command is identical for ordinary paths.
    """
    parts = [
        'GeneFamilyAligner',
        '--orthogroup_faa %s' % quote(str(args.orthogroup_faa)),
        '--alignment_method %s' % quote(str(args.alignment_method)),
    ]
    # The PASTA-specific options are only forwarded when PASTA is selected.
    if args.alignment_method == 'pasta':
        if args.pasta_script_path is not None:
            parts.append('--pasta_script_path %s' % quote(args.pasta_script_path))
        if args.pasta_iter_limit is not None:
            parts.append('--pasta_iter_limit %d' % args.pasta_iter_limit)
    parts.append('--num_threads %d' % args.num_threads)
    # These two options are plain flags: any supplied value switches them on.
    if args.codon_alignments is not None:
        parts.append('--codon_alignments')
    if args.automated_trimming is not None:
        parts.append('--automated_trimming')
    # Zero (the default) means "not requested" for the numeric options.
    # NOTE: '%4f' is a minimum field width of 4 with the default precision.
    if args.gap_trimming > 0:
        parts.append('--gap_trimming %4f' % args.gap_trimming)
    if args.remove_sequences > 0:
        parts.append('--remove_sequences %4f' % args.remove_sequences)
    if args.iterative_realignment > 0:
        parts.append('--iterative_realignment %d' % args.iterative_realignment)
    return ' '.join(parts)


def main():
    """Run GeneFamilyAligner and publish its results to the output dataset."""
    args = build_parser().parse_args()

    # Run the command.
    utils.run_command(build_command(args))

    # Handle outputs: when codon alignments were requested the files are
    # taken from the 'orthogroups_aln' sub-directory instead.
    if args.codon_alignments is None:
        src_output_dir = OUTPUT_DIR
    else:
        src_output_dir = os.path.join(OUTPUT_DIR, 'orthogroups_aln')
    utils.move_directory_files(src_output_dir, args.output_dir)
    utils.write_html_output(args.output, 'Aligned gene family sequences', args.output_dir)


if __name__ == '__main__':
    main()
<!-- gene_family_aligner.xml: Galaxy tool definition wrapping gene_family_aligner.py -->
<tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.0">
    <description>aligns integrated orthologous gene family clusters</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements_gene_family_aligner" />
    <command detect_errors="exit_code"><![CDATA[
#set input_format = $input_format_cond.input_format
#set alignment_method_cond = $input_format_cond.alignment_method_cond
#set alignment_method = $alignment_method_cond.alignment_method
## Codon alignment outputs are only produced for ptorthocs input with codon alignments enabled.
#if str($input_format_cond.input_format) == 'ptortho':
    #set output_codon_alignments = False
#else if str($input_format_cond.input_format) == 'ptorthocs' and str($input_format_cond.codon_alignments ) == 'no':
    #set output_codon_alignments = False
#else:
    #set output_codon_alignments = True
#end if

python $__tool_directory__/gene_family_aligner.py
--alignment_method $alignment_method
#if str($alignment_method) == 'pasta':
    --pasta_script_path '$__tool_directory__/run_pasta.py'
    --pasta_iter_limit $alignment_method_cond.pasta_iter_limit
#end if
--num_threads \${GALAXY_SLOTS:-4}
#if str($input_format) == 'ptortho':
    --orthogroup_faa '$input_format_cond.input_ptortho.extra_files_path'
#else:
    ## str($input_format) == 'ptorthocs'
    --orthogroup_faa '$input_format_cond.input_ptorthocs.extra_files_path'
    #if str($input_format_cond.codon_alignments) == 'yes':
        --codon_alignments 'true'
    #end if
#end if
#set remove_gappy_sequences = $remove_gappy_sequences_cond.remove_gappy_sequences
#if str($remove_gappy_sequences) == 'yes':
    #set trim_type_cond = $remove_gappy_sequences_cond.trim_type_cond
    #set trim_type = $trim_type_cond.trim_type
    #if str($trim_type) == 'gap_trimming' and str($trim_type_cond.gap_trimming):
        --gap_trimming $trim_type_cond.gap_trimming
    #else:
        ## str($trim_type) == 'automated_trimming'
        --automated_trimming 'true'
    #end if
    #set remove_sequences_with_gaps_cond = $remove_gappy_sequences_cond.remove_sequences_with_gaps_cond
    #set remove_sequences_with_gaps = $remove_sequences_with_gaps_cond.remove_sequences_with_gaps
    #if str($remove_sequences_with_gaps) == 'yes':
        #if str($remove_sequences_with_gaps_cond.remove_sequences_with_gaps_of):
            --remove_sequences $remove_sequences_with_gaps_cond.remove_sequences_with_gaps_of
        #end if
        #if str($remove_sequences_with_gaps_cond.iterative_realignment):
            --iterative_realignment $remove_sequences_with_gaps_cond.iterative_realignment
        #end if
        #if $output_codon_alignments:
            --output '$output_aln_filtered_ca'
            --output_dir '$output_aln_filtered_ca.files_path'
        #else:
            --output '$output_aln_filtered'
            --output_dir '$output_aln_filtered.files_path'
        #end if
    #else:
        #if $output_codon_alignments:
            --output '$output_aln_trimmed_ca'
            --output_dir '$output_aln_trimmed_ca.files_path'
        #else:
            --output '$output_aln_trimmed'
            --output_dir '$output_aln_trimmed.files_path'
        #end if
    #end if
#else:
    #if $output_codon_alignments:
        --output '$output_aln_ca'
        --output_dir '$output_aln_ca.files_path'
    #else:
        --output '$output_aln'
        --output_dir '$output_aln.files_path'
    #end if
#end if
    ]]></command>
    <inputs>
        <conditional name="input_format_cond">
            <param name="input_format" type="select" label="Classified orthogroup fasta files">
                <option value="ptortho">Proteins orthogroup fasta files</option>
                <option value="ptorthocs">Protein and coding sequences orthogroup fasta files</option>
            </param>
            <when value="ptortho">
                <param name="input_ptortho" format="ptortho" type="data" label="Proteins orthogroup fasta files">
                    <validator type="empty_extra_files_path" />
                </param>
                <expand macro="cond_alignment_method" />
            </when>
            <when value="ptorthocs">
                <param name="input_ptorthocs" format="ptorthocs" type="data" label="Protein and coding sequences orthogroup fasta files">
                    <validator type="empty_extra_files_path" />
                </param>
                <expand macro="cond_alignment_method" />
                <expand macro="param_codon_alignments" />
            </when>
        </conditional>
        <expand macro="cond_remove_gappy_sequences" />
    </inputs>
    <!--
        Exactly one output is written per run (see the command template).
        The filters below mirror that selection: the plain outputs apply to
        ptortho input, or ptorthocs input without codon alignments; the *_ca
        outputs apply only to ptorthocs input with codon alignments, since the
        command never writes a codon alignment output for ptortho input.
    -->
    <outputs>
        <data name="output_aln" format="ptalign" label="${tool.name} (proteins orthogroup alignments) on ${on_string}">
            <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter>
        </data>
        <data name="output_aln_ca" format="ptalignca" label="${tool.name} (protein and coding sequences orthogroup alignments) on ${on_string}">
            <filter>(input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes') and remove_gappy_sequences_cond['remove_gappy_sequences'] == 'no'</filter>
        </data>
        <data name="output_aln_filtered" format="ptalignfiltered" label="${tool.name} (filtered proteins orthogroup alignments) on ${on_string}">
            <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes')</filter>
        </data>
        <data name="output_aln_filtered_ca" format="ptalignfilteredca" label="${tool.name} (filtered protein and coding sequences orthogroup alignments) on ${on_string}">
            <filter>(input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes') and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'yes')</filter>
        </data>
        <data name="output_aln_trimmed" format="ptaligntrimmed" label="${tool.name} (trimmed proteins orthogroup alignments) on ${on_string}">
            <filter>(input_format_cond['input_format'] == 'ptortho' or (input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'no')) and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no')</filter>
        </data>
        <data name="output_aln_trimmed_ca" format="ptaligntrimmedca" label="${tool.name} (trimmed protein and coding sequences orthogroup alignments) on ${on_string}">
            <filter>(input_format_cond['input_format'] == 'ptorthocs' and input_format_cond['codon_alignments'] == 'yes') and (remove_gappy_sequences_cond['remove_gappy_sequences'] == 'yes' and remove_gappy_sequences_cond['remove_sequences_with_gaps_cond']['remove_sequences_with_gaps'] == 'no')</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test framework does not currently support inputs whose associated extra_files_path contains files to be analyzed.
        <test>
        </test>
        -->
    </tests>
    <help>
This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
analyses of genome-scale gene families and transcriptomes. This tool estimates protein and codon multiple sequence alignments
of integrated orthologous gene family fasta files produced by the GeneFamilyIntegrator tool.

-----

**Required options**

 * **Classified orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyClassifier tool selected from your history. Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences.

 * **Multiple sequence alignment method** - method for estimating orthogroup multiple sequence alignments. PlantTribes estimates alignments using either MAFFT's L-INS-i algorithm or the divide and conquer approach implemented in the PASTA pipeline for large alignments.

   - **PASTA iteration limit** - number of PASTA iterations. By default, PASTA performs 3 iterations.

 * **Codon alignments** - select 'Yes' to create codon multiple sequence alignments. This option requires protein and their corresponding coding sequences to be provided as input data.

**Other options**

 * **Alignment post-processing configuration** - select 'Yes' to enable multiple sequence alignment post-processing configuration options.

   - **Trimming method** - multiple sequence alignment trimming method. PlantTribes trims alignments using two automated approaches implemented in trimAl. Gap score based trimming removes alignment sites that do not achieve a user specified gap score. For example, a setting of 0.1 removes sites that have gaps in 90% or more of the sequences in the multiple sequence alignment. The automated heuristic trimming approach determines the best automated trimAl method to trim a given alignment as described in the trimAl tutorial `trimAl`_.

     - **Gap score** - the fraction of sequences with gap allowed in an alignment site. The score is restricted to the range 0.0 - 1.0. Zero value has no effect.

   - **Remove sequences** - select 'Yes' to remove sequences in multiple sequence alignments that do not achieve a user specified alignment coverage score. For example, a setting of 0.7 removes sequences with more than 30% gaps in the alignment. This option requires one of the trimming methods to be set.

     - **Coverage score** - minimum fraction of sites without gaps for a sequence in a multiple sequence alignment. The score is restricted to the range 0.0 - 1.0. Zero value has no effect.

     - **Realignment iteration limit** - number of iterations to perform trimming, removal of sequences, and realignment of orthogroup sequences. Zero value has no effect.

.. _trimAl: http://trimal.cgenomics.org

    </help>
    <citations>
        <expand macro="citation1" />
        <citation type="bibtex">
            @article{Wall2008,
            journal = {Nucleic Acids Research},
            author = {2. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS},
            title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
            year = {2008},
            volume = {36},
            number = {suppl 1},
            pages = {D970-D976},}
        </citation>
        <citation type="bibtex">
            @article{Katoh2013,
            journal = {Molecular biology and evolution},
            author = {3. Katoh K, Standley DM},
            title = {MAFFT multiple sequence alignment software version 7: improvements in performance and usability},
            year = {2013},
            volume = {30},
            number = {4},
            pages = {772-780},}
        </citation>
        <citation type="bibtex">
            @article{Mirarab2014,
            journal = {Research in Computational Molecular Biology (RECOMB)},
            author = {4. Mirarab S, Nguyen N, Warnow T},
            title = {PASTA: Ultra-Large Multiple Sequence Alignment. In R. Sharan (Ed.)},
            year = {2014},
            pages = {177–191},
            url = {https://github.com/smirarab/pasta},}
        </citation>
        <citation type="bibtex">
            @article{Capella-Gutierrez2009,
            journal = {Bioinformatics},
            author = {5. Capella-Gutierrez S, Silla-Martínez JM, Gabaldón T},
            title = {trimAl: a tool for automated alignment trimming in large-scale phylogenetic analyses},
            year = {2009},
            volume = {25},
            number = {15},
            pages = {1972-1973},}
        </citation>
    </citations>
</tool>
<?xml version='1.0' encoding='UTF-8'?>
<!-- macros.xml: tokens and reusable XML fragments expanded by the tool definition via <expand macro="..."/> -->
<macros>
    <token name="@WRAPPER_VERSION@">1.0</token>

    <!-- Package requirements, one macro per tool. -->
    <xml name="requirements_assembly_post_processor">
        <requirements>
            <requirement type="package" version="1.0.0">plant_tribes_assembly_post_processor</requirement>
        </requirements>
    </xml>
    <xml name="requirements_gene_family_aligner">
        <requirements>
            <requirement type="package" version="1.0.0">plant_tribes_gene_family_aligner</requirement>
        </requirements>
    </xml>
    <xml name="requirements_gene_family_classifier">
        <requirements>
            <requirement type="package" version="1.0.0">plant_tribes_gene_family_classifier</requirement>
        </requirements>
    </xml>
    <xml name="requirements_gene_family_integrator">
        <requirements>
            <requirement type="package" version="1.0.0">plant_tribes_gene_family_integrator</requirement>
        </requirements>
    </xml>
    <xml name="requirements_kaks_analysis">
        <requirements>
            <requirement type="package" version="1.0.0">plant_tribes_kaks_analysis</requirement>
        </requirements>
    </xml>
    <xml name="requirements_ks_distribution">
        <requirements>
            <requirement type="package" version="1.3.0">r-optparse</requirement>
        </requirements>
    </xml>
    <xml name="requirements_gene_family_phylogeny_builder">
        <requirements>
            <requirement type="package" version="1.0.0">plant_tribes_gene_family_phylogeny_builder</requirement>
        </requirements>
    </xml>

    <!-- Stand-alone select parameters. -->
    <xml name="param_codon_alignments">
        <param name="codon_alignments" type="select" label="Codon alignments">
            <option value="yes" selected="true">Yes</option>
            <option value="no">No</option>
        </param>
    </xml>
    <xml name="param_method">
        <param name="method" type="select" label="Protein clustering method">
            <option value="gfam" selected="true">GFam</option>
            <option value="orthofinder">OrthoFinder</option>
            <option value="orthomcl">OrthoMCL</option>
        </param>
    </xml>
    <xml name="param_options_type">
        <param name="options_type" type="select" label="Options Configuration">
            <option value="basic" selected="true">Basic</option>
            <option value="advanced">Advanced</option>
        </param>
    </xml>
    <xml name="param_orthogroup_fna">
        <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences">
            <option value="yes" selected="true">Yes</option>
            <option value="no">No</option>
        </param>
    </xml>
    <xml name="param_scaffold">
        <!-- Options come from the plant_tribes_scaffolds data table. -->
        <param name="scaffold" type="select" label="Gene family scaffold">
            <options from_data_table="plant_tribes_scaffolds" />
            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
        </param>
    </xml>
    <xml name="param_sequence_type">
        <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference (dna)">
            <option value="protein" selected="true">Amino acid based</option>
            <option value="dna">Nucleotide based</option>
        </param>
    </xml>

    <!-- Conditionals: the PASTA iteration limit is only offered when PASTA is selected. -->
    <xml name="cond_alignment_method">
        <conditional name="alignment_method_cond">
            <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
                <option value="mafft" selected="true">MAFFT</option>
                <option value="pasta">PASTA</option>
            </param>
            <when value="mafft" />
            <when value="pasta">
                <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" />
            </when>
        </conditional>
    </xml>
    <!-- Post-processing: a trimming method, then optional removal of gappy sequences. -->
    <xml name="cond_remove_gappy_sequences">
        <conditional name="remove_gappy_sequences_cond">
            <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no" />
            <when value="yes">
                <conditional name="trim_type_cond">
                    <param name="trim_type" type="select" label="Trimming method">
                        <option value="gap_trimming" selected="true">Gap score based trimming</option>
                        <option value="automated_trimming">Automated heuristic trimming</option>
                    </param>
                    <when value="gap_trimming">
                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
                    </when>
                    <when value="automated_trimming" />
                </conditional>
                <conditional name="remove_sequences_with_gaps_cond">
                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences">
                        <option value="no" selected="true">No</option>
                        <option value="yes">Yes</option>
                    </param>
                    <when value="no" />
                    <when value="yes">
                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
                    </when>
                </conditional>
            </when>
        </conditional>
    </xml>

    <!-- Citation shared by the PlantTribes tools. -->
    <xml name="citation1">
        <citation type="bibtex">
            @misc{None,
            journal = {None},
            author = {1. Wafula EK},
            title = {Manuscript in preparation},
            year = {None},
            url = {https://github.com/dePamphilis/PlantTribes},}
        </citation>
    </xml>
</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/plant_tribes_scaffolds.loc.sample Thu Jun 08 12:46:20 2017 -0400 @@ -0,0 +1,4 @@ +## Plant Tribes scaffolds +#Value Name Path Description +#22Gv1.0 22Gv1.0 /plant_tribes/scaffolds/22Gv1.0 22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0) +#22Gv1.1 22Gv1.1 /plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
#! /usr/bin/env python
# run_pasta.py

"""Main script of PASTA in command-line mode - this simply invokes the main
    function found in pasta/mainpasta.py
"""

# This file is part of PASTA which is forked from SATe

# PASTA like SATe is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Jiaye Yu and Mark Holder, University of Kansas

if __name__ == "__main__":
    import os
    import sys
    from pasta.mainpasta import pasta_main
    from pasta import MESSENGER
    sys.setrecursionlimit(100000)
    # Developer mode is enabled by setting PASTA_DEBUG or PASTA_DEVELOPER to
    # anything other than '0'; in that mode errors are re-raised with their
    # traceback instead of being reported as a short message.
    _PASTA_DEBUG = os.environ.get('PASTA_DEBUG')
    _DEVELOPER = _PASTA_DEBUG and _PASTA_DEBUG != '0'

    if not _DEVELOPER:
        _PASTA_DEVELOPER = os.environ.get('PASTA_DEVELOPER')
        _DEVELOPER = _PASTA_DEVELOPER and _PASTA_DEVELOPER != '0'
    try:
        rc, temp_dir, temp_fs = pasta_main()
        # A falsy first return value is treated as a failed run.
        if not rc:
            raise ValueError("Unknown PASTA execution error")
        if (temp_dir is not None) and (os.path.exists(temp_dir)):
            MESSENGER.send_info("Note that temporary files from the run have not been deleted, they can be found in:\n '%s'\n" % temp_dir)
            # The hint below embeds the path in single quotes, so it is
            # skipped when the path itself contains a single quote.
            if sys.platform.lower().startswith('darwin') and ("'" not in temp_dir):
                MESSENGER.send_info('''
If you cannot see this directory in the Finder application, you may want to use
the 'open' command executed from a Terminal. You can do this by launching the
/Applications/Utilities/Terminal program and then typing

open '%s'

followed by a return at the prompt. If the argument to the open command is a
directory, then it should open a Finder window in the directory (even if that
directory is hidden by default).
''' % temp_dir)
    except Exception as x:
        if _DEVELOPER:
            raise
        message = "PASTA is exiting because of an error:\n%s " % str(x)
        try:
            from pasta import MESSENGER
            MESSENGER.send_error(message)
        except Exception:
            # Best-effort fallback when the messenger itself fails; a bare
            # "except:" here would also swallow KeyboardInterrupt/SystemExit.
            sys.stderr.write(message)
        sys.exit(1)
<!-- tool_data_table_conf.xml.sample: declares the plant_tribes_scaffolds data table and the .loc file that backs it -->
<tables>
    <table name="plant_tribes_scaffolds" comment_char="#">
        <columns>value, name, path, description</columns>
        <file path="tool-data/plant_tribes_scaffolds.loc" />
    </table>
</tables>
# utils.py
"""Helper functions for running an external command and publishing its files.

The helpers capture a command's stdout/stderr in files in the current working
directory, abort with the captured text when the command fails, move or copy
result files into a destination directory and write an HTML index of them.
"""
import os
import shutil
import subprocess
import sys

# Names of the capture files created in the current working directory.
FSTDERR = 'stderr.txt'
FSTDOUT = 'stdout.txt'


def _read_text(path):
    """Return the content of *path* as text.

    The file is read as bytes and decoded (undecodable bytes are replaced) so
    that the text, not a ``b'...'`` repr, ends up in error messages on
    Python 3.  On Python 2 the bytes read are already ``str`` and are
    returned unchanged.
    """
    with open(path, 'rb') as fh:
        data = fh.read()
    if not isinstance(data, str):
        data = data.decode('utf-8', 'replace')
    return data


def check_execution_errors(rc, fstderr, fstdout):
    """Exit with the captured output when the return code *rc* is non-zero.

    *fstderr* and *fstdout* are the paths of the capture files.  The exit
    message is the stdout text followed by the stderr text, each terminated
    by a newline.  Nothing happens when *rc* is zero.
    """
    if rc != 0:
        out_msg = _read_text(fstdout)
        err_msg = _read_text(fstderr)
        msg = '%s\n%s\n' % (str(out_msg), str(err_msg))
        stop_err(msg)


def get_response_buffers():
    """Create the stderr/stdout capture files in the current directory.

    Returns the tuple ``(fstderr, fherr, fstdout, fhout)``: the two paths and
    their file objects opened for binary writing.  The caller is responsible
    for closing both file objects.
    """
    fstderr = os.path.join(os.getcwd(), FSTDERR)
    fherr = open(fstderr, 'wb')
    fstdout = os.path.join(os.getcwd(), FSTDOUT)
    try:
        fhout = open(fstdout, 'wb')
    except Exception:
        # Do not leak the first handle when the second file cannot be opened.
        fherr.close()
        raise
    return fstderr, fherr, fstdout, fhout


def move_directory_files(source_dir, destination_dir, copy=False):
    """Move (or copy, when *copy* is true) every entry of *source_dir*.

    *destination_dir* is created when it does not exist.  In copy mode
    sub-directories are copied recursively and files are copied with
    ``shutil.copy``; otherwise every entry is moved with ``shutil.move``.
    """
    source_directory = os.path.abspath(source_dir)
    destination_directory = os.path.abspath(destination_dir)
    if not os.path.isdir(destination_directory):
        os.makedirs(destination_directory)
    for dir_entry in os.listdir(source_directory):
        source_entry = os.path.join(source_directory, dir_entry)
        if copy:
            if os.path.isdir(source_entry):
                # shutil.copy only handles regular files.
                shutil.copytree(source_entry, os.path.join(destination_directory, dir_entry))
            else:
                shutil.copy(source_entry, destination_directory)
        else:
            shutil.move(source_entry, destination_directory)


def run_command(cmd):
    """Run the shell command string *cmd* and exit if it fails.

    stdout and stderr are captured in the files created by
    ``get_response_buffers``.  *cmd* is executed through the shell, so the
    caller must quote any untrusted or space-containing values.
    """
    fstderr, fherr, fstdout, fhout = get_response_buffers()
    try:
        proc = subprocess.Popen(args=cmd, stderr=fherr, stdout=fhout, shell=True)
        rc = proc.wait()
    finally:
        # Close the capture files even when the process could not be started.
        fherr.close()
        fhout.close()
    # Check results.
    check_execution_errors(rc, fstderr, fstdout)


def stop_err(msg):
    """Terminate the program via ``sys.exit`` with *msg* as the exit message."""
    sys.exit(msg)


def write_html_output(output, title, dir):
    """Write to *output* an HTML page linking to the entries of *dir*.

    The page shows *title* with the number of entries, a header row reading
    'Directories' or 'Datasets' depending on the first entry (empty for an
    empty directory), and one table row per entry in sorted order with
    alternating background colors.  Links are relative to the entry names.
    """
    # List the directory first so a missing directory does not leave an
    # empty output file behind.
    dir_items = sorted(os.listdir(dir))
    # Directories can only contain either files or directories,
    # but not both.
    if dir_items:
        item_path = os.path.join(dir, dir_items[0])
        header = 'Directories' if os.path.isdir(item_path) else 'Datasets'
    else:
        header = ''
    with open(output, 'w') as fh:
        fh.write('<html><head><h3>%s: %d items</h3></head>\n' % (title, len(dir_items)))
        fh.write('<body><p/><table cellpadding="2">\n')
        fh.write('<tr><th>%s</th></tr>\n' % header)
        for index, fname in enumerate(dir_items):
            bgcolor = '#D8D8D8' if index % 2 == 0 else '#FFFFFF'
            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
            fh.write('<tr bgcolor="%s"><td>%s</td></tr>\n' % (bgcolor, link))
        fh.write('</table></body></html>\n')