changeset 3:3e10a2683769 draft

Uploaded
author greg
date Mon, 30 Oct 2017 09:52:34 -0400 (2017-10-30)
parents 8ec75782a05b
children 79adf9087193
files .shed.yml gene_family_classifier.py gene_family_classifier.xml macros.xml test-data/20.faa test-data/20.fna test-data/3494.faa test-data/3494.fna test-data/3722.faa test-data/3722.fna test-data/38889.faa test-data/38889.fna test-data/39614.faa test-data/39614.fna test-data/5235.faa test-data/5235.fna test-data/output.ptorthocs utils.py
diffstat 18 files changed, 101 insertions(+), 215 deletions(-) [+]
line wrap: on
line diff
--- a/.shed.yml	Thu Aug 24 13:26:35 2017 -0400
+++ b/.shed.yml	Mon Oct 30 09:52:34 2017 -0400
@@ -9,7 +9,7 @@
   utilize objective classifications of complete protein sequences from sequenced plant genomes to perform
   comparative evolutionary studies.  This tool classifies gene sequences into precomputed orthologous gene family
   clusters using either blastp (faster), HMMScan (slower but more sensitive to remote homologs) or both (more exhaustive).
-remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/gene_family_classifier
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/phylogenetics/plant_tribes/gene_family_classifier
 type: unrestricted
 categories:
 - Phylogenetics
--- a/gene_family_classifier.py	Thu Aug 24 13:26:35 2017 -0400
+++ b/gene_family_classifier.py	Mon Oct 30 09:52:34 2017 -0400
@@ -24,12 +24,6 @@
 parser.add_argument('--coding_sequences', dest='coding_sequences', default=None, help='Flag to create orthogroup coding sequences')
 parser.add_argument('--save_hmmscan_log', dest='save_hmmscan_log', default=None, help='Flag to save the hmmscan log')
 parser.add_argument('--hmmscan_log', dest='hmmscan_log', default=None, help='hmmscan log file')
-parser.add_argument('--output_ptortho', dest='output_ptortho', default=None, help='Output for orthogroups')
-parser.add_argument('--output_ptortho_dir', dest='output_ptortho_dir', default=None, help='output_ptortho.files_path')
-parser.add_argument('--output_ptorthocs', dest='output_ptorthocs', default=None, help='Output for orthogroups with corresponding coding sequences')
-parser.add_argument('--output_ptorthocs_dir', dest='output_ptorthocs_dir', default=None, help='output_ptorthocs.files_path')
-parser.add_argument('--output_ptsco', dest='output_ptsco', default=None, help='Output for single copy orthogroups')
-parser.add_argument('--output_ptsco_dir', dest='output_ptsco_dir', default=None, help='output_ptsco.files_path')
 
 args = parser.parse_args()
 
@@ -74,22 +68,18 @@
 
 # Handle orthogroups outputs.
 if create_ortho_sequences:
-    if create_corresponding_coding_sequences:
-        out_file = args.output_ptorthocs
-        orthogroups_fasta_dest_dir = args.output_ptorthocs_dir
-        title = 'Orthogroups and corresponding coding sequences files'
-    else:
-        out_file = args.output_ptortho
-        orthogroups_fasta_dest_dir = args.output_ptortho_dir
-        title = 'Orthogroups files'
     orthogroups_fasta_src_dir = os.path.join(OUTPUT_DIR, 'orthogroups_fasta')
-    utils.move_directory_files(orthogroups_fasta_src_dir, orthogroups_fasta_dest_dir)
-    utils.write_html_output(out_file, title, orthogroups_fasta_dest_dir)
+    orthogroups_fasta_dest_dir = 'output_orthogroups_fasta_dir'
+    if not os.path.isdir(orthogroups_fasta_dest_dir):
+        os.makedirs(orthogroups_fasta_dest_dir)
+    # Remove source directory so it won't break dataset collection handler.
+    utils.move_directory_files(orthogroups_fasta_src_dir, orthogroups_fasta_dest_dir, remove_source_dir=True)
 
 # Handle single copy orthogroup outputs.
-if args.output_ptsco is not None:
+if args.single_copy_custom is not None or args.single_copy_taxa != 0:
     single_copy_fasta_src_dir = os.path.join(OUTPUT_DIR, 'single_copy_fasta')
-    single_copy_fasta_dest_dir = args.output_ptsco_dir
-    title = 'Single copy orthogroups files'
-    utils.move_directory_files(single_copy_fasta_src_dir, single_copy_fasta_dest_dir)
-    utils.write_html_output(args.output_ptsco, title, single_copy_fasta_dest_dir)
+    single_copy_fasta_dest_dir = 'output_single_copy_fasta_dir'
+    if not os.path.isdir(single_copy_fasta_dest_dir):
+        os.makedirs(single_copy_fasta_dest_dir)
+    # Remove source directory so it won't break dataset collection handler.
+    utils.move_directory_files(single_copy_fasta_src_dir, single_copy_fasta_dest_dir, remove_source_dir=True)
--- a/gene_family_classifier.xml	Thu Aug 24 13:26:35 2017 -0400
+++ b/gene_family_classifier.xml	Mon Oct 30 09:52:34 2017 -0400
@@ -1,9 +1,11 @@
-<tool id="plant_tribes_gene_family_classifier" name="GeneFamilyClassifier" version="@WRAPPER_VERSION@.2">
+<tool id="plant_tribes_gene_family_classifier" name="GeneFamilyClassifier" version="@WRAPPER_VERSION@.3.0">
     <description>classifies gene sequences into pre-computed orthologous gene family clusters</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements_gene_family_classifier" />
+    <requirements>
+        <requirement type="package" version="1.0.3">plant_tribes_gene_family_classifier</requirement>
+    </requirements>
     <command detect_errors="exit_code"><![CDATA[
 #if str($options_type.options_type_selector) == 'advanced':
     #set specify_super_orthogroups_cond = $options_type.specify_super_orthogroups_cond
@@ -83,22 +85,7 @@
 
 #if (str($save_hmmscan_log_cond.classifier) == 'hmmscan' or str($save_hmmscan_log_cond.classifier) == 'both') and str($save_hmmscan_log_cond.save_hmmscan_log) == 'yes':
     --save_hmmscan_log true
-    --hmmscan_log '$hmmscan_log'
-#end if
-#if $create_ortho_sequences:
-    #if $create_corresponding_coding_sequences:
-        --output_ptorthocs '$output_ptorthocs'
-        --output_ptorthocs_dir '$output_ptorthocs.files_path'
-    #else:
-        --output_ptortho '$output_ptortho'
-        --output_ptortho_dir '$output_ptortho.files_path'
-    #end if
-#end if
-#if $single_copy_orthogroup:
-    #if $create_ortho_sequences:
-        --output_ptsco '$output_ptsco'
-        --output_ptsco_dir '$output_ptsco.files_path'
-    #end if
+    --hmmscan_log '$output_hmmscan_log'
 #end if
     ]]></command>
     <inputs>
@@ -206,24 +193,19 @@
         </conditional>
     </inputs>
     <outputs>
-        <data name="hmmscan_log" format="txt" label="${tool.name} (hmmscan.log) on ${on_string}">
+        <data name="output_hmmscan_log" format="txt" label="${tool.name} (hmmscan.log) on ${on_string}">
             <filter>save_hmmscan_log_cond['classifier'] in ['hmmscan', 'both'] and save_hmmscan_log_cond['save_hmmscan_log'] == 'yes'</filter>
         </data>
-        <data name="output_ptortho" format="ptortho" label="${tool.name} (gene family clusters) on ${on_string}">
-            <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['create_orthogroup_cond']['create_corresponding_coding_sequences_cond']['create_corresponding_coding_sequences'] == 'no'</filter>
-        </data>
-        <data name="output_ptorthocs" format="ptorthocs" label="${tool.name} (gene family clusters) on ${on_string}">
-            <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['create_orthogroup_cond']['create_corresponding_coding_sequences_cond']['create_corresponding_coding_sequences'] == 'yes'</filter>
-        </data>
-        <data name="output_ptsco" format="tabular" label="${tool.name} (single copy orthogroups) on ${on_string}">
+        <collection name="output_orthos" type="list" label="${tool.name} on ${on_string}">
+            <discover_datasets pattern="__name__" directory="geneFamilyClassification_dir" visible="false" ext="tabular" />
+        </collection>
+        <collection name="output_orthogroups_fasta" type="list" label="${tool.name} (gene family clusters) on ${on_string}">
+            <discover_datasets pattern="__name__" directory="output_orthogroups_fasta_dir" visible="false" ext="fasta" />
+            <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes'</filter>
+        </collection>
+        <collection name="output_single_copy_fasta" type="list" label="${tool.name} (single copy orthogroups) on ${on_string}">
+            <discover_datasets pattern="__name__" directory="output_single_copy_fasta_dir" visible="false" ext="fasta" />
             <filter>options_type['options_type_selector'] == 'advanced' and options_type['create_orthogroup_cond']['create_orthogroup'] == 'yes' and options_type['specify_single_copy_cond']['specify_single_copy'] == 'yes'</filter>
-            <change_format>
-                <when input="options_type.create_orthogroup_cond.create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences" value="no" format="ptortho" />
-                <when input="options_type.create_orthogroup_cond.create_corresponding_coding_sequences_cond.create_corresponding_coding_sequences" value="yes" format="ptorthocs" />
-            </change_format>
-        </data>
-        <collection name="orthos" type="list">
-            <discover_datasets pattern="__name__" directory="geneFamilyClassification_dir" visible="false" ext="tabular" />
         </collection>
     </outputs>
     <tests>
@@ -235,8 +217,8 @@
             <param name="options_type_selector" value="advanced"/>
             <param name="create_orthogroup" value="yes"/>
             <param name="create_corresponding_coding_sequences" value="yes"/>
-            <output name="output_ptorthocs" file="output.ptorthocs" ftype="ptorthocs"/>
-            <output_collection name="orthos" type="list">
+            <param name="coding_sequences" value="transcripts.cleaned.nr.cds" ftype="fasta"/>
+            <output_collection name="output_orthos" type="list">
                 <element name="proteins.blastp.22Gv1.1" file="proteins.blastp.22Gv1.1" ftype="tabular" compare="contains"/>
                 <element name="proteins.blastp.22Gv1.1.bestOrthos" file="proteins.blastp.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
                 <element name="proteins.both.22Gv1.1.bestOrthos" file="proteins.both.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
@@ -244,6 +226,20 @@
                 <element name="proteins.hmmscan.22Gv1.1" file="proteins.hmmscan.22Gv1.1" ftype="tabular" compare="contains"/>
                 <element name="proteins.hmmscan.22Gv1.1.bestOrthos" file="proteins.hmmscan.22Gv1.1.bestOrthos" ftype="tabular" compare="contains"/>
             </output_collection>
+            <output_collection name="output_orthogroups_fasta" type="list">
+                <element name="20.faa" file="20.faa" ftype="fasta"/>
+                <element name="20.fna" file="20.fna" ftype="fasta"/>
+                <element name="3494.faa" file="3494.faa" ftype="fasta"/>
+                <element name="3494.fna" file="3494.fna" ftype="fasta"/>
+                <element name="3722.faa" file="3722.faa" ftype="fasta"/>
+                <element name="3722.fna" file="3722.fna" ftype="fasta"/>
+                <element name="38889.faa" file="38889.faa" ftype="fasta"/>
+                <element name="38889.fna" file="38889.fna" ftype="fasta"/>
+                <element name="39614.faa" file="39614.faa" ftype="fasta"/>
+                <element name="39614.fna" file="39614.fna" ftype="fasta"/>
+                <element name="5235.faa" file="5235.faa" ftype="fasta"/>
+                <element name="5235.fna" file="5235.fna" ftype="fasta"/>
+            </output_collection>
         </test>
     </tests>
     <help>
--- a/macros.xml	Thu Aug 24 13:26:35 2017 -0400
+++ b/macros.xml	Mon Oct 30 09:52:34 2017 -0400
@@ -1,47 +1,6 @@
 <?xml version='1.0' encoding='UTF-8'?>
 <macros>
     <token name="@WRAPPER_VERSION@">1.0</token>
-    <xml name="requirements_assembly_post_processor">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_assembly_post_processor</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_aligner">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_aligner</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_classifier">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_classifier</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_integrator">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_integrator</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_kaks_analysis">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_kaks_analysis</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_ks_distribution">
-        <requirements>
-            <requirement type="package" version="1.3.2">r-optparse</requirement>
-        </requirements>
-    </xml>
-    <xml name="requirements_gene_family_phylogeny_builder">
-        <requirements>
-            <requirement type="package" version="1.0.2">plant_tribes_gene_family_phylogeny_builder</requirement>
-        </requirements>
-    </xml>
-    <xml name="param_codon_alignments">
-        <param name="codon_alignments" type="select" label="Codon alignments">
-            <option value="yes" selected="true">Yes</option>
-            <option value="no">No</option>
-        </param>
-    </xml>
     <xml name="param_method">
         <param name="method" type="select" label="Protein clustering method">
             <option value="gfam" selected="true">GFam</option>
@@ -49,74 +8,12 @@
             <option value="orthomcl">OrthoMCL</option>
         </param>
     </xml>
-    <xml name="param_options_type">
-        <param name="options_type" type="select" label="Options Configuration">
-            <option value="basic" selected="true">Basic</option>
-            <option value="advanced">Advanced</option>
-        </param>
-    </xml>
-    <xml name="param_orthogroup_fna">
-        <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences">
-            <option value="yes" selected="true">Yes</option>
-            <option value="no">No</option>
-        </param>
-    </xml>
     <xml name="param_scaffold">
         <param name="scaffold" type="select" label="Gene family scaffold">
             <options from_data_table="plant_tribes_scaffolds" />
             <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
         </param>
     </xml>
-    <xml name="param_sequence_type">
-        <param name="sequence_type" type="select" label="Sequence type used in the phylogenetic inference (dna)">
-            <option value="protein" selected="true">Amino acid based</option>
-            <option value="dna">Nucleotide based</option>
-        </param>
-    </xml>
-    <xml name="cond_alignment_method">
-        <conditional name="alignment_method_cond">
-            <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
-                <option value="mafft" selected="true">MAFFT</option>
-                <option value="pasta">PASTA</option>
-            </param>
-            <when value="mafft" />
-            <when value="pasta">
-                <param name="pasta_iter_limit" type="integer" value="3" min="1" label="PASTA iteration limit" />
-            </when>
-        </conditional>
-    </xml>
-    <xml name="cond_remove_gappy_sequences">
-        <conditional name="remove_gappy_sequences_cond">
-            <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration">
-                <option value="no" selected="true">No</option>
-                <option value="yes">Yes</option>
-            </param>
-            <when value="no" />
-            <when value="yes">
-                <conditional name="trim_type_cond">
-                    <param name="trim_type" type="select" label="Trimming method">
-                        <option value="gap_trimming" selected="true">Gap score based trimming</option>
-                        <option value="automated_trimming">Automated heuristic trimming</option>
-                    </param>
-                    <when value="gap_trimming">
-                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
-                    </when>
-                    <when value="automated_trimming" />
-                </conditional>
-                <conditional name="remove_sequences_with_gaps_cond">
-                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences">
-                        <option value="no" selected="true">No</option>
-                        <option value="yes">Yes</option>
-                    </param>
-                    <when value="no" />
-                    <when value="yes">
-                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
-                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
-                    </when>
-                </conditional>
-            </when>
-        </conditional>
-    </xml>
     <xml name="citation1">
         <citation type="bibtex">
             @misc{None,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/20.faa	Mon Oct 30 09:52:34 2017 -0400
@@ -0,0 +1,3 @@
+>contig_9
+XLRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGR
+TAHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAVX
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/20.fna	Mon Oct 30 09:52:34 2017 -0400
@@ -0,0 +1,6 @@
+>contig_9
+NNACTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTTTGATGAAATTATGGAGCA
+TGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCCTGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCT
+CAAAACTGAGAAGCGAGGGTAGAATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGA
+ACAGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAAACAGAACATCTACCTCGC
+TGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGCACCTATGACTCAATATTTTGTCTTTGAGGCGGTTGAN
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3494.faa	Mon Oct 30 09:52:34 2017 -0400
@@ -0,0 +1,3 @@
+>contig_10
+MAEENTTTMNLDLNLGPINNSSDDSEPSSRPYTDVAMNLEDWLDSPVRVREVVRHRNHRWRSLWRQIPIPPDTRNLALEL
+IGGNAP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3494.fna	Mon Oct 30 09:52:34 2017 -0400
@@ -0,0 +1,5 @@
+>contig_10
+ATGGCAGAAGAGAACACCACTACAATGAACCTCGATCTCAATTTGGGCCCCATCAATAACTCAAGCGACGATAGCGAACC
+TTCATCACGCCCTTATACTGATGTCGCAATGAACTTGGAAGATTGGTTAGATAGTCCCGTCCGAGTTCGTGAAGTCGTCC
+GCCACAGAAATCATAGGTGGCGCTCTTTGTGGCGCCAAATCCCAATTCCGCCTGATACGCGAAACCTCGCGCTCGAATTA
+ATCGGCGGCAATGCCCCN
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3722.faa	Mon Oct 30 09:52:34 2017 -0400
@@ -0,0 +1,3 @@
+>contig_7
+ENEWSGAEFLNEMAAMMTQNKSNENGTGTFEELQQLFDEMFQSDIESFNGCSSSSNETCSNSNKRNSIESSSANFRPENG
+NESGEISGKKNTRKGKGDX
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3722.fna	Mon Oct 30 09:52:34 2017 -0400
@@ -0,0 +1,5 @@
+>contig_7
+GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAATAAATCCAATGAAAACGGAAC
+CGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATGTTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCAT
+CATCCAATGAAACATGTAGCAACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGGA
+AACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACGNN
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/38889.faa	Mon Oct 30 09:52:34 2017 -0400
@@ -0,0 +1,4 @@
+>contig_2
+XLSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEK
+WVTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAAS
+QLLNYVRFDD
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/38889.fna	Mon Oct 30 09:52:34 2017 -0400
@@ -0,0 +1,8 @@
+>contig_2
+NNCCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCCGGAAGATGCAGCAGACGA
+CTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCATACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACC
+TCATGTTGCTTGGAATGGGGCCCGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAA
+TGGGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCCGATGATCAACTCGGCTTC
+GGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTACAACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGC
+CGGGTGAGACTCCTTTGCCTTGTACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCA
+CAACTGTTAAATTATGTGCGCTTTGATGAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/39614.faa	Mon Oct 30 09:52:34 2017 -0400
@@ -0,0 +1,3 @@
+>contig_3
+XVDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKG
+TIESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/39614.fna	Mon Oct 30 09:52:34 2017 -0400
@@ -0,0 +1,6 @@
+>contig_3
+NNTGTGGATGAAGGAGTTGTTGTTGCTGGCTTGTCAGAGCAGGAGAAGGCATCTGTTTCTGAAATTCTGACAACTGCTAG
+AGCTCATTCAGAAACAATTGAGAACCTTAAGAGAGATCATTCCCAGCAGGTATCCTGTATCGAACAGCACACGAATGATA
+CTTTCAGGCAAAAATACATGGATTACGAGCCTACAGGGTCCACGCCAGTTAGGAGCGAGCCGGATATTCCCAGCAAAGGC
+ACAATAGAGTCACTTCGTGCCATGCCTATAGATGCACTTGAAGAAGAATTTCGAGAAAACCATTCATACGAATCTGCTGT
+TACAGGAAAGGAACTAATGCCGTCTGTTACGACTCGTGCACCATTTTCACAGATCAAC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/5235.faa	Mon Oct 30 09:52:34 2017 -0400
@@ -0,0 +1,3 @@
+>contig_1
+XKKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLL
+ATPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVNR
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/5235.fna	Mon Oct 30 09:52:34 2017 -0400
@@ -0,0 +1,6 @@
+>contig_1
+NTTAAAAAATTATATGGAGACAAGGAAGATATCCTTGTCCCAGATATTTTCTGGGATTACACGAGTGGAAAGGTGCTAAC
+AATGGAGTGGGTTGAAGGTGTTAAATTAAATGAGCAAGATGCCGTTGAGAGTCAAGGGCTCAGTGTTCTGGATCTGGTGA
+ATACCGGCATACAGTGCAGTCTTCGACAGCTGCTTGAGTACGGCTATTTTCATGCAGATCCTCACCCAGGGAATCTCTTA
+GCTACACCTGACGGGAAGCTTGCTTTTCTTGATTTTGGAATGATGAGTGAGACTCCTGAACAAGCAAGATCGGCCATAAT
+TGGTCATGTTGTACACATGGTTAATCGN
--- a/test-data/output.ptorthocs	Thu Aug 24 13:26:35 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-<html><head><h3>Orthogroups and corresponding coding sequences files: 12 items</h3></head>
-<body><p/><table cellpadding="2">
-<tr><b>Datasets</th></b>
-<tr bgcolor="#D8D8D8"><td><a href="20.faa" type="text/plain">20.faa</a>
-</td></tr>
-<tr bgcolor="#FFFFFF"><td><a href="20.fna" type="text/plain">20.fna</a>
-</td></tr>
-<tr bgcolor="#D8D8D8"><td><a href="3494.faa" type="text/plain">3494.faa</a>
-</td></tr>
-<tr bgcolor="#FFFFFF"><td><a href="3494.fna" type="text/plain">3494.fna</a>
-</td></tr>
-<tr bgcolor="#D8D8D8"><td><a href="3722.faa" type="text/plain">3722.faa</a>
-</td></tr>
-<tr bgcolor="#FFFFFF"><td><a href="3722.fna" type="text/plain">3722.fna</a>
-</td></tr>
-<tr bgcolor="#D8D8D8"><td><a href="38889.faa" type="text/plain">38889.faa</a>
-</td></tr>
-<tr bgcolor="#FFFFFF"><td><a href="38889.fna" type="text/plain">38889.fna</a>
-</td></tr>
-<tr bgcolor="#D8D8D8"><td><a href="39614.faa" type="text/plain">39614.faa</a>
-</td></tr>
-<tr bgcolor="#FFFFFF"><td><a href="39614.fna" type="text/plain">39614.fna</a>
-</td></tr>
-<tr bgcolor="#D8D8D8"><td><a href="5235.faa" type="text/plain">5235.faa</a>
-</td></tr>
-<tr bgcolor="#FFFFFF"><td><a href="5235.fna" type="text/plain">5235.fna</a>
-</td></tr>
-</table></body></html>
--- a/utils.py	Thu Aug 24 13:26:35 2017 -0400
+++ b/utils.py	Mon Oct 30 09:52:34 2017 -0400
@@ -27,7 +27,7 @@
     return fstderr, fherr, fstdout, fhout
 
 
-def move_directory_files(source_dir, destination_dir, copy=False):
+def move_directory_files(source_dir, destination_dir, copy=False, remove_source_dir=False):
     source_directory = os.path.abspath(source_dir)
     destination_directory = os.path.abspath(destination_dir)
     if not os.path.isdir(destination_directory):
@@ -38,6 +38,8 @@
             shutil.copy(source_entry, destination_directory)
         else:
             shutil.move(source_entry, destination_directory)
+    if remove_source_dir:
+        os.rmdir(source_directory)
 
 
 def run_command(cmd):
@@ -52,29 +54,3 @@
 
 def stop_err(msg):
     sys.exit(msg)
-
-
-def write_html_output(output, title, dir):
-    with open(output, 'w') as fh:
-        dir_items = sorted(os.listdir(dir))
-        # Directories can only contain either files or directories,
-        # but not both.
-        if len(dir_items) > 0:
-            item_path = os.path.join(dir, dir_items[0])
-            if os.path.isdir(item_path):
-                header = 'Directories'
-            else:
-                header = 'Datasets'
-        else:
-            header = ''
-        fh.write('<html><head><h3>%s: %d items</h3></head>\n' % (title, len(dir_items)))
-        fh.write('<body><p/><table cellpadding="2">\n')
-        fh.write('<tr><b>%s</th></b>\n' % header)
-        for index, fname in enumerate(dir_items):
-            if index % 2 == 0:
-                bgcolor = '#D8D8D8'
-            else:
-                bgcolor = '#FFFFFF'
-            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
-            fh.write('<tr bgcolor="%s"><td>%s</td></tr>\n' % (bgcolor, link))
-        fh.write('</table></body></html>\n')