changeset 4:146bbd9d58f6 draft default tip

Added tests
author davidvanzessen
date Mon, 12 Nov 2018 09:33:34 -0500
parents b6d63b9efb8f
children
files demultiplex.py demultiplex.xml test-data/input.fasta test-data/input.fastq test-data/mapping.txt test-data/sequence1.fasta test-data/sequence1.fastq test-data/sequence2.fasta test-data/sequence2.fastq
diffstat 9 files changed, 142 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/demultiplex.py	Fri Nov 09 05:52:15 2018 -0500
+++ b/demultiplex.py	Mon Nov 12 09:33:34 2018 -0500
@@ -29,17 +29,17 @@
 
 def search_barcode_in_first_half(sequence, barcode):
     if type(sequence) is Seq:
-        sequence = str(sequence)
+        sequence = str(sequence).lower()
     elif type(sequence) is SeqRecord:
-        sequence = str(sequence.seq)
+        sequence = str(sequence.seq).lower()
     return sequence.find(barcode, 0, int(len(sequence) / 2))
 
 
 def search_barcode_in_second_half(sequence, barcode):
     if type(sequence) is Seq:
-        sequence = str(sequence)
+        sequence = str(sequence).lower()
     elif type(sequence) is SeqRecord:
-        sequence = str(sequence.seq)
+        sequence = str(sequence.seq).lower()
     return sequence.find(barcode, int(len(sequence) / 2))
 
 
@@ -65,10 +65,10 @@
 
 def main():
     parser = argparse.ArgumentParser()
-    parser.add_argument("-i", "--input", help="The input file")
+    parser.add_argument("-i", "--input", help="The input file", required=True)
     parser.add_argument("-f", "--format", help="The format of the input file (fastq/fasta)", default="auto", choices=["fasta", "fastq", "auto"])
-    parser.add_argument("-o", "--output-dir", help="The output dir")
-    parser.add_argument("-m", "--mapping-file", help="A tab seperated file containing two columns, ID and barcode (no header)")
+    parser.add_argument("-o", "--output-dir", help="The output dir", required=True)
+    parser.add_argument("-m", "--mapping-file", help="A tab seperated file containing two columns, ID and barcode (no header)", required=True)
 
     args = parser.parse_args()
 
@@ -121,9 +121,11 @@
         ID = ID_barcode["ID"]
         barcode = ID_barcode["barcode"]
 
+        logging.info("{0}:\t\t{1}".format(ID, barcode))
+
         output_file_path = os.path.join(
             output_dir,
-            "{0}_{1}.{2}".format(input_basename_no_ext, ID, input_format)
+            "{0}.{1}".format(ID, input_format)
         )
 
         if ID not in ID_file_handle_dict:
@@ -134,15 +136,15 @@
 
         barcode_data_dict[ID] += [BarcodeData(
             ID=ID,
-            barcode=barcode,
-            barcode_reverse=str(Seq(barcode, generic_dna).reverse_complement()),
+            barcode=barcode.lower(),
+            barcode_reverse=str(Seq(barcode, generic_dna).reverse_complement()).lower(),
             output_file_path=output_file_path,
             output_file_handle=ID_file_handle
         )]
 
     discarded_output_file_path = os.path.join(
         output_dir,
-        "{0}_{1}.{2}".format(basename_input_file_path, "discarded", input_format)
+        "{0}.{1}".format("discarded", input_format)
     )
 
     total_sequences = 0
--- a/demultiplex.xml	Fri Nov 09 05:52:15 2018 -0500
+++ b/demultiplex.xml	Mon Nov 12 09:33:34 2018 -0500
@@ -1,9 +1,10 @@
 <tool id="demultiplex-emc" name="Demultiplex" version="1.0.0">
+    <description></description>
     <requirements>
         <requirement type="package" version="3.7.0">python</requirement>
         <requirement type="package" version="1.72">biopython</requirement>
     </requirements>
-    <description></description>
+    
     <command>
         mkdir outputs;
         python3 $__tool_directory__/demultiplex.py 
@@ -18,29 +19,39 @@
     </inputs>
     <outputs>
         <!--<data name="debug" format="txt" label="debug"/>-->
-        <collection name='demultiplex_out' format_source='input' type='list'>
-            <discover_datasets pattern="__name_and_ext__" directory="outputs" format_source='input'/>
+        <collection name='demultiplex_out' format_source='input' type="list">
+            <discover_datasets pattern="__designation_and_ext__" directory="outputs"/>
+            <!--<discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;.+)" directory="outputs"/>-->
         </collection>
     </outputs>
     <tests>
-    <!--
         <test>
-            <param name="input1" value="1.bed"/>
-            <param name="input2" value="2.bed"/>
-            <output name="out_file1" file="cat_wrapper_out1.bed"/>
+            <param name="input" value="input.fastq"/>
+            <param name="mapping" value="mapping.txt"/>
+            <output_collection name="demultiplex_out" type="list">
+                <element name="sequence1" file="sequence1.fastq"/>
+                <element name="sequence2" file="sequence2.fastq"/>
+            </output_collection>
         </test>
-        TODO: if possible, enhance the underlying test code to handle this test
-            the problem is multiple params with the same name "input2"
         <test>
-            <param name="input1" value="1.bed"/>
-            <param name="input2" value="2.bed"/>
-            <param name="input2" value="3.bed"/>
-            <output name="out_file1" file="cat_wrapper_out2.bed"/>
+            <param name="input" value="input.fasta"/>
+            <param name="mapping" value="mapping.txt"/>
+            <output_collection name="demultiplex_out" type="list">
+                <element name="sequence1" file="sequence1.fasta"/>
+                <element name="sequence2" file="sequence2.fasta"/>
+            </output_collection>
         </test>
-    -->
     </tests>
     <help>
 There is no help
     </help>
+    <citations>
+        <citation type="bibtex">@misc{Demultplex-EMC,
+            author = {Erasmus MC},
+            title = {Demultiplex-EMC},
+            year = {2018},
+            howpublished = {https://github.com/ErasmusMC-Bioinformatics/Demultiplex}
+        }</citation>
+    </citations>
 </tool>
  
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fasta	Mon Nov 12 09:33:34 2018 -0500
@@ -0,0 +1,24 @@
+>sequence1_upper
+GTACAACAATATTTGTTAGTCACCTTTGGGTCACGATCTCCCACCTTACTGGAATTTAGTCCCTGCTATAATTTGCCTTG
+CATATAAGTTGCGTTACTTCAGCGTCCTAACCGCACCCTTAGCACGAAGACAGATTTGTTCATTCCCATACTCCGGCGTT
+GGCAGGGGGTTCGCATGTCCCACGTGAAACGTTGCTAAAC
+>sequence1_lower
+gtacaacaatatttgttagtcacctttgggtcacgatctcccaccttactggaatttagtccctgctataatttgccttg
+catataagttgcgttacttcagcgtcctaaccgcacccttagcacgaagacagatttgttcattcccatactccggcgtt
+ggcagggggttcgcatgtcccacgtgaaacgttgctaaac
+>sequence1_mix
+gTACaacAaTATTTGtTaGtCAccttTgGgTCACGATCtCcCaccttACtggAAtTTaGTcCCTGCTATAAtTtgCcttG
+CAtATAaGtTgcgttaCTtCaGCgtccTAaCcgcAccCTTagCaCgaAGacaGaTttGTTCATtCccATACTCcggCgtT
+ggCagGGGgtTCgCatgtCccACGtGaAACgTTgCtAAac
+>sequence2_upper
+CCTCAGGTTTCTGAGCGACAAAAGCTTTAAACGGGAGTTCGCGCTCATAACTTGGTCCGAATGCGGGTTCTTGCATCGTT
+CGACTGAGTTTGTTTCATGTAGAACGGGCGCAAAGTATACTTAGTTCAATCTTCAATACCTCGTATCATTGTACACCTGC
+CGGTCACCACCCAACGATGTGGGGACGGCGTTGCAACTTC
+>sequence2_lower
+cctcaggtttctgagcgacaaaagctttaaacgggagttcgcgctcataacttggtccgaatgcgggttcttgcatcgtt
+cgactgagtttgtttcatgtagaacgggcgcaaagtatacttagttcaatcttcaatacctcgtatcattgtacacctgc
+cggtcaccacccaacgatgtggggacggcgttgcaacttc
+>sequence1_mix
+CctCaGgTTTctGaGCGAcAAAagCTttAAaCgGGaGtTCgcGCtcAtAaCTtggTcCGAATgcgGGtTcTTgCAtCGtt
+CgaCtgaGTTTgtttCatGTAgAacGGGCgCAAagTatACttaGtTCaATCTtCaatACCtcgtAtcATTgTACaCCtGC
+CGgTcAccaCcCaAcgATgTGGggACggCGtTgCAAcTTC
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.fastq	Mon Nov 12 09:33:34 2018 -0500
@@ -0,0 +1,24 @@
+@sequence1_upper
+GTACAACAATATTTGTTAGTCACCTTTGGGTCACGATCTCCCACCTTACTGGAATTTAGTCCCTGCTATAATTTGCCTTGCATATAAGTTGCGTTACTTCAGCGTCCTAACCGCACCCTTAGCACGAAGACAGATTTGTTCATTCCCATACTCCGGCGTTGGCAGGGGGTTCGCATGTCCCACGTGAAACGTTGCTAAAC
++
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+@sequence1_lower
+gtacaacaatatttgttagtcacctttgggtcacgatctcccaccttactggaatttagtccctgctataatttgccttgcatataagttgcgttacttcagcgtcctaaccgcacccttagcacgaagacagatttgttcattcccatactccggcgttggcagggggttcgcatgtcccacgtgaaacgttgctaaac
++
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+@sequence1_mix
+gTACaacAaTATTTGtTaGtCAccttTgGgTCACGATCtCcCaccttACtggAAtTTaGTcCCTGCTATAAtTtgCcttGCAtATAaGtTgcgttaCTtCaGCgtccTAaCcgcAccCTTagCaCgaAGacaGaTttGTTCATtCccATACTCcggCgtTggCagGGGgtTCgCatgtCccACGtGaAACgTTgCtAAac
++
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+@sequence2_upper
+CCTCAGGTTTCTGAGCGACAAAAGCTTTAAACGGGAGTTCGCGCTCATAACTTGGTCCGAATGCGGGTTCTTGCATCGTTCGACTGAGTTTGTTTCATGTAGAACGGGCGCAAAGTATACTTAGTTCAATCTTCAATACCTCGTATCATTGTACACCTGCCGGTCACCACCCAACGATGTGGGGACGGCGTTGCAACTTC
++
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+@sequence2_lower
+cctcaggtttctgagcgacaaaagctttaaacgggagttcgcgctcataacttggtccgaatgcgggttcttgcatcgttcgactgagtttgtttcatgtagaacgggcgcaaagtatacttagttcaatcttcaatacctcgtatcattgtacacctgccggtcaccacccaacgatgtggggacggcgttgcaacttc
++
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+@sequence1_mix
+CctCaGgTTTctGaGCGAcAAAagCTttAAaCgGGaGtTCgcGCtcAtAaCTtggTcCGAATgcgGGtTcTTgCAtCGttCgaCtgaGTTTgtttCatGTAgAacGGGCgCAAagTatACttaGtTCaATCTtCaatACCtcgtAtcATTgTACaCCtGCCGgTcAccaCcCaAcgATgTGGggACggCGtTgCAAcTTC
++
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mapping.txt	Mon Nov 12 09:33:34 2018 -0500
@@ -0,0 +1,2 @@
+sequence1	CAATATTTGT
+sequence2	tttctgagcg
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sequence1.fasta	Mon Nov 12 09:33:34 2018 -0500
@@ -0,0 +1,15 @@
+>sequence1_upper
+GTACAACAATATTTGTTAGTCACCTTTGGGTCACGATCTCCCACCTTACTGGAATTTAGT
+CCCTGCTATAATTTGCCTTGCATATAAGTTGCGTTACTTCAGCGTCCTAACCGCACCCTT
+AGCACGAAGACAGATTTGTTCATTCCCATACTCCGGCGTTGGCAGGGGGTTCGCATGTCC
+CACGTGAAACGTTGCTAAAC
+>sequence1_lower
+gtacaacaatatttgttagtcacctttgggtcacgatctcccaccttactggaatttagt
+ccctgctataatttgccttgcatataagttgcgttacttcagcgtcctaaccgcaccctt
+agcacgaagacagatttgttcattcccatactccggcgttggcagggggttcgcatgtcc
+cacgtgaaacgttgctaaac
+>sequence1_mix
+gTACaacAaTATTTGtTaGtCAccttTgGgTCACGATCtCcCaccttACtggAAtTTaGT
+cCCTGCTATAAtTtgCcttGCAtATAaGtTgcgttaCTtCaGCgtccTAaCcgcAccCTT
+agCaCgaAGacaGaTttGTTCATtCccATACTCcggCgtTggCagGGGgtTCgCatgtCc
+cACGtGaAACgTTgCtAAac
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sequence1.fastq	Mon Nov 12 09:33:34 2018 -0500
@@ -0,0 +1,12 @@
+@sequence1_upper
+GTACAACAATATTTGTTAGTCACCTTTGGGTCACGATCTCCCACCTTACTGGAATTTAGTCCCTGCTATAATTTGCCTTGCATATAAGTTGCGTTACTTCAGCGTCCTAACCGCACCCTTAGCACGAAGACAGATTTGTTCATTCCCATACTCCGGCGTTGGCAGGGGGTTCGCATGTCCCACGTGAAACGTTGCTAAAC
++
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+@sequence1_lower
+gtacaacaatatttgttagtcacctttgggtcacgatctcccaccttactggaatttagtccctgctataatttgccttgcatataagttgcgttacttcagcgtcctaaccgcacccttagcacgaagacagatttgttcattcccatactccggcgttggcagggggttcgcatgtcccacgtgaaacgttgctaaac
++
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+@sequence1_mix
+gTACaacAaTATTTGtTaGtCAccttTgGgTCACGATCtCcCaccttACtggAAtTTaGTcCCTGCTATAAtTtgCcttGCAtATAaGtTgcgttaCTtCaGCgtccTAaCcgcAccCTTagCaCgaAGacaGaTttGTTCATtCccATACTCcggCgtTggCagGGGgtTCgCatgtCccACGtGaAACgTTgCtAAac
++
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sequence2.fasta	Mon Nov 12 09:33:34 2018 -0500
@@ -0,0 +1,15 @@
+>sequence2_upper
+CCTCAGGTTTCTGAGCGACAAAAGCTTTAAACGGGAGTTCGCGCTCATAACTTGGTCCGA
+ATGCGGGTTCTTGCATCGTTCGACTGAGTTTGTTTCATGTAGAACGGGCGCAAAGTATAC
+TTAGTTCAATCTTCAATACCTCGTATCATTGTACACCTGCCGGTCACCACCCAACGATGT
+GGGGACGGCGTTGCAACTTC
+>sequence2_lower
+cctcaggtttctgagcgacaaaagctttaaacgggagttcgcgctcataacttggtccga
+atgcgggttcttgcatcgttcgactgagtttgtttcatgtagaacgggcgcaaagtatac
+ttagttcaatcttcaatacctcgtatcattgtacacctgccggtcaccacccaacgatgt
+ggggacggcgttgcaacttc
+>sequence1_mix
+CctCaGgTTTctGaGCGAcAAAagCTttAAaCgGGaGtTCgcGCtcAtAaCTtggTcCGA
+ATgcgGGtTcTTgCAtCGttCgaCtgaGTTTgtttCatGTAgAacGGGCgCAAagTatAC
+ttaGtTCaATCTtCaatACCtcgtAtcATTgTACaCCtGCCGgTcAccaCcCaAcgATgT
+GGggACggCGtTgCAAcTTC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sequence2.fastq	Mon Nov 12 09:33:34 2018 -0500
@@ -0,0 +1,12 @@
+@sequence2_upper
+CCTCAGGTTTCTGAGCGACAAAAGCTTTAAACGGGAGTTCGCGCTCATAACTTGGTCCGAATGCGGGTTCTTGCATCGTTCGACTGAGTTTGTTTCATGTAGAACGGGCGCAAAGTATACTTAGTTCAATCTTCAATACCTCGTATCATTGTACACCTGCCGGTCACCACCCAACGATGTGGGGACGGCGTTGCAACTTC
++
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+@sequence2_lower
+cctcaggtttctgagcgacaaaagctttaaacgggagttcgcgctcataacttggtccgaatgcgggttcttgcatcgttcgactgagtttgtttcatgtagaacgggcgcaaagtatacttagttcaatcttcaatacctcgtatcattgtacacctgccggtcaccacccaacgatgtggggacggcgttgcaacttc
++
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+@sequence1_mix
+CctCaGgTTTctGaGCGAcAAAagCTttAAaCgGGaGtTCgcGCtcAtAaCTtggTcCGAATgcgGGtTcTTgCAtCGttCgaCtgaGTTTgtttCatGTAgAacGGGCgCAAagTatACttaGtTCaATCTtCaatACCtcgtAtcATTgTACaCCtGCCGgTcAccaCcCaAcgATgTGGggACggCGtTgCAAcTTC
++
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa