# HG changeset patch
# User devteam
# Date 1390832744 18000
# Node ID 2793d1d765b95959197d92cbdf8898bf94c534b0
Imported from capsule None
diff -r 000000000000 -r 2793d1d765b9 fastq_paired_end_joiner.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_paired_end_joiner.py	Mon Jan 27 09:25:44 2014 -0500
@@ -0,0 +1,38 @@
+#Dan Blankenberg
+import sys, os, shutil
+from galaxy_utils.sequence.fastq import fastqReader, fastqNamedReader, fastqWriter, fastqJoiner
+
+def main():  # Join reads from the FASTQ file argv[1] with their pairs looked up in argv[3]; write joined reads to argv[5].
+    #Read command line arguments: input1 path, input1 type, input2 path, input2 type, output path
+    input1_filename = sys.argv[1]
+    input1_type = sys.argv[2] or 'sanger'  # an empty type argument falls back to 'sanger'
+    input2_filename = sys.argv[3]
+    input2_type = sys.argv[4] or 'sanger'  # an empty type argument falls back to 'sanger'
+    output_filename = sys.argv[5]
+    
+    if input1_type != input2_type:  # a type mismatch only prints a warning; processing continues
+        print "WARNING: You are trying to join files of two different types: %s and %s." % ( input1_type, input2_type )
+    
+    input2 = fastqNamedReader( open( input2_filename, 'rb' ), input2_type )  # queried by identifier via input2.get() in the loop below
+    joiner = fastqJoiner( input1_type )
+    out = fastqWriter( open( output_filename, 'wb' ), format = input1_type )  # output is written using input1's type
+    
+    i = None  # stays None if the loop below never executes
+    skip_count = 0
+    for i, fastq_read in enumerate( fastqReader( open( input1_filename, 'rb' ), format = input1_type ) ):  # NOTE(review): neither input file object is explicitly closed in this function
+        identifier = joiner.get_paired_identifier( fastq_read )
+        fastq_paired = input2.get( identifier )
+        if fastq_paired is None:  # no pair returned for this identifier: count the read as skipped and write nothing
+            skip_count += 1
+        else:
+            out.write( joiner.join( fastq_read, fastq_paired ) )
+    out.close()
+    
+    if i is None:  # no reads were yielded from input1
+        print "Your file contains no valid FASTQ reads."
+    else:
+        print input2.has_data()  # NOTE(review): prints has_data()'s return value verbatim; presumably reports unused input2 data -- confirm it is not leftover debug output
+        print 'Joined %s of %s read pairs (%.2f%%).' % ( i - skip_count + 1, i + 1, float( i - skip_count + 1 ) / float( i + 1 ) * 100.0 )  # i is the zero-based index of the last read, so the total is i + 1
+
+if __name__ == "__main__":  # run main() only when executed as a script, not when imported
+    main()
diff -r 000000000000 -r 2793d1d765b9 fastq_paired_end_joiner.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_paired_end_joiner.xml	Mon Jan 27 09:25:44 2014 -0500
@@ -0,0 +1,65 @@
+
+  on paired end reads
+  
+    galaxy_sequence_utils
+  
+  fastq_paired_end_joiner.py '$input1_file' '${input1_file.extension[len( 'fastq' ):]}' '$input2_file' '${input2_file.extension[len( 'fastq' ):]}' '$output_file'
+  
+    
+    
+  
+  
+    
+  
+  
+    
+      
+      
+      
+    
+  
+  
+**What it does**
+
+This tool joins paired end FASTQ reads from two separate files into a single read in one file. The join is performed using sequence identifiers, so the reads in the two files may appear in different orders. If a sequence identifier does not appear in both files, it is excluded from the output.
+
+Sequence identifiers with /1 and /2 appended override the left-hand and right-hand designation; i.e. if the read identifiers end with /1 and /2, the read containing /1 will be used as the left-hand read and the read containing /2 will be used as the right-hand read. Sequences without this designation will follow the left-hand and right-hand settings chosen by the user.
+
+-----
+
+**Input formats**
+
+Left-hand Read::
+
+    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
+    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+
+Right-hand Read::
+
+    @HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
+    GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
+    +HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
+    hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
+
+-----
+
+**Output**
+
+A multiple-fastq file, for example::
+
+    @HWI-EAS91_1_30788AAXX:7:21:1542:1758
+    GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
+    +HWI-EAS91_1_30788AAXX:7:21:1542:1758
+    hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
+
+------
+
+**Citation**
+
+If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. <http://www.ncbi.nlm.nih.gov/pubmed/20562416>`_
+
+
+  
+
diff -r 000000000000 -r 2793d1d765b9 test-data/3.fastqsanger
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/3.fastqsanger	Mon Jan 27 09:25:44 2014 -0500
@@ -0,0 +1,20 @@
+@HWI-EAS91_1_30788AAXX:7:21:1542:1758
+GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
++HWI-EAS91_1_30788AAXX:7:21:1542:1758
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
+@HWI-EAS91_1_30788AAXX:7:22:1621:462
+ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAAACTAGCCCCAATATCAATCCTATATCAAATCTCACC
++HWI-EAS91_1_30788AAXX:7:22:1621:462
+hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?hhJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh
+@HWI-EAS91_1_30788AAXX:7:45:408:807
+TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTTATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT
++HWI-EAS91_1_30788AAXX:7:45:408:807
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh
+@HWI-EAS91_1_30788AAXX:7:49:654:1439
+CTAACTCTATTTATTGTATTTCAACTAAAAATCTCATAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG
++HWI-EAS91_1_30788AAXX:7:49:654:1439
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@
+@HWI-EAS91_1_30788AAXX:7:64:947:234
+TATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG
++HWI-EAS91_1_30788AAXX:7:64:947:234
+hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJhhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd
diff -r 000000000000 -r 2793d1d765b9 test-data/split_pair_reads_1.fastqsanger
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_pair_reads_1.fastqsanger	Mon Jan 27 09:25:44 2014 -0500
@@ -0,0 +1,20 @@
+@HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC
++HWI-EAS91_1_30788AAXX:7:21:1542:1758/1
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+@HWI-EAS91_1_30788AAXX:7:22:1621:462/1
+ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAA
++HWI-EAS91_1_30788AAXX:7:22:1621:462/1
+hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?h
+@HWI-EAS91_1_30788AAXX:7:45:408:807/1
+TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTT
++HWI-EAS91_1_30788AAXX:7:45:408:807/1
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+@HWI-EAS91_1_30788AAXX:7:49:654:1439/1
+CTAACTCTATTTATTGTATTTCAACTAAAAATCTCA
++HWI-EAS91_1_30788AAXX:7:49:654:1439/1
+hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
+@HWI-EAS91_1_30788AAXX:7:64:947:234/1
+TATCAAAAAAGAATATAATCTGAATCAACACTACAA
++HWI-EAS91_1_30788AAXX:7:64:947:234/1
+hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJ
diff -r 000000000000 -r 2793d1d765b9 test-data/split_pair_reads_2.fastqsanger
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/split_pair_reads_2.fastqsanger	Mon Jan 27 09:25:44 2014 -0500
@@ -0,0 +1,20 @@
+@HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
+GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA
++HWI-EAS91_1_30788AAXX:7:21:1542:1758/2
+hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR
+@HWI-EAS91_1_30788AAXX:7:22:1621:462/2
+ACTAGCCCCAATATCAATCCTATATCAAATCTCACC
++HWI-EAS91_1_30788AAXX:7:22:1621:462/2
+hJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh
+@HWI-EAS91_1_30788AAXX:7:45:408:807/2
+ATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT
++HWI-EAS91_1_30788AAXX:7:45:408:807/2
+hhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh
+@HWI-EAS91_1_30788AAXX:7:49:654:1439/2
+TAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG
++HWI-EAS91_1_30788AAXX:7:49:654:1439/2
+hhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@
+@HWI-EAS91_1_30788AAXX:7:64:947:234/2
+CCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG
++HWI-EAS91_1_30788AAXX:7:64:947:234/2
+hhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd
diff -r 000000000000 -r 2793d1d765b9 tool_dependencies.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Mon Jan 27 09:25:44 2014 -0500
@@ -0,0 +1,6 @@
+
+
+  
+      
+    
+