# HG changeset patch # User devteam # Date 1390832744 18000 # Node ID 2793d1d765b95959197d92cbdf8898bf94c534b0 Imported from capsule None diff -r 000000000000 -r 2793d1d765b9 fastq_paired_end_joiner.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_paired_end_joiner.py Mon Jan 27 09:25:44 2014 -0500 @@ -0,0 +1,38 @@ +#Dan Blankenberg +import sys, os, shutil +from galaxy_utils.sequence.fastq import fastqReader, fastqNamedReader, fastqWriter, fastqJoiner + +def main(): + #Read command line arguments + input1_filename = sys.argv[1] + input1_type = sys.argv[2] or 'sanger' + input2_filename = sys.argv[3] + input2_type = sys.argv[4] or 'sanger' + output_filename = sys.argv[5] + + if input1_type != input2_type: + print "WARNING: You are trying to join files of two different types: %s and %s." % ( input1_type, input2_type ) + + input2 = fastqNamedReader( open( input2_filename, 'rb' ), input2_type ) + joiner = fastqJoiner( input1_type ) + out = fastqWriter( open( output_filename, 'wb' ), format = input1_type ) + + i = None + skip_count = 0 + for i, fastq_read in enumerate( fastqReader( open( input1_filename, 'rb' ), format = input1_type ) ): + identifier = joiner.get_paired_identifier( fastq_read ) + fastq_paired = input2.get( identifier ) + if fastq_paired is None: + skip_count += 1 + else: + out.write( joiner.join( fastq_read, fastq_paired ) ) + out.close() + + if i is None: + print "Your file contains no valid FASTQ reads." + else: + print input2.has_data() + print 'Joined %s of %s read pairs (%.2f%%).' % ( i - skip_count + 1, i + 1, float( i - skip_count + 1 ) / float( i + 1 ) * 100.0 ) + +if __name__ == "__main__": + main() diff -r 000000000000 -r 2793d1d765b9 fastq_paired_end_joiner.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_paired_end_joiner.xml Mon Jan 27 09:25:44 2014 -0500 @@ -0,0 +1,65 @@ + + on paired end reads + + galaxy_sequence_utils + + fastq_paired_end_joiner.py '$input1_file' '${input1_file.extension[len( 'fastq' ):]}' '$input2_file' '${input2_file.extension[len( 'fastq' ):]}' '$output_file' + + + + + + + + + + + + + + + +**What it does** + +This tool joins paired end FASTQ reads from two separate files into a single read in one file. The join is performed using sequence identifiers, allowing the two files to contain differing ordering. If a sequence identifier does not appear in both files, it is excluded from the output. + +Sequence identifiers with /1 and /2 appended override the left-hand and right-hand designation; i.e. if the reads end with /1 and /2, the read containing /1 will be used as the left-hand read and the read containing /2 will be used as the right-hand read. Sequences without this designation will follow the left-hand and right-hand settings set by the user. + +----- + +**Input formats** + +Left-hand Read:: + + @HWI-EAS91_1_30788AAXX:7:21:1542:1758/1 + GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC + +HWI-EAS91_1_30788AAXX:7:21:1542:1758/1 + hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh + +Right-hand Read:: + + @HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 + GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA + +HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 + hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR + +----- + +**Output** + +A multiple-fastq file, for example:: + + @HWI-EAS91_1_30788AAXX:7:21:1542:1758 + GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA + +HWI-EAS91_1_30788AAXX:7:21:1542:1758 + hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR + +------ + +**Citation** + +If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. <http://www.ncbi.nlm.nih.gov/pubmed/20562416>`_ + + + + diff -r 000000000000 -r 2793d1d765b9 test-data/3.fastqsanger --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/3.fastqsanger Mon Jan 27 09:25:44 2014 -0500 @@ -0,0 +1,20 @@ +@HWI-EAS91_1_30788AAXX:7:21:1542:1758 +GTCAATTGTACTGGTCAATACTAAAAGAATAGGATCGCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA ++HWI-EAS91_1_30788AAXX:7:21:1542:1758 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR +@HWI-EAS91_1_30788AAXX:7:22:1621:462 +ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAAACTAGCCCCAATATCAATCCTATATCAAATCTCACC ++HWI-EAS91_1_30788AAXX:7:22:1621:462 +hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?hhJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh +@HWI-EAS91_1_30788AAXX:7:45:408:807 +TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTTATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT ++HWI-EAS91_1_30788AAXX:7:45:408:807 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh +@HWI-EAS91_1_30788AAXX:7:49:654:1439 +CTAACTCTATTTATTGTATTTCAACTAAAAATCTCATAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG ++HWI-EAS91_1_30788AAXX:7:49:654:1439 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@ +@HWI-EAS91_1_30788AAXX:7:64:947:234 +TATCAAAAAAGAATATAATCTGAATCAACACTACAACCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG ++HWI-EAS91_1_30788AAXX:7:64:947:234 +hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJhhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd diff -r 000000000000 -r 2793d1d765b9 test-data/split_pair_reads_1.fastqsanger --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/split_pair_reads_1.fastqsanger Mon Jan 27 09:25:44 2014 -0500 @@ -0,0 +1,20 @@ +@HWI-EAS91_1_30788AAXX:7:21:1542:1758/1 +GTCAATTGTACTGGTCAATACTAAAAGAATAGGATC ++HWI-EAS91_1_30788AAXX:7:21:1542:1758/1 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +@HWI-EAS91_1_30788AAXX:7:22:1621:462/1 +ATAATGGCTATTATTGTGGGGGGGATGATGCTGGAA ++HWI-EAS91_1_30788AAXX:7:22:1621:462/1 +hhhhhhhhhhhhQAhh@hhhhNhhhfhMbCIScC?h +@HWI-EAS91_1_30788AAXX:7:45:408:807/1 +TACCCGATTTTTTGCTTTCCACTTTATCCTACCCTT ++HWI-EAS91_1_30788AAXX:7:45:408:807/1 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +@HWI-EAS91_1_30788AAXX:7:49:654:1439/1 +CTAACTCTATTTATTGTATTTCAACTAAAAATCTCA ++HWI-EAS91_1_30788AAXX:7:49:654:1439/1 +hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh +@HWI-EAS91_1_30788AAXX:7:64:947:234/1 +TATCAAAAAAGAATATAATCTGAATCAACACTACAA ++HWI-EAS91_1_30788AAXX:7:64:947:234/1 +hhhhhhhhhhhhhhhhhhhhhhhRhhehhahhhhhJ diff -r 000000000000 -r 2793d1d765b9 test-data/split_pair_reads_2.fastqsanger --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/split_pair_reads_2.fastqsanger Mon Jan 27 09:25:44 2014 -0500 @@ -0,0 +1,20 @@ +@HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 +GCTCCTAGCATCTGGAGTCTCTATCACCTGAGCCCA ++HWI-EAS91_1_30788AAXX:7:21:1542:1758/2 +hhhhhhhhhhhhhhhhhhhhhhhh`hfhhVZSWehR +@HWI-EAS91_1_30788AAXX:7:22:1621:462/2 +ACTAGCCCCAATATCAATCCTATATCAAATCTCACC ++HWI-EAS91_1_30788AAXX:7:22:1621:462/2 +hJhhhhChhhJhhhRhhKhePhc\KhhV\KhXhJhh +@HWI-EAS91_1_30788AAXX:7:45:408:807/2 +ATGAGTGCTAGGATCAGGATGGAGAGGATTAGGGCT ++HWI-EAS91_1_30788AAXX:7:45:408:807/2 +hhhhhhhhhhhhhhhhhh`hhhZh`hhhhhRXhhYh +@HWI-EAS91_1_30788AAXX:7:49:654:1439/2 +TAGGTTTATTGATAGTTGTGTTGTTGGTGTAAATGG ++HWI-EAS91_1_30788AAXX:7:49:654:1439/2 +hhhhhhhhhhhhhhhhhhhhhhhhhdhh_hG\XhU@ +@HWI-EAS91_1_30788AAXX:7:64:947:234/2 +CCTATTAGTGTGTAGAATAGGAAGTAGAGGCCTGCG ++HWI-EAS91_1_30788AAXX:7:64:947:234/2 +hhhhhhhh^hPhWfhhhhThWUhhfhh_hhNIVPUd diff -r 000000000000 -r 2793d1d765b9 tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Jan 27 09:25:44 2014 -0500 @@ -0,0 +1,6 @@ + + + + + +