Mercurial > repos > devteam > fastq_paired_end_joiner
comparison fastq_paired_end_joiner.py @ 0:2793d1d765b9 draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 27 Jan 2014 09:25:44 -0500 |
parents | |
children | 270a8ed8a300 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2793d1d765b9 |
---|---|
1 #Dan Blankenberg | |
2 import sys, os, shutil | |
3 from galaxy_utils.sequence.fastq import fastqReader, fastqNamedReader, fastqWriter, fastqJoiner | |
4 | |
def main():
    """Join paired-end FASTQ reads from two input files into one output file.

    Command line arguments (read from ``sys.argv``):
        1. path of the first FASTQ input
        2. format of the first input; an empty string falls back to 'sanger'
        3. path of the second FASTQ input
        4. format of the second input; an empty string falls back to 'sanger'
        5. path of the joined output file

    For every read of the first input, the paired identifier is computed with
    the joiner and looked up in the second input. Reads with no mate are
    counted as skipped; the others are joined and written to the output.
    A summary of joined versus total reads is printed to stdout.
    """
    # Read command line arguments
    input1_filename = sys.argv[1]
    input1_type = sys.argv[2] or 'sanger'
    input2_filename = sys.argv[3]
    input2_type = sys.argv[4] or 'sanger'
    output_filename = sys.argv[5]

    # Single-argument print( ... ) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    if input1_type != input2_type:
        print( "WARNING: You are trying to join files of two different types: %s and %s." % ( input1_type, input2_type ) )

    # Files are opened in the same order as before (second input, output,
    # first input) but are now closed explicitly, even when an error occurs.
    input2_file = open( input2_filename, 'rb' )
    try:
        input2 = fastqNamedReader( input2_file, input2_type )
        joiner = fastqJoiner( input1_type )
        out = fastqWriter( open( output_filename, 'wb' ), format = input1_type )

        total_count = 0
        skip_count = 0
        try:
            input1_file = open( input1_filename, 'rb' )
            try:
                for fastq_read in fastqReader( input1_file, format = input1_type ):
                    total_count += 1
                    identifier = joiner.get_paired_identifier( fastq_read )
                    fastq_paired = input2.get( identifier )
                    if fastq_paired is None:
                        # No mate found in the second input for this read.
                        skip_count += 1
                    else:
                        out.write( joiner.join( fastq_read, fastq_paired ) )
            finally:
                input1_file.close()
        finally:
            # NOTE(review): assumes fastqWriter.close() also closes the
            # underlying output handle, as the original code relied on.
            out.close()

        if not total_count:
            print( "Your file contains no valid FASTQ reads." )
        else:
            # The second input must still be open here: has_data() is called
            # on its reader before the handle is released below.
            print( input2.has_data() )
            joined_count = total_count - skip_count
            print( 'Joined %s of %s read pairs (%.2f%%).' % ( joined_count, total_count, float( joined_count ) / float( total_count ) * 100.0 ) )
    finally:
        input2_file.close()
36 | |
# Run the joiner only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()