Mercurial > repos > devteam > fastq_groomer
comparison fastq_groomer.py @ 0:1298445c852b draft
Imported from capsule None
| author | devteam |
|---|---|
| date | Mon, 27 Jan 2014 09:27:58 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1298445c852b |
|---|---|
| 1 #Dan Blankenberg | |
| 2 import sys | |
| 3 from galaxy_utils.sequence.fastq import fastqReader, fastqVerboseErrorReader, fastqAggregator, fastqWriter | |
| 4 | |
def main():
    """Groom a FASTQ file from one quality-score variant into another.

    Command-line arguments (read positionally from ``sys.argv``):

    1. input_filename -- path of the FASTQ file to read.
    2. input_type -- format name passed to the reader as ``format``.
    3. output_filename -- path of the groomed FASTQ file to write.
    4. output_type -- format name passed to the writer as ``format``.
    5. force_quality_encoding -- passed through to the writer; the literal
       string ``'None'`` is translated to ``None``.
    6. summarize flag -- when exactly ``'summarize_input'``, reads are parsed
       with ``fastqVerboseErrorReader`` and fed to a ``fastqAggregator`` so a
       summary of the input can be printed afterwards.

    A short report is printed to stdout. Every ``print`` call uses a single
    parenthesised expression so the output is identical under Python 2 while
    the statement also parses under Python 3.
    """
    input_filename = sys.argv[1]
    input_type = sys.argv[2]
    output_filename = sys.argv[3]
    output_type = sys.argv[4]
    force_quality_encoding = sys.argv[5]
    summarize_input = sys.argv[6] == 'summarize_input'
    if force_quality_encoding == 'None':
        force_quality_encoding = None

    aggregator = fastqAggregator()
    out = fastqWriter( open( output_filename, 'wb' ), format = output_type, force_quality_encoding = force_quality_encoding )
    # read_count stays None when the reader yields nothing; this is how the
    # "no reads" case is detected after the loop.
    read_count = None
    if summarize_input:
        reader = fastqVerboseErrorReader
    else:
        reader = fastqReader
    try:
        # The input handle is closed by the with-block and the writer by the
        # finally clause, so neither leaks if reading or writing raises.
        with open( input_filename ) as input_handle:
            for read_count, fastq_read in enumerate( reader( input_handle, format = input_type, apply_galaxy_conventions = True ) ):
                if summarize_input:
                    aggregator.consume_read( fastq_read )
                out.write( fastq_read )
    finally:
        out.close()

    if read_count is not None:
        # enumerate() is zero-based, so the number of reads is the last index + 1.
        print( "Groomed %i %s reads into %s reads." % ( read_count + 1, input_type, output_type ) )
        if input_type != output_type and 'solexa' in [ input_type, output_type ]:
            print( "Converted between Solexa and PHRED scores." )
        if summarize_input:
            print( "Based upon quality and sequence, the input data is valid for: %s" % ( ", ".join( aggregator.get_valid_formats() ) or "None" ) )
            ascii_range = aggregator.get_ascii_range()
            decimal_range = aggregator.get_decimal_range()
            # print using repr, since \x00 (null) causes info truncation in galaxy when printed
            print( "Input ASCII range: %s(%i) - %s(%i)" % ( repr( ascii_range[0] ), ord( ascii_range[0] ), repr( ascii_range[1] ), ord( ascii_range[1] ) ) )
            print( "Input decimal range: %i - %i" % ( decimal_range[0], decimal_range[1] ) )
    else:
        print( "No valid FASTQ reads were provided." )
| 40 | |
| 41 | |
# Run the groomer only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
