comparison fastq_paired_end_joiner.py @ 3:6a7f5da7c76d draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_paired_end_joiner commit 117bc9911926e06d4541daffbb7b0e27d38d67a7
author devteam
date Wed, 16 Dec 2015 14:30:33 -0500
parents 270a8ed8a300
children
comparison
equal deleted inserted replaced
2:41ab1243e8f9 3:6a7f5da7c76d
52 return self.join_id( t ) 52 return self.join_id( t )
53 53
54 54
55 class FastqJoiner( fq.fastqJoiner ): 55 class FastqJoiner( fq.fastqJoiner ):
56 56
57 def __init__( self, format, force_quality_encoding=None, sep="\t" ): 57 def __init__( self, format, force_quality_encoding=None, sep="\t", paste="" ):
58 super( FastqJoiner, self ).__init__( format, force_quality_encoding ) 58 super( FastqJoiner, self ).__init__( format, force_quality_encoding, paste=paste )
59 self.id_manager = IDManager( sep ) 59 self.id_manager = IDManager( sep )
60 60
61 def join( self, read1, read2 ): 61 def join( self, read1, read2 ):
62 force_quality_encoding = self.force_quality_encoding 62 force_quality_encoding = self.force_quality_encoding
63 if not force_quality_encoding: 63 if not force_quality_encoding:
82 rval.description += rval.identifier[1:] 82 rval.description += rval.identifier[1:]
83 if rval.sequence_space == 'color': 83 if rval.sequence_space == 'color':
84 # convert to nuc space, join, then convert back 84 # convert to nuc space, join, then convert back
85 rval.sequence = rval.convert_base_to_color_space( 85 rval.sequence = rval.convert_base_to_color_space(
86 read1.convert_color_to_base_space( read1.sequence ) + 86 read1.convert_color_to_base_space( read1.sequence ) +
87 self.paste_sequence +
87 read2.convert_color_to_base_space( read2.sequence ) 88 read2.convert_color_to_base_space( read2.sequence )
88 ) 89 )
89 else: 90 else:
90 rval.sequence = read1.sequence + read2.sequence 91 rval.sequence = read1.sequence + self.paste_sequence + read2.sequence
91 if force_quality_encoding == 'ascii': 92 if force_quality_encoding == 'ascii':
92 rval.quality = read1.quality + read2.quality 93 rval.quality = read1.quality + self.paste_ascii_quality + read2.quality
93 else: 94 else:
94 rval.quality = "%s %s" % ( 95 rval.quality = "%s %s" % (
95 read1.quality.strip(), read2.quality.strip() 96 read1.quality.strip(), self.paste_decimal_quality
96 ) 97 )
98 rval.quality = ("%s %s" % (
99 rval.quality.strip(), read2.quality.strip()
100 )).strip()
97 return rval 101 return rval
98 102
99 def get_paired_identifier( self, read ): 103 def get_paired_identifier( self, read ):
100 return self.id_manager.get_paired_identifier( read ) 104 return self.id_manager.get_paired_identifier( read )
101 105
117 input2_filename = sys.argv[3] 121 input2_filename = sys.argv[3]
118 input2_type = sys.argv[4] or 'sanger' 122 input2_type = sys.argv[4] or 'sanger'
119 output_filename = sys.argv[5] 123 output_filename = sys.argv[5]
120 124
121 fastq_style = sys.argv[6] or 'old' 125 fastq_style = sys.argv[6] or 'old'
126
127 paste = sys.argv[7] or ''
122 #-- 128 #--
123 if input1_type != input2_type: 129 if input1_type != input2_type:
124 print "WARNING: You are trying to join files of two different types: %s and %s." % ( input1_type, input2_type ) 130 print "WARNING: You are trying to join files of two different types: %s and %s." % ( input1_type, input2_type )
125 131
126 if fastq_style == 'new': 132 if fastq_style == 'new':
127 sep = sniff_sep( input1_filename ) 133 sep = sniff_sep( input1_filename )
128 joiner = FastqJoiner( input1_type, sep=sep ) 134 joiner = FastqJoiner( input1_type, sep=sep, paste=paste )
129 else: 135 else:
130 joiner = fq.fastqJoiner( input1_type ) 136 joiner = fq.fastqJoiner( input1_type, paste=paste )
131 #-- 137 #--
132 input2 = fq.fastqNamedReader( open( input2_filename, 'rb' ), input2_type ) 138 input2 = fq.fastqNamedReader( open( input2_filename, 'rb' ), input2_type )
133 out = fq.fastqWriter( open( output_filename, 'wb' ), format=input1_type ) 139 out = fq.fastqWriter( open( output_filename, 'wb' ), format=input1_type )
134 i = None 140 i = None
135 skip_count = 0 141 skip_count = 0