comparison extract_genomic_dna.py @ 7:3088e7e70888 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 4a3c9f195ba5d899b1a1ce5e80281cdf230f456a
author iuc
date Mon, 23 Oct 2017 13:26:18 -0400
parents c8467246b57e
children e400dcbc60d0
comparison
equal deleted inserted replaced
6:53db9cb721f1 7:3088e7e70888
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 from __future__ import print_function
3
2 import argparse 4 import argparse
3 import os 5 import os
4 6
5 import bx.seq.nib 7 import bx.seq.nib
6 import bx.seq.twobit 8 import bx.seq.twobit
79 name = fields[name_col] 81 name = fields[name_col]
80 if input_is_gff: 82 if input_is_gff:
81 start, end = egdu.convert_gff_coords_to_bed([start, end]) 83 start, end = egdu.convert_gff_coords_to_bed([start, end])
82 if includes_strand_col: 84 if includes_strand_col:
83 strand = fields[strand_col] 85 strand = fields[strand_col]
84 except: 86 except Exception:
85 warning = "Invalid chrom, start or end column values. " 87 warning = "Invalid chrom, start or end column values. "
86 warnings.append(warning) 88 warnings.append(warning)
87 if not invalid_lines: 89 if not invalid_lines:
88 invalid_lines = egdu.get_lines(feature) 90 invalid_lines = egdu.get_lines(feature)
89 first_invalid_line = line_count 91 first_invalid_line = line_count
127 sequence = '' 129 sequence = ''
128 for interval in feature.intervals: 130 for interval in feature.intervals:
129 sequence += twobitfile[interval.chrom][interval.start:interval.end] 131 sequence += twobitfile[interval.chrom][interval.start:interval.end]
130 else: 132 else:
131 sequence = twobitfile[chrom][start:end] 133 sequence = twobitfile[chrom][start:end]
132 except: 134 except Exception:
133 warning = "Unable to fetch the sequence from '%d' to '%d' for chrom '%s'. " % (start, end - start, chrom) 135 warning = "Unable to fetch the sequence from '%d' to '%d' for chrom '%s'. " % (start, end - start, chrom)
134 warnings.append(warning) 136 warnings.append(warning)
135 if not invalid_lines: 137 if not invalid_lines:
136 invalid_lines = egdu.get_lines(feature) 138 invalid_lines = egdu.get_lines(feature)
137 first_invalid_line = line_count 139 first_invalid_line = line_count
154 skipped_lines += len(invalid_lines) 156 skipped_lines += len(invalid_lines)
155 continue 157 continue
156 if includes_strand_col and strand == "-": 158 if includes_strand_col and strand == "-":
157 sequence = egdu.reverse_complement(sequence) 159 sequence = egdu.reverse_complement(sequence)
158 if args.output_format == "fasta": 160 if args.output_format == "fasta":
159 l = len(sequence)
160 c = 0
161 if input_is_gff: 161 if input_is_gff:
162 start, end = egdu.convert_bed_coords_to_gff([start, end]) 162 start, end = egdu.convert_bed_coords_to_gff([start, end])
163 if args.fasta_header_type == "bedtools_getfasta_default": 163 if args.fasta_header_type == "bedtools_getfasta_default":
164 out.write(">%s\n" % egdu.get_bedtools_getfasta_default_header(str(chrom), 164 out.write(">%s\n" % egdu.get_bedtools_getfasta_default_header(str(chrom),
165 str(start), 165 str(start),
173 meta_data = field_delimiter.join(fields) 173 meta_data = field_delimiter.join(fields)
174 if name.strip(): 174 if name.strip():
175 out.write(">%s %s\n" % (meta_data, name)) 175 out.write(">%s %s\n" % (meta_data, name))
176 else: 176 else:
177 out.write(">%s\n" % meta_data) 177 out.write(">%s\n" % meta_data)
178 while c < l: 178 c = 0
179 b = min(c + 50, l) 179 sequence_length = len(sequence)
180 while c < sequence_length:
181 b = min(c + 50, sequence_length)
180 out.write("%s\n" % str(sequence[c:b])) 182 out.write("%s\n" % str(sequence[c:b]))
181 c = b 183 c = b
182 else: 184 else:
183 # output_format == "interval". 185 # output_format == "interval".
184 if interpret_features: 186 if interpret_features:
207 out.close() 209 out.close()
208 210
209 if warnings: 211 if warnings:
210 warn_msg = "%d warnings, 1st is: " % len(warnings) 212 warn_msg = "%d warnings, 1st is: " % len(warnings)
211 warn_msg += warnings[0] 213 warn_msg += warnings[0]
212 print warn_msg 214 print(warn_msg)
213 if skipped_lines: 215 if skipped_lines:
214 # Error message includes up to the first 10 skipped lines. 216 # Error message includes up to the first 10 skipped lines.
215 print 'Skipped %d invalid lines, 1st is #%d, "%s"' % (skipped_lines, first_invalid_line, '\n'.join(invalid_lines[:10])) 217 print('Skipped %d invalid lines, 1st is #%d, "%s"' % (skipped_lines, first_invalid_line, '\n'.join(invalid_lines[:10])))
216 218
217 if args.reference_genome_source == "history": 219 if args.reference_genome_source == "history":
218 os.remove(seq_path) 220 os.remove(seq_path)