Mercurial > repos > iuc > extract_genomic_dna
comparison extract_genomic_dna.py @ 7:3088e7e70888 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 4a3c9f195ba5d899b1a1ce5e80281cdf230f456a
author | iuc |
---|---|
date | Mon, 23 Oct 2017 13:26:18 -0400 |
parents | c8467246b57e |
children | e400dcbc60d0 |
comparison
equal
deleted
inserted
replaced
6:53db9cb721f1 | 7:3088e7e70888 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
 | 2 from __future__ import print_function |
 | 3 |
2 import argparse | 4 import argparse |
3 import os | 5 import os |
4 | 6 |
5 import bx.seq.nib | 7 import bx.seq.nib |
6 import bx.seq.twobit | 8 import bx.seq.twobit |
79 name = fields[name_col] | 81 name = fields[name_col] |
80 if input_is_gff: | 82 if input_is_gff: |
81 start, end = egdu.convert_gff_coords_to_bed([start, end]) | 83 start, end = egdu.convert_gff_coords_to_bed([start, end]) |
82 if includes_strand_col: | 84 if includes_strand_col: |
83 strand = fields[strand_col] | 85 strand = fields[strand_col] |
84 except: | 86 except Exception: |
85 warning = "Invalid chrom, start or end column values. " | 87 warning = "Invalid chrom, start or end column values. " |
86 warnings.append(warning) | 88 warnings.append(warning) |
87 if not invalid_lines: | 89 if not invalid_lines: |
88 invalid_lines = egdu.get_lines(feature) | 90 invalid_lines = egdu.get_lines(feature) |
89 first_invalid_line = line_count | 91 first_invalid_line = line_count |
127 sequence = '' | 129 sequence = '' |
128 for interval in feature.intervals: | 130 for interval in feature.intervals: |
129 sequence += twobitfile[interval.chrom][interval.start:interval.end] | 131 sequence += twobitfile[interval.chrom][interval.start:interval.end] |
130 else: | 132 else: |
131 sequence = twobitfile[chrom][start:end] | 133 sequence = twobitfile[chrom][start:end] |
132 except: | 134 except Exception: |
133 warning = "Unable to fetch the sequence from '%d' to '%d' for chrom '%s'. " % (start, end - start, chrom) | 135 warning = "Unable to fetch the sequence from '%d' to '%d' for chrom '%s'. " % (start, end - start, chrom) |
134 warnings.append(warning) | 136 warnings.append(warning) |
135 if not invalid_lines: | 137 if not invalid_lines: |
136 invalid_lines = egdu.get_lines(feature) | 138 invalid_lines = egdu.get_lines(feature) |
137 first_invalid_line = line_count | 139 first_invalid_line = line_count |
154 skipped_lines += len(invalid_lines) | 156 skipped_lines += len(invalid_lines) |
155 continue | 157 continue |
156 if includes_strand_col and strand == "-": | 158 if includes_strand_col and strand == "-": |
157 sequence = egdu.reverse_complement(sequence) | 159 sequence = egdu.reverse_complement(sequence) |
158 if args.output_format == "fasta": | 160 if args.output_format == "fasta": |
159 l = len(sequence) | |
160 c = 0 | |
161 if input_is_gff: | 161 if input_is_gff: |
162 start, end = egdu.convert_bed_coords_to_gff([start, end]) | 162 start, end = egdu.convert_bed_coords_to_gff([start, end]) |
163 if args.fasta_header_type == "bedtools_getfasta_default": | 163 if args.fasta_header_type == "bedtools_getfasta_default": |
164 out.write(">%s\n" % egdu.get_bedtools_getfasta_default_header(str(chrom), | 164 out.write(">%s\n" % egdu.get_bedtools_getfasta_default_header(str(chrom), |
165 str(start), | 165 str(start), |
173 meta_data = field_delimiter.join(fields) | 173 meta_data = field_delimiter.join(fields) |
174 if name.strip(): | 174 if name.strip(): |
175 out.write(">%s %s\n" % (meta_data, name)) | 175 out.write(">%s %s\n" % (meta_data, name)) |
176 else: | 176 else: |
177 out.write(">%s\n" % meta_data) | 177 out.write(">%s\n" % meta_data) |
178 while c < l: | 178 c = 0 |
179 b = min(c + 50, l) | 179 sequence_length = len(sequence) |
 | 180 while c < sequence_length: |
 | 181 b = min(c + 50, sequence_length) |
180 out.write("%s\n" % str(sequence[c:b])) | 182 out.write("%s\n" % str(sequence[c:b])) |
181 c = b | 183 c = b |
182 else: | 184 else: |
183 # output_format == "interval". | 185 # output_format == "interval". |
184 if interpret_features: | 186 if interpret_features: |
207 out.close() | 209 out.close() |
208 | 210 |
209 if warnings: | 211 if warnings: |
210 warn_msg = "%d warnings, 1st is: " % len(warnings) | 212 warn_msg = "%d warnings, 1st is: " % len(warnings) |
211 warn_msg += warnings[0] | 213 warn_msg += warnings[0] |
212 print warn_msg | 214 print(warn_msg) |
213 if skipped_lines: | 215 if skipped_lines: |
214 # Error message includes up to the first 10 skipped lines. | 216 # Error message includes up to the first 10 skipped lines. |
215 print 'Skipped %d invalid lines, 1st is #%d, "%s"' % (skipped_lines, first_invalid_line, '\n'.join(invalid_lines[:10])) | 217 print('Skipped %d invalid lines, 1st is #%d, "%s"' % (skipped_lines, first_invalid_line, '\n'.join(invalid_lines[:10]))) |
216 | 218 |
217 if args.reference_genome_source == "history": | 219 if args.reference_genome_source == "history": |
218 os.remove(seq_path) | 220 os.remove(seq_path) |