Mercurial > repos > iuc > extract_genomic_dna
annotate extract_genomic_dna_utils.py @ 11:80414c33a59a draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 6db2d98b513e4980788fcba49d809c91e5750296
| author | iuc | 
|---|---|
| date | Thu, 21 Nov 2024 07:20:29 +0000 | 
| parents | 3088e7e70888 | 
| children | 
| rev | line source | 
|---|---|
| 0 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 1 import copy | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 2 import os | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 3 import subprocess | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 4 import sys | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 5 import tempfile | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 6 | 
| 3 
b71579ad576c
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit bd4fbe120288bf8452e479cbd82aa1bbf5c4bd31
 iuc parents: 
2diff
changeset | 7 from bx.intervals.io import Comment, GenomicInterval, Header | 
| 0 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 8 from bx.intervals.io import GenomicIntervalReader, NiceReaderWrapper, ParseError | 
| 7 
3088e7e70888
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 4a3c9f195ba5d899b1a1ce5e80281cdf230f456a
 iuc parents: 
3diff
changeset | 9 from six import Iterator | 
| 0 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 10 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
# Default column indexes (chrom, start, end, strand) for a BED file
BED_DEFAULT_COLS = (0, 1, 2, 5)
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 13 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 14 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
class GFFInterval(GenomicInterval):
    """
    A single GFF line as a genomic interval that also carries the feature
    type, the score and the parsed attributes column. For a strict GFF file
    the only attribute is 'group'.
    """

    def __init__(self, reader, fields, chrom_col=0, feature_col=2, start_col=3, end_col=4,
                 strand_col=6, score_col=5, default_strand='.', fix_strand=False):
        """
        Build the interval from the already-split columns in `fields`.

        `fields` is modified in place while the base class is initialised
        (see below); the remaining arguments are column indexes plus the
        strand handling options forwarded to GenomicInterval.
        """
        # GenomicInterval rejects '.' as a strand even though GFF permits it, so
        # substitute '+' for the duration of the base-class constructor and put
        # '.' back once it has run.
        strand_is_unknown = not fix_strand and fields[strand_col] == '.'
        if strand_is_unknown:
            fields[strand_col] = '+'
        GenomicInterval.__init__(self, reader, fields, chrom_col, start_col, end_col,
                                 strand_col, default_strand, fix_strand=fix_strand)
        if strand_is_unknown:
            self.strand = '.'
            self.fields[strand_col] = '.'

        # Feature type column.
        # NOTE(review): self.nfields is not assigned here; presumably the
        # base-class constructor sets it -- confirm.
        self.feature_col = feature_col
        if self.feature_col >= self.nfields:
            stop_err("No field for feature_col (%d)" % feature_col)
        self.feature = self.fields[self.feature_col]

        # Score column.
        self.score_col = score_col
        if self.score_col >= self.nfields:
            stop_err("No field for score_col (%d)" % score_col)
        self.score = self.fields[self.score_col]

        # The attributes are always read from the ninth column (index 8).
        self.attributes = parse_gff_attributes(fields[8])

    def copy(self):
        """
        Return a new GFFInterval built from a shallow copy of this interval's
        fields. The current strand is passed on as the default strand.
        """
        fields_copy = list(self.fields)
        return GFFInterval(self.reader, fields_copy, self.chrom_col, self.feature_col,
                           self.start_col, self.end_col, self.strand_col, self.score_col,
                           self.strand)
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 49 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 50 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
class GFFFeature(GFFInterval):
    """
    A GFF feature, which can include multiple intervals.

    The feature's own columns are a copy of the first interval's fields, and
    its start/end are widened so that they span every interval it contains.
    """

    def __init__(self, reader, chrom_col=0, feature_col=2, start_col=3, end_col=4, strand_col=6,
                 score_col=5, default_strand='.', fix_strand=False, intervals=None, raw_size=0):
        """
        Build a feature from a non-empty list of intervals.

        `intervals` must contain at least one interval, because the feature is
        seeded from the first one; an IndexError is raised otherwise.
        `raw_size` is stored unchanged on the instance.
        """
        # Avoid a mutable default argument; an empty/missing list is still an
        # IndexError, as it was when intervals[0] was evaluated on [].
        if intervals is None:
            intervals = []
        if len(intervals) == 0:
            raise IndexError("GFFFeature requires at least one interval")
        # Use copy so that first interval and feature do not share fields.
        GFFInterval.__init__(self, reader, copy.deepcopy(intervals[0].fields), chrom_col, feature_col,
                             start_col, end_col, strand_col, score_col, default_strand, fix_strand=fix_strand)
        self.intervals = intervals
        self.raw_size = raw_size
        # Use intervals to set feature attributes.
        for interval in self.intervals:
            # Error checking. NOTE: intervals need not share the same strand.
            if interval.chrom != self.chrom:
                stop_err("interval chrom does not match self chrom: %s != %s" % (interval.chrom, self.chrom))
            # Widen the feature so it covers this interval.
            if interval.start < self.start:
                self.start = interval.start
            if interval.end > self.end:
                self.end = interval.end

    def name(self):
        """
        Returns feature's name, or None when no naming attribute is present.
        """
        # Preference for name:
        # GTF: 'gene_id', 'transcript_id'
        # GFF3: 'ID', 'id'
        # GFF: 'group'
        for attr_name in ('gene_id', 'transcript_id', 'ID', 'id', 'group'):
            name = self.attributes.get(attr_name, None)
            if name is not None:
                return name
        return None

    def copy(self):
        """
        Return a new GFFFeature whose intervals are copies of this feature's
        intervals. The current strand is passed on as the default strand and
        raw_size is carried over to the copy.
        """
        intervals_copy = [interval.copy() for interval in self.intervals]
        return GFFFeature(self.reader, self.chrom_col, self.feature_col, self.start_col, self.end_col,
                          self.strand_col, self.score_col, self.strand, intervals=intervals_copy,
                          raw_size=self.raw_size)

    def lines(self):
        """
        Return one tab-joined string per interval, built from each interval's fields.
        """
        return ['\t'.join(interval.fields) for interval in self.intervals]
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 101 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 102 | 
| 7 
3088e7e70888
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 4a3c9f195ba5d899b1a1ce5e80281cdf230f456a
 iuc parents: 
3diff
changeset | 103 class GFFReaderWrapper(Iterator, NiceReaderWrapper): | 
| 0 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 104 """ | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 105 Reader wrapper for GFF files which has two major functions: | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 106 1. group entries for GFF file (via group column), GFF3 (via id attribute), | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 107 or GTF (via gene_id/transcript id); | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 108 2. convert coordinates from GFF format--starting and ending coordinates | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 109 are 1-based, closed--to the 'traditional'/BED interval format--0 based, | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 110 half-open. This is useful when using GFF files as inputs to tools that | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 111 expect traditional interval format. | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 112 """ | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 113 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 114 def __init__(self, reader, chrom_col=0, feature_col=2, start_col=3, end_col=4, strand_col=6, | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 115 score_col=5, fix_strand=False, convert_to_bed_coord=False, **kwargs): | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 116 NiceReaderWrapper.__init__(self, reader, chrom_col=chrom_col, start_col=start_col, end_col=end_col, | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 117 strand_col=strand_col, fix_strand=fix_strand, **kwargs) | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 118 self.feature_col = feature_col | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 119 self.score_col = score_col | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 120 self.convert_to_bed_coord = convert_to_bed_coord | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 121 self.last_line = None | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 122 self.cur_offset = 0 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 123 self.seed_interval = None | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 124 self.seed_interval_line_len = 0 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 125 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
def parse_row(self, line):
    """
    Build a GFFInterval from a single line of input.

    The line is split on tab characters and handed to GFFInterval together
    with this reader's column indexes, default strand and fix_strand flag.
    """
    fields = line.split("\t")
    return GFFInterval(
        self,
        fields,
        self.chrom_col,
        self.feature_col,
        self.start_col,
        self.end_col,
        self.strand_col,
        self.score_col,
        self.default_strand,
        fix_strand=self.fix_strand,
    )
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 131 | 
| 7 
3088e7e70888
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 4a3c9f195ba5d899b1a1ce5e80281cdf230f456a
 iuc parents: 
3diff
def __next__(self):
    """
    Returns next GFFFeature.

    Starting from a "seed" interval (either left over from the previous
    call or freshly read), keeps reading intervals while they share the
    seed's 'group' (GFF), 'ID'/'Parent' (GFF3) or 'transcript_id' (GTF)
    attribute, and bundles them into a single GFFFeature. The first
    interval that does not belong to the feature is stored as the seed for
    the following call.

    If the seed turns out to be a Header or Comment it is returned as is,
    with a raw_size attribute set to the length of the current line.

    StopIteration raised while looking for a seed is not caught here, so
    it propagates to the caller and ends the iteration.
    """

    def handle_parse_error(e):
        """
        Actions to take when ParseError found.

        Reports the error through print_delegate (only when an outstream
        is set), counts the skipped line and remembers details of the
        first few skipped lines.
        """
        if self.outstream:
            if self.print_delegate and hasattr(self.print_delegate, "__call__"):
                self.print_delegate(self.outstream, e, self)
        self.skipped += 1
        # No reason to stuff an entire bad file into memory.
        # The counter was already incremented, so at most 9 entries are kept.
        if self.skipped < 10:
            self.skipped_lines.append((self.linenum, self.current_line, str(e)))
    # Get next GFFFeature
    # Start from the length of the seed line recorded by the previous call
    # (0 when there is no pending seed).
    raw_size = self.seed_interval_line_len
    # If there is no seed interval, set one. Also, if there are no more
    # intervals to read, this is where iterator dies.
    if not self.seed_interval:
        while not self.seed_interval:
            try:
                # NOTE(review): relies on the base reader exposing `next`;
                # confirm against the installed bx-python version.
                self.seed_interval = GenomicIntervalReader.next(self)
            except ParseError as e:
                handle_parse_error(e)
            finally:
                # Runs for good and bad lines alike (and also while a
                # StopIteration is propagating).
                raw_size += len(self.current_line)
    # If header or comment, clear seed interval and return it with its size.
    if isinstance(self.seed_interval, (Header, Comment)):
        return_val = self.seed_interval
        return_val.raw_size = len(self.current_line)
        self.seed_interval = None
        self.seed_interval_line_len = 0
        return return_val
    # Initialize feature identifier from seed.
    # For GFF.
    feature_group = self.seed_interval.attributes.get('group', None)
    # For GFF3
    feature_id = self.seed_interval.attributes.get('ID', None)
    # For GTF.
    feature_transcript_id = self.seed_interval.attributes.get('transcript_id', None)
    # Read all intervals associated with seed.
    feature_intervals = []
    feature_intervals.append(self.seed_interval)
    while True:
        try:
            interval = GenomicIntervalReader.next(self)
            raw_size += len(self.current_line)
        except StopIteration:
            # No more intervals to read, but last feature needs to be
            # returned.
            interval = None
            # NOTE(review): current_line is presumably still the last line
            # that was read, so its length may be counted twice here -
            # confirm against the base reader.
            raw_size += len(self.current_line)
            break
        except ParseError as e:
            # Bad line: record it, still count its length, keep reading.
            handle_parse_error(e)
            raw_size += len(self.current_line)
            continue
        # Ignore comments.
        # (Their length has already been added to raw_size above.)
        if isinstance(interval, Comment):
            continue
        # Determine if interval is part of feature.
        part_of = False
        group = interval.attributes.get('group', None)
        # GFF test:
        if group and feature_group == group:
            part_of = True
        # GFF3 test:
        # Matches when the interval has the seed's ID, or names the seed's
        # ID as its Parent.
        parent_id = interval.attributes.get('Parent', None)
        cur_id = interval.attributes.get('ID', None)
        if (cur_id and cur_id == feature_id) or (parent_id and parent_id == feature_id):
            part_of = True
        # GTF test:
        transcript_id = interval.attributes.get('transcript_id', None)
        if transcript_id and transcript_id == feature_transcript_id:
            part_of = True
        # If interval is not part of feature, clean up and break.
        if not part_of:
            # Adjust raw size because current line is not part of feature.
            raw_size -= len(self.current_line)
            break
        # Interval associated with feature.
        feature_intervals.append(interval)
    # Last interval read is the seed for the next interval.
    # (This is None when the input was exhausted.)
    self.seed_interval = interval
    self.seed_interval_line_len = len(self.current_line)
    # Return feature.
    feature = GFFFeature(self, self.chrom_col, self.feature_col, self.start_col,
                         self.end_col, self.strand_col, self.score_col,
                         self.default_strand, fix_strand=self.fix_strand,
                         intervals=feature_intervals, raw_size=raw_size)
    # Convert to BED coords?
    if self.convert_to_bed_coord:
        convert_gff_coords_to_bed(feature)
    return feature
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 228 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 229 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
def convert_bed_coords_to_gff(interval):
    """
    Shift an interval's coordinates from BED format to GFF format, in place.

    Accepted object types are GenomicInterval (including GFFFeature, whose
    sub-intervals are converted as well) and list (where the first element
    is the interval's start and the second element is the interval's end).
    Only the start is changed: it is incremented by one. Objects of any
    other type are left untouched.

    Returns the same object that was passed in.
    """
    if not isinstance(interval, GenomicInterval):
        # Plain [start, end] lists are adjusted positionally.
        if isinstance(interval, list):
            interval[0] += 1
        return interval

    interval.start += 1
    # A feature carries its own component intervals; convert each of them.
    if isinstance(interval, GFFFeature):
        for component in interval.intervals:
            convert_bed_coords_to_gff(component)
    return interval
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 245 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 246 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
def convert_gff_coords_to_bed(interval):
    """
    Shift an interval's coordinates from GFF format to BED format, in place.

    Accepted object types are GFFFeature (its sub-intervals are converted
    as well), GenomicInterval, and list (where the first element is the
    interval's start and the second element is the interval's end). Only
    the start is changed: it is decremented by one. Objects of any other
    type are left untouched.

    Returns the same object that was passed in.
    """
    if not isinstance(interval, GenomicInterval):
        # Plain [start, end] lists are adjusted positionally.
        if isinstance(interval, list):
            interval[0] -= 1
        return interval

    interval.start -= 1
    # A feature carries its own component intervals; convert each of them.
    if isinstance(interval, GFFFeature):
        for component in interval.intervals:
            convert_gff_coords_to_bed(component)
    return interval
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 262 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 263 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
def convert_to_twobit(reference_genome):
    """
    Create a 2bit file from a history fasta dataset.

    Runs the external ``faToTwoBit`` program on ``reference_genome`` and
    returns the path of the resulting 2bit file, which is created in the
    current working directory.

    If ``faToTwoBit`` exits with a non-zero status, its stderr output is
    passed to ``stop_err``.  Any other exception raised while preparing or
    running the command is reported through ``stop_err`` with an
    'Error running faToTwoBit. ' prefix.
    """
    try:
        # Reserve the output path with mkstemp so the name cannot be reused
        # by another temporary file before faToTwoBit writes to it.
        fd, seq_path = tempfile.mkstemp(dir=".")
        os.close(fd)
        # Pass the arguments as a list (no shell) so paths that contain
        # spaces or shell metacharacters are handled correctly.
        proc = subprocess.Popen(
            args=["faToTwoBit", reference_genome, seq_path],
            stderr=subprocess.PIPE,
        )
        # communicate() drains the pipe while waiting, so a very large
        # stderr cannot block the child process.
        _, stderr_bytes = proc.communicate()
        if proc.returncode != 0:
            # The pipe yields bytes; decode before handing the text on.
            stderr = stderr_bytes.decode('utf-8', 'replace')
            # NOTE(review): stop_err is defined elsewhere in this file;
            # presumably it reports the message and exits - confirm.
            stop_err(stderr)
        return seq_path
    except Exception as e:
        stop_err('Error running faToTwoBit. ' + str(e))
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 294 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 295 | 
| 2 
702970e4a134
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
 iuc parents: 
0diff
def get_bedtools_getfasta_default_header(chrom, start, end, strand, includes_strand_col):
    """
    Build the default fasta header emitted by bedtools getfasta when
    strandedness is forced: <chrom>:<start>-<end>(<strand>).

    The strand shown in the header is the given strand only when the input
    has a strand column and the value is '+' or '-'.  In every other case
    (no strand column, or any other strand value) the header shows '.'.
    """
    strand_val = strand if (includes_strand_col and strand in ('+', '-')) else '.'
    header_fields = (chrom, start, end, strand_val)
    return '%s:%s-%s(%s)' % header_fields
| 
702970e4a134
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
 iuc parents: 
0diff
changeset | 311 | 
| 
702970e4a134
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
 iuc parents: 
0diff
changeset | 312 | 
| 
702970e4a134
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
 iuc parents: 
0diff
def get_fasta_header_delimiter(delimiter):
    """
    Translate a delimiter name into the fasta header delimiter character.

    Recognized names are 'underscore', 'semicolon', 'comma', 'tilde' and
    'vertical_bar'.  Any other value falls back to the underscore.
    """
    named_delimiters = (
        ('underscore', '_'),
        ('semicolon', ';'),
        ('comma', ','),
        ('tilde', '~'),
        ('vertical_bar', '|'),
    )
    for label, character in named_delimiters:
        if delimiter == label:
            return character
    # Unrecognized name: default to underscore.
    return '_'
| 
702970e4a134
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
 iuc parents: 
0diff
changeset | 327 | 
| 
702970e4a134
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit 9192c1e90e2fd5017e6044884bcc6f2e80ba8b31
 iuc parents: 
0diff
changeset | 328 | 
| 0 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
def get_lines(feature):
    """
    Return the line(s) of a feature as a list-like value.

    A GFFFeature supplies its own lines through its lines() method.  Any
    other feature is treated as a single string and is returned as a
    one-element list with trailing carriage returns / newlines removed.
    """
    if not isinstance(feature, GFFFeature):
        stripped = feature.rstrip('\r\n')
        return [stripped]
    return feature.lines()
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 335 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 336 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
def gff_attributes_to_str(attrs, gff_format):
    """
    Convert GFF attributes to string. Supported formats are GFF3, GTF.

    attrs is a mapping of attribute names to values and gff_format is
    either 'GTF' or 'GFF3'.  Attributes are rendered as ``name "value"``
    for GTF and ``name=value`` for GFF3, and are joined with " ; ".

    For GTF output, transcript_id and gene_id are both set to the value of
    the first of 'group', 'ID' or 'Parent' found in attrs.  This is done on
    a copy, so the caller's mapping is left untouched.

    An empty (or None) attrs produces an empty string.  A ValueError is
    raised when attrs is non-empty and gff_format is not supported.
    """
    if not attrs:
        # Nothing to render, whatever the requested format is.
        return ""
    if gff_format == 'GTF':
        format_string = '%s "%s"'
        # Work on a copy so the identifiers added below do not leak into
        # the caller's dictionary.
        attrs = dict(attrs)
        # Convert group (GFF) and ID, parent (GFF3) attributes to
        # transcript_id, gene_id.
        id_attr = None
        if 'group' in attrs:
            id_attr = 'group'
        elif 'ID' in attrs:
            id_attr = 'ID'
        elif 'Parent' in attrs:
            id_attr = 'Parent'
        if id_attr:
            attrs['transcript_id'] = attrs['gene_id'] = attrs[id_attr]
    elif gff_format == 'GFF3':
        format_string = '%s=%s'
    else:
        # Previously this fell through to an unbound format_string.
        raise ValueError("Unsupported GFF format: %r (expected 'GTF' or 'GFF3')" % (gff_format,))
    return " ; ".join(format_string % (name, value) for name, value in attrs.items())
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 360 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 361 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
def parse_cols_arg(cols):
    """
    Parse a columns command line argument into a list of column indexes.

    cols is a comma-separated string of one-based column numbers; each is
    converted to an int and decremented by one.  When cols is empty, the
    module-level BED_DEFAULT_COLS is returned instead.
    """
    if not cols:
        return BED_DEFAULT_COLS
    # A trailing comma (e.g. "1,2,3,") means no strand column was given;
    # pad the missing field with '0', which becomes index -1 below.
    spec = cols + '0' if cols.endswith(',') else cols
    fields = spec.split(",")
    return [int(field) - 1 for field in fields]
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 375 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 376 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 377 def parse_gff_attributes(attr_str): | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 378 """ | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 379 Parses a GFF/GTF attribute string and returns a dictionary of name-value | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 380 pairs. The general format for a GFF3 attributes string is | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 381 name1=value1;name2=value2 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 382 The general format for a GTF attribute string is | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 383 name1 "value1" ; name2 "value2" | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 384 The general format for a GFF attribute string is a single string that | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 385 denotes the interval's group; in this case, method returns a dictionary | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 386 with a single key-value pair, and key name is 'group'. | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 387 """ | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 388 attributes_list = attr_str.split(";") | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 389 attributes = {} | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 390 for name_value_pair in attributes_list: | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 391 # Try splitting by '=' (GFF3) first because spaces are allowed in GFF3 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 392 # attribute; next, try double quotes for GTF. | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 393 pair = name_value_pair.strip().split("=") | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 394 if len(pair) == 1: | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 395 pair = name_value_pair.strip().split("\"") | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 396 if len(pair) == 1: | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 397 # Could not split for some reason. | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 398 continue | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 399 if pair == '': | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 400 continue | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 401 name = pair[0].strip() | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 402 if name == '': | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 403 continue | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 404 # Need to strip double quote from values | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 405 value = pair[1].strip(" \"") | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 406 attributes[name] = value | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 407 if len(attributes) == 0: | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 408 # Could not split attributes string, so entire string must be | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 409 # 'group' attribute. This is the case for strictly GFF files. | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 410 attributes['group'] = attr_str | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 411 return attributes | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 412 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 413 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
def reverse_complement(s):
    """Return the reverse complement of the DNA sequence ``s``.

    Each base is replaced by its complement and the result is reversed.
    Case is preserved.  Besides A/C/G/T/N, the IUPAC ambiguity codes
    (R/Y, K/M, B/V, D/H) are complemented.  Any character without an
    entry in the table (for example the self-complementary codes S and W,
    or a gap character) is passed through unchanged rather than raising
    ``KeyError``.

    ``s`` is normally a ``str``; any other iterable of single-character
    strings is joined into a string first.  The return value is always a
    ``str``.
    """
    if not isinstance(s, str):
        # Keep accepting e.g. a list of characters, as a per-character
        # lookup loop would.
        s = "".join(s)
    complement_table = str.maketrans(
        "ACGTRYKMBVDHNacgtrykmbvdhn",
        "TGCAYRMKVBHDNtgcayrmkvbhdn",
    )
    # Complement in one pass, then reverse with a slice.
    return s.translate(complement_table)[::-1]
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 421 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
changeset | 422 | 
| 
8dd8e89c0603
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/extract_genomic_dna commit b'67cff25a50ba173b0468819204d0999496f68ea9'
 iuc parents: diff
def stop_err(msg):
    """Write ``msg`` to standard error and exit with status 1.

    ``msg`` is converted with ``str()`` before writing, so a non-string
    value such as an exception instance is reported instead of causing a
    ``TypeError`` in ``sys.stderr.write``.  No newline is appended; the
    text written for a ``str`` message is exactly ``msg``.

    Raises ``SystemExit`` (via ``sys.exit(1)``); it never returns.
    """
    sys.stderr.write(str(msg))
    sys.exit(1)
