annotate utils/gff_util.py @ 4:7a2a604ae9c8 draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
author devteam
date Thu, 11 Feb 2016 12:11:59 -0500
parents
children 0145969324c4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
1 """
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
2 Provides utilities for working with GFF files.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
3 """
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
4
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
5 import copy
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
6 from bx.intervals.io import GenomicInterval, GenomicIntervalReader, MissingFieldError, NiceReaderWrapper
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
7 from bx.tabular.io import Header, Comment, ParseError
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
8 from utils.odict import odict
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
9
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
10
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
class GFFInterval( GenomicInterval ):
    """
    A GFF interval, including attributes. If file is strictly a GFF file,
    only attribute is 'group.'

    In addition to whatever GenomicInterval stores, instances carry:

    * ``feature_col`` / ``feature`` -- index and value of the feature column;
    * ``score_col`` / ``score`` -- index and raw value of the score column;
    * ``attributes`` -- result of ``parse_gff_attributes()`` applied to the
      attributes column (index 8).
    """
    def __init__( self, reader, fields, chrom_col=0, feature_col=2, start_col=3, end_col=4,
                  strand_col=6, score_col=5, default_strand='.', fix_strand=False ):
        """
        Build an interval from the already-split columns of one GFF line.

        ``fields`` is modified in place while the strand placeholder is
        swapped in and out (see the HACK note below).

        Raises MissingFieldError if the feature, score or attributes column
        is not present in the line.
        """
        # HACK: GFF format allows '.' for strand but GenomicInterval does not. To get around this,
        # temporarily set strand and then unset after initing GenomicInterval.
        unknown_strand = ( not fix_strand ) and fields[ strand_col ] == '.'
        if unknown_strand:
            fields[ strand_col ] = '+'
        GenomicInterval.__init__( self, reader, fields, chrom_col, start_col, end_col, strand_col,
                                  default_strand, fix_strand=fix_strand )
        if unknown_strand:
            # Restore the original "unknown" marker on both the parsed value and the raw column.
            self.strand = '.'
            self.fields[ strand_col ] = '.'

        # Handle feature, score column.
        self.feature_col = feature_col
        if self.feature_col >= self.nfields:
            raise MissingFieldError( "No field for feature_col (%d)" % feature_col )
        self.feature = self.fields[ self.feature_col ]
        self.score_col = score_col
        if self.score_col >= self.nfields:
            raise MissingFieldError( "No field for score_col (%d)" % score_col )
        self.score = self.fields[ self.score_col ]

        # GFF attributes. Report a short line with the same error type used for
        # the other columns instead of letting a bare IndexError escape.
        attributes_col = 8
        if attributes_col >= len( fields ):
            raise MissingFieldError( "No field for attributes column (%d)" % attributes_col )
        self.attributes = parse_gff_attributes( fields[ attributes_col ] )

    def copy( self ):
        """
        Return a new GFFInterval built from a shallow copy of this interval's
        fields and the same column layout. The current strand is passed as the
        new interval's default strand.
        """
        return GFFInterval( self.reader, list( self.fields ), self.chrom_col, self.feature_col, self.start_col,
                            self.end_col, self.strand_col, self.score_col, self.strand )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
46
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
47
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
class GFFFeature( GFFInterval ):
    """
    A GFF feature, which can include multiple intervals.

    The feature itself is initialised from a copy of the first interval's
    fields; its start and end are then widened so that they span every
    interval it contains.
    """
    def __init__( self, reader, chrom_col=0, feature_col=2, start_col=3, end_col=4,
                  strand_col=6, score_col=5, default_strand='.', fix_strand=False, intervals=None,
                  raw_size=0 ):
        """
        Create a feature from a non-empty list of intervals.

        ``intervals`` is stored by reference (not copied) and ``raw_size`` is
        stored as given.

        Raises IndexError if no intervals are supplied and ValueError if an
        interval's chrom differs from the feature's chrom.
        """
        # None instead of a mutable [] default so no list object is shared between calls.
        if intervals is None:
            intervals = []
        if len( intervals ) == 0:
            # The first interval seeds the feature, so there must be at least one.
            raise IndexError( "GFFFeature requires at least one interval" )
        # Use copy so that first interval and feature do not share fields.
        GFFInterval.__init__( self, reader, copy.deepcopy( intervals[0].fields ), chrom_col, feature_col,
                              start_col, end_col, strand_col, score_col, default_strand,
                              fix_strand=fix_strand )
        self.intervals = intervals
        self.raw_size = raw_size
        # Use intervals to set feature attributes.
        for interval in self.intervals:
            # Error checking. NOTE: intervals need not share the same strand.
            if interval.chrom != self.chrom:
                raise ValueError( "interval chrom does not match self chrom: %s != %s" %
                                  ( interval.chrom, self.chrom ) )
            # Set start, end of interval.
            if interval.start < self.start:
                self.start = interval.start
            if interval.end > self.end:
                self.end = interval.end

    def name( self ):
        """ Returns feature's name, or None if no naming attribute is present. """
        # Preference for name: GTF, GFF3, GFF.
        for attr_name in ( 'gene_id', 'transcript_id',  # GTF
                           'ID', 'id',  # GFF3
                           'group' ):  # GFF (TODO)
            name = self.attributes.get( attr_name, None )
            if name is not None:
                return name
        return None

    def copy( self ):
        """
        Return a new GFFFeature whose intervals are copies of this feature's
        intervals. The current strand is passed as the new feature's default
        strand, and raw_size is carried over to the copy.
        """
        intervals_copy = [ interval.copy() for interval in self.intervals ]
        return GFFFeature( self.reader, self.chrom_col, self.feature_col, self.start_col, self.end_col,
                           self.strand_col, self.score_col, self.strand, intervals=intervals_copy,
                           raw_size=self.raw_size )

    def lines( self ):
        """ Return one tab-joined line per interval, in interval order. """
        return [ '\t'.join( interval.fields ) for interval in self.intervals ]
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
97
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
98
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
class GFFIntervalToBEDReaderWrapper( NiceReaderWrapper ):
    """
    Reader wrapper that reads GFF intervals/lines and automatically converts
    them to BED format.
    """

    def parse_row( self, line ):
        """
        Split one tab-delimited line into columns, wrap it in a
        GenomicInterval using this reader's column settings, and return the
        result of convert_gff_coords_to_bed() on that interval.
        """
        # HACK: this should return a GFF interval, but bx-python operations
        # require GenomicInterval objects and subclasses will not work.
        columns = line.split( "\t" )
        gff_interval = GenomicInterval( self, columns, self.chrom_col, self.start_col,
                                        self.end_col, self.strand_col, self.default_strand,
                                        fix_strand=self.fix_strand )
        return convert_gff_coords_to_bed( gff_interval )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
113
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
114
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
115 class GFFReaderWrapper( NiceReaderWrapper ):
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
116 """
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
117 Reader wrapper for GFF files.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
118
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
119 Wrapper has two major functions:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
120
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
121 1. group entries for GFF file (via group column), GFF3 (via id attribute),
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
122 or GTF (via gene_id/transcript id);
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
123 2. convert coordinates from GFF format--starting and ending coordinates
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
124 are 1-based, closed--to the 'traditional'/BED interval format--0 based,
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
125 half-open. This is useful when using GFF files as inputs to tools that
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
126 expect traditional interval format.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
127 """
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
128
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
def __init__( self, reader, chrom_col=0, feature_col=2, start_col=3,
              end_col=4, strand_col=6, score_col=5, fix_strand=False, convert_to_bed_coord=False, **kwargs ):
    """
    Set up the wrapper around `reader`.

    The chrom/start/end/strand column indices, `fix_strand` and any extra
    keyword arguments are forwarded unchanged to NiceReaderWrapper; the
    remaining arguments are kept on the instance:

    feature_col          -- index of the feature column.
    score_col            -- index of the score column.
    convert_to_bed_coord -- flag consulted by next() when deciding whether
                            to convert coordinates to BED coordinates.
    """
    # Let the base reader handle the columns it knows about.
    NiceReaderWrapper.__init__(
        self,
        reader,
        chrom_col=chrom_col,
        start_col=start_col,
        end_col=end_col,
        strand_col=strand_col,
        fix_strand=fix_strand,
        **kwargs
    )

    # GFF-specific settings.
    self.convert_to_bed_coord = convert_to_bed_coord
    self.score_col = score_col
    self.feature_col = feature_col

    # Grouping state used by next(): the seed is the first interval of the
    # feature to be returned next, and seed_interval_line_len is the length
    # of the raw line that interval was parsed from.
    self.seed_interval_line_len = 0
    self.seed_interval = None

    # Reading-position bookkeeping, initially empty/zero.
    self.cur_offset = 0
    self.last_line = None
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
140
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
def parse_row( self, line ):
    """
    Builds a GFFInterval from one tab-delimited line, using this reader's
    column indices and strand settings.
    """
    fields = line.split( "\t" )
    # NOTE(review): default_strand is not set in this class; presumably it
    # comes from the base reader -- confirm.
    return GFFInterval( self, fields, self.chrom_col, self.feature_col,
                        self.start_col, self.end_col, self.strand_col,
                        self.score_col, self.default_strand,
                        fix_strand=self.fix_strand )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
146
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
147 def next( self ):
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
148 """ Returns next GFFFeature. """
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
149
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
150 #
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
151 # Helper function.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
152 #
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
153
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
154 def handle_parse_error( parse_error ):
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
155 """ Actions to take when ParseError found. """
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
156 if self.outstream:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
157 if self.print_delegate and hasattr(self.print_delegate, "__call__"):
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
158 self.print_delegate( self.outstream, e, self )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
159 self.skipped += 1
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
160 # no reason to stuff an entire bad file into memory
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
161 if self.skipped < 10:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
162 self.skipped_lines.append( ( self.linenum, self.current_line, str( e ) ) )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
163
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
164 # For debugging, uncomment this to propagate parsing exceptions up.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
165 # I.e. the underlying reason for an unexpected StopIteration exception
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
166 # can be found by uncommenting this.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
167 # raise e
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
168
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
169 #
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
170 # Get next GFFFeature
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
171 #
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
172 raw_size = self.seed_interval_line_len
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
173
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
174 # If there is no seed interval, set one. Also, if there are no more
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
175 # intervals to read, this is where iterator dies.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
176 if not self.seed_interval:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
177 while not self.seed_interval:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
178 try:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
179 self.seed_interval = GenomicIntervalReader.next( self )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
180 except ParseError, e:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
181 handle_parse_error( e )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
182 # TODO: When no longer supporting python 2.4 use finally:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
183 #finally:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
184 raw_size += len( self.current_line )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
185
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
186 # If header or comment, clear seed interval and return it with its size.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
187 if isinstance( self.seed_interval, ( Header, Comment ) ):
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
188 return_val = self.seed_interval
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
189 return_val.raw_size = len( self.current_line )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
190 self.seed_interval = None
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
191 self.seed_interval_line_len = 0
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
192 return return_val
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
193
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
194 # Initialize feature identifier from seed.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
195 feature_group = self.seed_interval.attributes.get( 'group', None ) # For GFF
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
196 # For GFF3
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
197 feature_id = self.seed_interval.attributes.get( 'ID', None )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
198 # For GTF.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
199 feature_transcript_id = self.seed_interval.attributes.get( 'transcript_id', None )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
200
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
201 # Read all intervals associated with seed.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
202 feature_intervals = []
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
203 feature_intervals.append( self.seed_interval )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
204 while True:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
205 try:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
206 interval = GenomicIntervalReader.next( self )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
207 raw_size += len( self.current_line )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
208 except StopIteration, e:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
209 # No more intervals to read, but last feature needs to be
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
210 # returned.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
211 interval = None
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
212 raw_size += len( self.current_line )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
213 break
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
214 except ParseError, e:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
215 handle_parse_error( e )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
216 raw_size += len( self.current_line )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
217 continue
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
218 # TODO: When no longer supporting python 2.4 use finally:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
219 #finally:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
220 #raw_size += len( self.current_line )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
221
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
222 # Ignore comments.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
223 if isinstance( interval, Comment ):
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
224 continue
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
225
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
226 # Determine if interval is part of feature.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
227 part_of = False
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
228 group = interval.attributes.get( 'group', None )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
229 # GFF test:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
230 if group and feature_group == group:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
231 part_of = True
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
232 # GFF3 test:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
233 parent_id = interval.attributes.get( 'Parent', None )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
234 cur_id = interval.attributes.get( 'ID', None )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
235 if ( cur_id and cur_id == feature_id ) or ( parent_id and parent_id == feature_id ):
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
236 part_of = True
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
237 # GTF test:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
238 transcript_id = interval.attributes.get( 'transcript_id', None )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
239 if transcript_id and transcript_id == feature_transcript_id:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
240 part_of = True
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
241
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
242 # If interval is not part of feature, clean up and break.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
243 if not part_of:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
244 # Adjust raw size because current line is not part of feature.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
245 raw_size -= len( self.current_line )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
246 break
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
247
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
248 # Interval associated with feature.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
249 feature_intervals.append( interval )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
250
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
251 # Last interval read is the seed for the next interval.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
252 self.seed_interval = interval
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
253 self.seed_interval_line_len = len( self.current_line )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
254
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
255 # Return feature.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
256 feature = GFFFeature( self, self.chrom_col, self.feature_col, self.start_col,
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
257 self.end_col, self.strand_col, self.score_col,
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
258 self.default_strand, fix_strand=self.fix_strand,
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
259 intervals=feature_intervals, raw_size=raw_size )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
260
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
261 # Convert to BED coords?
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
262 if self.convert_to_bed_coord:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
263 convert_gff_coords_to_bed( feature )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
264
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
265 return feature
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
266
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
267
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
def convert_bed_coords_to_gff( interval ):
    """
    Shift an interval from BED coordinates to GFF coordinates by adding one
    to its start; the end is not touched.

    The object is modified in place and is also returned. Handled types:

    * GenomicInterval -- ``start`` is incremented. When the object is also a
      GFFFeature, each entry of its ``intervals`` is converted too.
    * list (exactly a list, not a subclass) -- the first element, taken to
      be the interval's start, is incremented.

    Any other object is returned unchanged.
    """
    if isinstance( interval, GenomicInterval ):
        interval.start += 1
        # A feature carries its component intervals; they need the same shift.
        children = interval.intervals if isinstance( interval, GFFFeature ) else []
        for child in children:
            convert_bed_coords_to_gff( child )
        return interval
    if type( interval ) is list:
        interval[ 0 ] += 1
    return interval
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
283
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
284
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
def convert_gff_coords_to_bed( interval ):
    """
    Shift an interval from GFF coordinates to BED coordinates by subtracting
    one from its start; the end is not touched.

    The object is modified in place and is also returned. Handled types:

    * GenomicInterval -- ``start`` is decremented. When the object is also a
      GFFFeature, each entry of its ``intervals`` is converted too.
    * list (exactly a list, not a subclass) -- the first element, taken to
      be the interval's start, is decremented.

    Any other object is returned unchanged.
    """
    if isinstance( interval, GenomicInterval ):
        interval.start -= 1
        # A feature carries its component intervals; they need the same shift.
        children = interval.intervals if isinstance( interval, GFFFeature ) else []
        for child in children:
            convert_gff_coords_to_bed( child )
        return interval
    if type( interval ) is list:
        interval[ 0 ] -= 1
    return interval
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
300
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
301
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
def parse_gff_attributes( attr_str ):
    """
    Parses a GFF/GTF attribute string and returns a dictionary of name-value
    pairs. The general format for a GFF3 attributes string is

        name1=value1;name2=value2

    The general format for a GTF attribute string is

        name1 "value1" ; name2 "value2"

    The general format for a GFF attribute string is a single string that
    denotes the interval's group; in this case, method returns a dictionary
    with a single key-value pair, and key name is 'group'.

    Entries are separated on ';'. An entry is split on its first '=' if it
    has one (so the value may itself contain '='); otherwise it is split on
    double quotes. Entries that cannot be split, or whose name is empty, are
    skipped. Names are stripped of surrounding whitespace; values are
    stripped of surrounding spaces and double quotes. If a name occurs more
    than once, the last occurrence wins.
    """
    attributes = {}
    for name_value_pair in attr_str.split( ";" ):
        entry = name_value_pair.strip()
        # Try splitting by '=' (GFF3) first because spaces are allowed in GFF3
        # attributes. Only the first '=' separates name from value; any later
        # '=' belongs to the value.
        name, separator, value = entry.partition( "=" )
        if not separator:
            # No '=' present; next, try double quotes for GTF.
            pieces = entry.split( "\"" )
            if len( pieces ) == 1:
                # Could not split this entry at all (e.g. empty text after a
                # trailing ';'), so there is nothing to record.
                continue
            name, value = pieces[0], pieces[1]
        name = name.strip()
        if name == '':
            continue
        # Need to strip double quotes (and spaces) from values.
        attributes[ name ] = value.strip( " \"" )

    if not attributes:
        # Could not split attributes string, so entire string must be
        # 'group' attribute. This is the case for strictly GFF files.
        attributes['group'] = attr_str
    return attributes
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
342
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
343
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
def gff_attributes_to_str( attrs, gff_format ):
    """
    Convert GFF attributes to string. Supported formats are GFF3, GTF.

    ``attrs`` is a dictionary of attribute names to values and ``gff_format``
    is either 'GTF' or 'GFF3'. Each pair is rendered as ``name "value"``
    (GTF) or ``name=value`` (GFF3) and the pairs are joined with " ; ".

    Note that for GTF output ``attrs`` is modified in place: if it contains
    'group', 'ID' or 'Parent' (checked in that order), that attribute's value
    is stored under both 'transcript_id' and 'gene_id' before rendering.

    Raises ValueError if ``gff_format`` is not one of the supported formats.
    """
    if gff_format == 'GTF':
        format_string = '%s "%s"'
        # Convert group (GFF) and ID, Parent (GFF3) attributes to
        # transcript_id, gene_id. The first of these that is present wins.
        for id_attr in ( 'group', 'ID', 'Parent' ):
            if id_attr in attrs:
                identifier = attrs[id_attr]
                attrs['transcript_id'] = identifier
                attrs['gene_id'] = identifier
                break
    elif gff_format == 'GFF3':
        format_string = '%s=%s'
    else:
        # Without this branch format_string would be left unbound and the
        # formatting below would fail with an unhelpful UnboundLocalError.
        raise ValueError( "Unsupported GFF format: %r (expected 'GTF' or 'GFF3')" % ( gff_format, ) )
    attrs_strs = [ format_string % ( name, value ) for name, value in attrs.items() ]
    return " ; ".join( attrs_strs )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
366
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
367
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
368 def read_unordered_gtf( iterator, strict=False ):
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
369 """
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
370 Returns GTF features found in an iterator. GTF lines need not be ordered
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
371 or clustered for reader to work. Reader returns GFFFeature objects sorted
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
372 by transcript_id, chrom, and start position.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
373 """
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
374
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
375 # -- Get function that generates line/feature key. --
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
376
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
377 get_transcript_id = lambda fields: parse_gff_attributes( fields[8] )[ 'transcript_id' ]
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
378 if strict:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
379 # Strict GTF parsing uses transcript_id only to group lines into feature.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
380 key_fn = get_transcript_id
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
381 else:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
382 # Use lenient parsing where chromosome + transcript_id is the key. This allows
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
383 # transcripts with same ID on different chromosomes; this occurs in some popular
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
384 # datasources, such as RefGenes in UCSC.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
385 key_fn = lambda fields: fields[0] + '_' + get_transcript_id( fields )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
386
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
387 # Aggregate intervals by transcript_id and collect comments.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
388 feature_intervals = odict()
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
389 comments = []
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
390 for count, line in enumerate( iterator ):
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
391 if line.startswith( '#' ):
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
392 comments.append( Comment( line ) )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
393 continue
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
394
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
395 line_key = key_fn( line.split('\t') )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
396 if line_key in feature_intervals:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
397 feature = feature_intervals[ line_key ]
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
398 else:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
399 feature = []
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
400 feature_intervals[ line_key ] = feature
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
401 feature.append( GFFInterval( None, line.split( '\t' ) ) )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
402
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
403 # Create features.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
404 chroms_features = {}
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
405 for count, intervals in enumerate( feature_intervals.values() ):
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
406 # Sort intervals by start position.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
407 intervals.sort( lambda a, b: cmp( a.start, b.start ) )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
408 feature = GFFFeature( None, intervals=intervals )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
409 if feature.chrom not in chroms_features:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
410 chroms_features[ feature.chrom ] = []
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
411 chroms_features[ feature.chrom ].append( feature )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
412
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
413 # Sort features by chrom, start position.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
414 chroms_features_sorted = []
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
415 for chrom_features in chroms_features.values():
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
416 chroms_features_sorted.append( chrom_features )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
417 chroms_features_sorted.sort( lambda a, b: cmp( a[0].chrom, b[0].chrom ) )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
418 for features in chroms_features_sorted:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
419 features.sort( lambda a, b: cmp( a.start, b.start ) )
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
420
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
421 # Yield comments first, then features.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
422 # FIXME: comments can appear anywhere in file, not just the beginning.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
423 # Ideally, comments would be associated with features and output
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
424 # just before feature/line.
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
425 for comment in comments:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
426 yield comment
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
427
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
428 for chrom_features in chroms_features_sorted:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
429 for feature in chrom_features:
7a2a604ae9c8 planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/gops/subtract commit 4e2052686dfe8003f867449e0affff96398b2a62
devteam
parents:
diff changeset
430 yield feature