annotate utils/maf_utilities.py @ 2:16df616b39e5 draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author devteam
date Sun, 01 Mar 2020 07:24:26 -0500
parents 717aee069681
children 25b8736c627a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
2 """
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
3 Provides wrappers and utilities for working with MAF files and alignments.
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
4 """
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
5 # Dan Blankenberg
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
6 import bx.align.maf
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
7 import bx.intervals
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
8 import bx.interval_index_file
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
9 import sys
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
10 import os
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
11 import tempfile
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
12 import logging
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
13 from copy import deepcopy
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
14
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
# Python 3 exposes maketrans on str; older interpreters only provide
# string.maketrans, so fall back to that when the attribute is missing.
maketrans = getattr(str, "maketrans", None)
if maketrans is None:
    from string import maketrans

assert (2, 4) <= sys.version_info[:2]

log = logging.getLogger(__name__)


# characters treated as alignment gaps
GAP_CHARS = ["-"]
# separator between the species part and the chromosome part of a src string
SRC_SPLIT_CHAR = "."
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
27
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
28
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def src_split(src):
    """Split a src string into a ``(spec, chrom)`` pair.

    The string is cut at the first occurrence of SRC_SPLIT_CHAR.  When the
    separator is absent, the whole string is returned for both parts.
    """
    head, separator, tail = src.partition(SRC_SPLIT_CHAR)
    if not separator:
        # no separator present: species and chromosome share the same name
        return head, head
    return head, tail
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
37
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
38
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def src_merge(spec, chrom, contig=None):
    """Build a src string from its parts via ``bx.align.maf.src_merge``.

    If either ``spec`` or ``chrom`` is None, whichever value is available
    (``spec or chrom``) is used for both before delegating.
    """
    if None in (spec, chrom):
        fallback = spec or chrom
        spec = fallback
        chrom = fallback
    return bx.align.maf.src_merge(spec, chrom, contig)
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
43
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
44
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_species_in_block(block):
    """Return the species names found in ``block.components``.

    Names are taken from the first element returned by ``src_split`` for each
    component's ``src``; they are listed once each, in first-seen order.
    """
    found = []
    for component in block.components:
        name = src_split(component.src)[0]
        if name in found:
            continue
        found.append(name)
    return found
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
52
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
53
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def tool_fail(msg="Unknown Error"):
    """Terminate via ``sys.exit`` with ``msg`` prefixed by "Fatal Error: "."""
    sys.exit("Fatal Error: %s" % msg)
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
57
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
58 # an object corresponding to a reference layered alignment
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
59
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
60
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
class RegionAlignment(object):
    """Fixed-width alignment holding one gap-initialised sequence per species.

    Each species is backed by its own temporary file that is pre-filled with
    ``size`` "-" characters and then overwritten in place at the requested
    offsets by set_position() / set_range().
    """

    DNA_COMPLEMENT = maketrans("ACGTacgt", "TGCAtgca")

    def __init__(self, size, species=[]):
        """Create an alignment ``size`` columns wide.

        ``species`` is a single species name or a list of names; each one is
        registered through add_species().  The default list is only read,
        never mutated.
        """
        self.size = size
        self.sequences = {}
        if not isinstance(species, list):
            species = [species]
        for spec in species:
            self.add_species(spec)

    # add a species to the alignment
    def add_species(self, species):
        """Register ``species`` with a sequence made of ``size`` "-" characters.

        Registering a name that already exists replaces its sequence file
        with a fresh all-gap one.
        """
        # Text mode is required: TemporaryFile() defaults to binary ("w+b"),
        # which rejects the str data written here and in set_range() on
        # Python 3.
        # NOTE(review): seek() offsets equal column indexes only while every
        # stored character is single-byte (plain ASCII bases and gaps).
        handle = tempfile.TemporaryFile(mode="w+")
        handle.write("-" * self.size)
        self.sequences[species] = handle

    # returns the names for species found in alignment, skipping names as requested
    def get_species_names(self, skip=[]):
        """Return a list of registered species names, leaving out ``skip``.

        ``skip`` is a single name or a list of names; names that are not
        registered are ignored.
        """
        if not isinstance(skip, list):
            skip = [skip]
        # Build a real list: on Python 3 dict.keys() is a view without
        # remove(), so the previous remove-based filtering never took effect.
        return [name for name in self.sequences if name not in skip]

    # returns the sequence for a species
    def get_sequence(self, species):
        """Return the full sequence stored for ``species``.

        Raises KeyError if ``species`` has not been registered.
        """
        handle = self.sequences[species]
        handle.seek(0)
        return handle.read()

    # returns the reverse complement of the sequence for a species
    def get_sequence_reverse_complement(self, species):
        """Return the sequence for ``species`` complemented and reversed.

        Only the characters in DNA_COMPLEMENT ("ACGTacgt") are complemented;
        everything else, including "-", is kept as is.
        """
        return self.get_sequence(species).translate(self.DNA_COMPLEMENT)[::-1]

    # sets a position for a species
    def set_position(self, index, species, base):
        """Write the single character ``base`` at column ``index``.

        Raises Exception if ``base`` is not exactly one character long, or
        for any reason set_range() raises.
        """
        if len(base) != 1:
            raise Exception("A genomic position can only have a length of 1.")
        return self.set_range(index, species, base)

    # sets a range for a species
    def set_range(self, index, species, bases):
        """Overwrite the sequence of ``species`` with ``bases`` from ``index`` on.

        An unregistered ``species`` is added first.  Raises Exception if
        ``index`` lies outside ``0 .. size - 1`` or ``bases`` is empty.  The
        end of the written range is not checked against ``size``.
        """
        if index >= self.size or index < 0:
            raise Exception("Your index (%i) is out of range (0 - %i)." % (index, self.size - 1))
        if len(bases) == 0:
            raise Exception("A set of genomic positions can only have a positive length.")
        if species not in self.sequences:
            self.add_species(species)
        handle = self.sequences[species]
        handle.seek(index)
        handle.write(bases)

    # Flush temp file of specified species, or all species
    def flush(self, species=None):
        """Flush the temporary file of ``species`` (a name or list of names).

        With ``species`` left as None every registered species is flushed.
        Raises KeyError for a name that is not registered.
        """
        if species is None:
            species = list(self.sequences)
        elif not isinstance(species, list):
            species = [species]
        for spec in species:
            self.sequences[spec].flush()
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
127
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
128
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
class GenomicRegionAlignment(RegionAlignment):
    """RegionAlignment that also records the ``start`` and ``end`` it spans.

    The alignment size handed to RegionAlignment is ``end - start``.
    """

    def __init__(self, start, end, species=[]):
        super(GenomicRegionAlignment, self).__init__(end - start, species)
        self.start, self.end = start, end
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
135
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
136
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
137 class SplicedAlignment(object):
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
138
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
139 DNA_COMPLEMENT = maketrans("ACGTacgt", "TGCAtgca")
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
140
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
141 def __init__(self, exon_starts, exon_ends, species=[]):
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
142 if not isinstance(exon_starts, list):
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
143 exon_starts = [exon_starts]
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
144 if not isinstance(exon_ends, list):
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
145 exon_ends = [exon_ends]
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
146 assert len(exon_starts) == len(exon_ends), "The number of starts does not match the number of sizes."
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
147 self.exons = []
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
148 for i in range(len(exon_starts)):
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
149 self.exons.append(GenomicRegionAlignment(exon_starts[i], exon_ends[i], species))
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
150
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
151 # returns the names for species found in alignment, skipping names as requested
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
152 def get_species_names(self, skip=[]):
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
153 if not isinstance(skip, list):
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
154 skip = [skip]
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
155 names = []
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
156 for exon in self.exons:
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
157 for name in exon.get_species_names(skip=skip):
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
158 if name not in names:
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
159 names.append(name)
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
160 return names
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
161
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
162 # returns the sequence for a species
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_sequence(self, species):
    """Return the concatenated sequence of every exon for ``species``.

    Exons are visited in the order of ``self.exons``.  An exon whose
    ``get_species_names()`` does not list ``species`` contributes
    ``exon.size`` gap characters ("-") in place of sequence.

    NOTE(review): assumes ``exon.get_sequence()`` returns ``str``, matching
    the ``str`` gap filler written in the other branch -- confirm.
    """
    # Text mode is required: the pieces written below are str, and the
    # default binary mode ("w+b") rejects str on Python 3.  The context
    # manager also guarantees the temporary file is closed on every path.
    with tempfile.TemporaryFile(mode="w+") as sequence:
        for exon in self.exons:
            if species in exon.get_species_names():
                sequence.write(exon.get_sequence(species))
            else:
                sequence.write("-" * exon.size)
        sequence.seek(0)
        return sequence.read()
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
172
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
173 # returns the reverse complement of the sequence for a species
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_sequence_reverse_complement(self, species):
    """Return the reverse complement of the sequence for ``species``.

    The sequence from ``self.get_sequence(species)`` is mapped through the
    ``self.DNA_COMPLEMENT`` translation table and then reversed.
    """
    complemented = self.get_sequence(species).translate(self.DNA_COMPLEMENT)
    return "".join(reversed(complemented))
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
178
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
179 # Start and end of coding region
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
@property
def start(self):
    """Start of the coding region: the ``start`` of the first exon."""
    first_exon = self.exons[0]
    return first_exon.start
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
183
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
@property
def end(self):
    """End of the coding region: the ``end`` of the last exon."""
    last_exon = self.exons[-1]
    return last_exon.end
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
187
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
188 # Open a MAF index using a UID
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
189
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
190
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def maf_index_by_uid(maf_uid, index_location_file):
    """Open the MAF index registered under ``maf_uid``.

    ``index_location_file`` is read line by line as tab-separated text.
    Lines starting with "#" are skipped.  On the first line whose second
    field equals ``maf_uid``, the fifth field is split on "," into a list
    of MAF file names and opened with ``bx.align.maf.MultiIndexed``.

    Returns the ``MultiIndexed`` object, or ``None`` when no usable entry
    is found.
    """
    # The context manager closes the location file on every exit path,
    # including the early return below.
    with open(index_location_file) as location_file:
        for line in location_file:
            try:
                # read each line, if not enough fields, go to next line
                if line[0:1] == "#":
                    continue
                fields = line.split('\t')
                if maf_uid == fields[1]:
                    try:
                        maf_files = fields[4].replace("\n", "").replace("\r", "").split(",")
                        return bx.align.maf.MultiIndexed(maf_files, keep_open=True, parse_e_rows=False)
                    except Exception as e:
                        raise Exception('MAF UID (%s) found, but configuration appears to be malformed: %s' % (maf_uid, e))
            except Exception:
                # NOTE: this handler also catches the "malformed" exception
                # raised just above, so a broken entry is skipped and the
                # scan continues; callers only ever observe None.
                pass
    return None
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
207
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
208 # return ( index, temp_index_filename ) for user maf, if available, or build one and return it, return None when no tempfile is created
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
209
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
210
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def open_or_build_maf_index(maf_file, index_filename, species=None):
    """Open ``maf_file`` with an existing index, or build a new index.

    Returns ``(index, None)`` when ``index_filename`` could be opened with
    ``bx.align.maf.Indexed``.  If that raises, the result of
    ``build_maf_index(maf_file, species=species)`` is returned instead.
    """
    try:
        existing_index = bx.align.maf.Indexed(maf_file, index_filename=index_filename, keep_open=True, parse_e_rows=False)
    except Exception:
        # The supplied index is unusable; fall back to building one.
        return build_maf_index(maf_file, species=species)
    return (existing_index, None)
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
216
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
217
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def build_maf_index_species_chromosomes(filename, index_species=None):
    """Scan a MAF file, building an interval index and species summary.

    Each component ``src`` is split on its first "." into a species name
    and a chromosome name (no "." means no chromosome).  Components are
    added to the index only when ``index_species`` is None or contains the
    species, their forward-strand start/end convert to ``int``, and the
    end is greater than the start.

    Returns ``(indexes, species, species_chromosomes, blocks)``:
    the ``bx.interval_index_file.Indexes`` object, the species names in
    first-seen order, a dict mapping each species to its chromosome names
    in first-seen order, and the number of blocks read.  If anything
    raises while reading, the failure is logged at debug level and
    ``(None, [], {}, 0)`` is returned.
    """
    species = []
    species_chromosomes = {}
    indexes = bx.interval_index_file.Indexes()
    blocks = 0
    try:
        # The context manager closes the MAF file once scanning finishes
        # or fails; the reader is not used after this function returns.
        with open(filename) as maf_handle:
            maf_reader = bx.align.maf.Reader(maf_handle)
            while True:
                # File offset of the block about to be read; stored in the index.
                pos = maf_reader.file.tell()
                block = maf_reader.next()
                if block is None:
                    break
                blocks += 1
                for c in block.components:
                    spec = c.src
                    chrom = None
                    if "." in spec:
                        spec, chrom = spec.split(".", 1)
                    # species and species_chromosomes gain keys together, so
                    # the dict gives the same answer as scanning the list.
                    if spec not in species_chromosomes:
                        species.append(spec)
                        species_chromosomes[spec] = []
                    if chrom and chrom not in species_chromosomes[spec]:
                        species_chromosomes[spec].append(chrom)
                    if index_species is None or spec in index_species:
                        forward_strand_start = c.forward_strand_start
                        forward_strand_end = c.forward_strand_end
                        try:
                            forward_strand_start = int(forward_strand_start)
                            forward_strand_end = int(forward_strand_end)
                        except ValueError:
                            # start and end are not integers, can't add component to index, goto next component
                            # this likely only occurs when parse_e_rows is True?
                            # could a species exist as only e rows?
                            continue
                        if forward_strand_end > forward_strand_start:
                            # require positive length; i.e. certain lines have start = end = 0 and cannot be indexed
                            indexes.add(c.src, forward_strand_start, forward_strand_end, pos, max=c.src_size)
    except Exception as e:
        # most likely a bad MAF
        log.debug('Building MAF index on %s failed: %s' % (filename, e))
        return (None, [], {}, 0)
    return (indexes, species, species_chromosomes, blocks)
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
259
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
260 # builds and returns ( index, index_filename ) for specified maf_file
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
261
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
262
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def build_maf_index(maf_file, species=None):
    """Build an index for ``maf_file`` in a new temporary file.

    Returns ``(index, index_filename)`` where ``index`` is a
    ``bx.align.maf.Indexed`` opened on the freshly written index and
    ``index_filename`` is the path created by ``tempfile.mkstemp()``.
    Returns ``(None, None)`` when
    ``build_maf_index_species_chromosomes`` produced no index.
    """
    indexes, found_species, species_chromosomes, blocks = build_maf_index_species_chromosomes(maf_file, species)
    if indexes is not None:
        fd, index_filename = tempfile.mkstemp()
        # Binary mode: the index writer emits packed binary data, which a
        # text-mode handle rejects on Python 3.  The context manager closes
        # the descriptor even if writing fails.
        with os.fdopen(fd, 'wb') as out:
            indexes.write(out)
        return (bx.align.maf.Indexed(maf_file, index_filename=index_filename, keep_open=True, parse_e_rows=False), index_filename)
    return (None, None)
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
272
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
273
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def component_overlaps_region(c, region):
    """Return True when component ``c`` overlaps ``region``.

    ``c`` may be None, in which case the answer is False.  Otherwise the
    component's forward-strand start/end are compared with
    ``region.start`` / ``region.end``; intervals that merely touch at an
    endpoint do not count as overlapping.
    """
    if c is None:
        return False
    comp_start = c.get_forward_strand_start()
    comp_end = c.get_forward_strand_end()
    return region.start < comp_end and region.end > comp_start
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
281
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
282
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def chop_block_by_region(block, src, region, species=None, mincols=0):
    """Slice ``block`` down to the columns where ``src`` overlaps ``region``.

    Every component of ``block`` matching ``src`` that overlaps ``region``
    and has text contributes a column range; the block is sliced from the
    smallest start column to the largest end column.  The original score
    is copied onto the sliced block.  When ``species`` is given the block
    is limited to those species and all-gap columns are removed.

    Returns the chopped block, or None when no columns were selected or
    the sliced block's ``text_size`` is not greater than ``mincols``.
    """
    # This chopping method was designed to maintain consistency with how start/end padding gaps have been working in Galaxy thus far:
    #   behavior as seen when forcing blocks to be '+' relative to src sequence (ref) and using block.slice_by_component( ref, slice_start, slice_end )
    #   whether-or-not this is the 'correct' behavior is questionable, but this will at least maintain consistency
    # comments welcome
    first_col = block.text_size  # max for the min()
    last_col = 0  # min for the max()
    saved_score = block.score  # save old score for later use
    # We no longer assume only one occurrence of src per block, so we need to check them all
    for comp in iter_components_by_src(block, src):
        if not component_overlaps_region(comp, region):
            continue
        if comp.text is None:
            continue
        on_reverse = comp.strand == "-"
        if on_reverse:
            # We want our coord_to_col coordinates to be returned from positive stranded component
            comp = comp.reverse_complement()
        clamped_start = max(region.start, comp.start)
        clamped_end = min(region.end, comp.end)
        col_start = comp.coord_to_col(clamped_start)
        col_end = comp.coord_to_col(clamped_end)
        if on_reverse:
            # need to orient slice coordinates to the original block direction
            width = col_end - col_start
            col_end = len(comp.text) - col_start
            col_start = col_end - width
        first_col = min(col_start, first_col)
        last_col = max(col_end, last_col)

    if not first_col < last_col:
        return None
    block = block.slice(first_col, last_col)
    if not block.text_size > mincols:
        return None
    # restore old score, may not be accurate, but it is better than 0 for everything?
    block.score = saved_score
    if species is not None:
        block = block.limit_to_species(species)
        block.remove_all_gap_columns()
    return block
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
322
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
323
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def orient_block_by_region(block, src, region, force_strand=None):
    """Reverse-complement ``block`` if needed to match the wanted strand.

    The wanted strand is ``force_strand`` when given, otherwise
    ``region.strand``.  The strands of all components matching ``src``
    that overlap ``region`` are collected; the block is reverse
    complemented only when at least one such component exists and none of
    them is on the wanted strand.

    Returns the (possibly reverse-complemented) block.
    """
    # loop through components matching src,
    # make sure each of these components overlap region
    # cache strand for each of overlapping regions
    # if force_strand / region.strand not in strand cache, reverse complement
    # we could have 2 sequences with same src, overlapping region, on different strands, this would cause no reverse_complementing
    strands = [c.strand for c in iter_components_by_src(block, src) if component_overlaps_region(c, region)]
    wanted_strand = region.strand if force_strand is None else force_strand
    # The emptiness guard must cover the force_strand case too: previously
    # "strands and (A) or (B)" parsed as "(strands and A) or B", so a forced
    # strand flipped blocks that had no overlapping component at all.
    if strands and wanted_strand not in strands:
        block = block.reverse_complement()
    return block
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
334
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
335
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_oriented_chopped_blocks_for_region(index, src, region, species=None, mincols=0, force_strand=None):
    """Yield only the block from each (block, idx, offset) triple produced by
    get_oriented_chopped_blocks_with_index_offset_for_region().

    All arguments are forwarded unchanged to that generator.
    """
    oriented_triples = get_oriented_chopped_blocks_with_index_offset_for_region(
        index, src, region, species, mincols, force_strand
    )
    # the index object and file offset are not needed by callers of this variant
    for oriented_block, _idx, _offset in oriented_triples:
        yield oriented_block
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
339
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
340
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_oriented_chopped_blocks_with_index_offset_for_region(index, src, region, species=None, mincols=0, force_strand=None):
    """Yield (block, idx, offset) triples for the region, with each chopped
    block passed through orient_block_by_region() before being yielded.

    idx and offset are passed through untouched from
    get_chopped_blocks_with_index_offset_for_region().
    """
    chopped_triples = get_chopped_blocks_with_index_offset_for_region(index, src, region, species, mincols)
    for chopped_block, maf_idx, file_offset in chopped_triples:
        oriented_block = orient_block_by_region(chopped_block, src, region, force_strand)
        yield oriented_block, maf_idx, file_offset
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
344
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
345 # split a block with multiple occurrences of src into one block per src
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
346
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
347
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def iter_blocks_split_by_src(block, src):
    """Yield one new alignment per component of ``block`` whose src is ``src``.

    Each yielded alignment carries the original score, a deep copy of the
    attributes, the original text_size, the selected ``src`` component and
    every component belonging to a different src.
    """
    for selected in iter_components_by_src(block, src):
        split_block = bx.align.Alignment(score=block.score, attributes=deepcopy(block.attributes))
        split_block.text_size = block.text_size
        for component in block.components:
            # drop the other occurrences of src; keep the selected one and all foreign srcs
            if not (component == selected) and component.src == src:
                continue
            # components hold a reference to their alignment; copy them so the
            # original components keep pointing at the original block
            split_block.add_component(deepcopy(component))
        yield split_block
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
356
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
357 # split a block into multiple blocks with all combinations of a species appearing only once per block
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
358
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
359
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def iter_blocks_split_by_species(block, species=None):
    """Yield blocks in which each species appears at most once.

    The components of ``block`` are grouped by species and one new block is
    yielded for every combination that takes exactly one component from each
    group. If any group is empty (a requested species has no component in
    the block), nothing is yielded.

    block   -- alignment block to split; not modified.
    species -- optional list of species names to restrict the split to. When
               falsy, the species are discovered from the block's components
               via src_split().
    """
    def __split_components_by_species(components_by_species, new_block):
        # components_by_species must be a list (it is popped from the front)
        if components_by_species:
            # more species with components to add to this block
            components_by_species = deepcopy(components_by_species)
            spec_comps = components_by_species.pop(0)
            for c in spec_comps:
                newer_block = deepcopy(new_block)
                newer_block.add_component(deepcopy(c))
                for value in __split_components_by_species(components_by_species, newer_block):
                    yield value
        else:
            # no more components to add, yield this block
            yield new_block

    # divide components by species
    spec_dict = {}
    if not species:
        species = []
        for c in block.components:
            spec, _chrom = src_split(c.src)
            if spec not in spec_dict:
                spec_dict[spec] = []
                species.append(spec)
            spec_dict[spec].append(c)
    else:
        for spec in species:
            spec_dict[spec] = []
            for c in iter_components_by_src_start(block, spec):
                spec_dict[spec].append(c)

    empty_block = bx.align.Alignment(score=block.score, attributes=deepcopy(block.attributes))  # should we copy attributes?
    empty_block.text_size = block.text_size
    # call recursive function to split into each combo of spec/blocks.
    # dict.values() is a view on Python 3: it cannot be deep-copied and has no
    # pop(), so hand the helper a real list.
    for value in __split_components_by_species(list(spec_dict.values()), empty_block):
        sort_block_components_by_block(value, block)  # restore original component order
        yield value
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
397
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
398
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
399 # generator yielding only chopped and valid blocks for a specified region
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_chopped_blocks_for_region(index, src, region, species=None, mincols=0):
    """Yield only the block from each (block, idx, offset) triple produced by
    get_chopped_blocks_with_index_offset_for_region().

    All arguments are forwarded unchanged to that generator.
    """
    chopped_triples = get_chopped_blocks_with_index_offset_for_region(index, src, region, species, mincols)
    for chopped_block, _idx, _offset in chopped_triples:
        yield chopped_block
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
403
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
404
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_chopped_blocks_with_index_offset_for_region(index, src, region, species=None, mincols=0):
    """Yield (chopped_block, idx, offset) for every indexed block returned for
    ``src`` between region.start and region.end.

    Each block is passed through chop_block_by_region(); blocks for which
    that call returns None are skipped.
    """
    indexed_blocks = index.get_as_iterator_with_index_and_offset(src, region.start, region.end)
    for raw_block, maf_idx, file_offset in indexed_blocks:
        chopped = chop_block_by_region(raw_block, src, region, species, mincols)
        if chopped is None:
            continue
        yield chopped, maf_idx, file_offset
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
410
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
411 # returns a filled region alignment for specified regions
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
412
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
413
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_region_alignment(index, primary_species, chrom, start, end, strand='+', species=None, mincols=0, overwrite_with_gaps=True):
    """Create a RegionAlignment of size ``end - start`` and return it after
    passing it through fill_region_alignment().

    The RegionAlignment is constructed with ``species`` when that is given,
    otherwise with ``primary_species``. Every other argument is forwarded
    unchanged to fill_region_alignment().
    """
    alignment_species = primary_species if species is None else species
    alignment = RegionAlignment(end - start, alignment_species)
    return fill_region_alignment(
        alignment, index, primary_species, chrom, start, end, strand, species, mincols, overwrite_with_gaps
    )
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
420
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
421 # reduces a block to only positions existing in the src provided
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
422
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
423
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def reduce_block_by_primary_genome(block, species, chromosome, region_start):
    """Strip every alignment column that is a gap ('-') in the primary species.

    block        -- alignment block; must provide get_component_by_src() and
                    a ``components`` sequence whose items have ``src`` and
                    ``text`` attributes.
    species      -- name of the primary species (the part of src before the
                    first '.').
    chromosome   -- chromosome of the primary species; together with
                    ``species`` it forms the src "species.chromosome".
    region_start -- start coordinate that the returned offset is relative to.

    Returns a tuple (start_offset, species_texts) where start_offset is the
    primary component's start minus region_start and species_texts maps
    species name -> text with the primary-gap columns removed. When several
    components share a species name the last one wins.

    Raises KeyError if no component's species name equals ``species``.
    """
    src = "%s.%s" % (species, chromosome)
    ref = block.get_component_by_src(src)
    start_offset = ref.start - region_start
    species_texts = {}
    for c in block.components:
        species_texts[c.src.split('.')[0]] = c.text
    # Find the gap columns of the primary species once, then filter every
    # text in a single pass instead of popping column by column (which was
    # quadratic in the text length).
    gap_columns = set(i for i, char in enumerate(species_texts[species]) if char == '-')
    for spec, text in species_texts.items():
        species_texts[spec] = ''.join(char for i, char in enumerate(text) if i not in gap_columns)
    return (start_offset, species_texts)
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
441
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
442 # fills a region alignment
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
443
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
444
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def fill_region_alignment(alignment, index, primary_species, chrom, start, end, strand='+', species=None, mincols=0, overwrite_with_gaps=True):
    """Fill ``alignment`` with sequence from the indexed blocks overlapping a region.

    alignment           -- object providing set_range() / set_position();
                           it is modified in place and also returned.
    index               -- provides get_as_iterator_with_index_and_offset().
    primary_species,
    chrom               -- combined into the primary src "species.chrom".
    start, end          -- region bounds on the primary src.
    strand              -- strand assigned to the region used for orienting blocks.
    species, mincols    -- forwarded to chop_block_by_region().
    overwrite_with_gaps -- when true, each species' trimmed text is written
                           as one range (internal gap characters included);
                           otherwise only non-gap characters are written,
                           position by position.

    Blocks are processed in order of increasing score, so the
    highest-scoring block is written last.
    """
    region = bx.intervals.Interval(start, end)
    region.chrom = chrom
    region.strand = strand
    primary_src = "%s.%s" % (primary_species, chrom)

    # Order blocks overlapping this position by score, lowest first.
    # list.sort() is stable, so blocks with equal scores keep index order.
    blocks = [
        (float(block.score), idx, offset)
        for block, idx, offset in index.get_as_iterator_with_index_and_offset(primary_src, start, end)
    ]
    blocks.sort(key=lambda entry: entry[0])

    gap_chars_str = ''.join(GAP_CHARS)
    gap_chars_tuple = tuple(GAP_CHARS)
    # Loop through ordered blocks and layer by increasing score
    for _score, idx, offset in blocks:
        # need to handle each occurrence of sequence in block separately
        for block in iter_blocks_split_by_species(idx.get_at_offset(offset)):
            if not component_overlaps_region(block.get_component_by_src(primary_src), region):
                continue
            block = chop_block_by_region(block, primary_src, region, species, mincols)  # chop block
            if block is None:
                # chopping can reject a block (other callers in this module
                # guard against None as well); nothing to layer in that case
                continue
            block = orient_block_by_region(block, primary_src, region)  # orient block
            start_offset, species_texts = reduce_block_by_primary_genome(block, primary_species, chrom, start)
            for spec, text in species_texts.items():
                # we should trim gaps from both sides, since these are not positions in this species genome (sequence)
                text = text.rstrip(gap_chars_str)
                gap_offset = 0
                # count leading gap characters without re-slicing the string each time
                while text.startswith(gap_chars_tuple, gap_offset):
                    gap_offset += 1
                text = text[gap_offset:]
                if not text:
                    continue
                position = start_offset + gap_offset
                if overwrite_with_gaps:
                    alignment.set_range(position, spec, text)
                else:
                    for i, char in enumerate(text):
                        if char not in GAP_CHARS:
                            alignment.set_position(position + i, spec, char)
    return alignment
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
489
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
490 # returns a filled spliced region alignment for specified region with start and end lists
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
491
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
492
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_spliced_region_alignment(index, primary_species, chrom, starts, ends, strand='+', species=None, mincols=0, overwrite_with_gaps=True):
    """Create a SplicedAlignment for the given starts/ends and fill its exons.

    The alignment is constructed with the requested species list, or with
    only the primary species when species is None.  Each entry of
    alignment.exons is then passed to fill_region_alignment() together with
    its own start and end.

    Returns the SplicedAlignment object.
    """
    # fall back to the primary species alone when no species list was given
    alignment_species = [primary_species] if species is None else species
    spliced = SplicedAlignment(starts, ends, alignment_species)
    for exon in spliced.exons:
        # the caller's original `species` value (possibly None) is forwarded unchanged
        fill_region_alignment(exon, index, primary_species, chrom, exon.start, exon.end, strand, species, mincols, overwrite_with_gaps)
    return spliced
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
502
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
503 # loop through string array, only return non-commented lines
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
504
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
505
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def line_enumerator(lines, comment_start='#'):
    """Yield (number, line) for every line not starting with comment_start.

    Numbering starts at 1 and counts only the lines that are yielded;
    skipped comment lines do not advance the counter.
    """
    kept = (entry for entry in lines if not entry.startswith(comment_start))
    for numbered in enumerate(kept, 1):
        yield numbered
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
512
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
513 # read a GeneBed file, return list of starts, ends, raw fields
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
514
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
515
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_starts_ends_fields_from_gene_bed(line):
    """Parse one 12-column BED line into coding-exon starts and ends.

    Each block listed in columns 11 (sizes) and 12 (starts, relative to the
    value in column 2) is converted to absolute coordinates and clipped to
    the range given by columns 7 and 8; blocks that end up empty after
    clipping are dropped.

    Returns a tuple (starts, ends, fields) where starts and ends are parallel
    lists of ints and fields is the whitespace-split input line.

    Raises Exception when the line has fewer than 12 columns.
    """
    fields = line.split()
    # Requires at least 12 BED columns
    if len(fields) < 12:
        raise Exception("Not a proper 12 column BED line (%s)." % line)
    tx_start = int(fields[1])
    cds_start = int(fields[6])
    cds_end = int(fields[7])

    # Real lists are built here (instead of chained map() calls) so the values
    # can be traversed more than once; on Python 3 map() is a one-shot iterator
    # and the starts/ends would be paired incorrectly.
    block_starts = [int(offset) + tx_start for offset in fields[11].rstrip(',\n').split(',')]
    block_sizes = [int(size) for size in fields[10].rstrip(',').split(',')]

    # Calculate and store starts and ends of coding exons
    starts = []
    ends = []
    for block_start, block_size in zip(block_starts, block_sizes):
        start = max(block_start, cds_start)
        end = min(block_start + block_size, cds_end)
        if start < end:
            starts.append(start)
            ends.append(end)
    return (starts, ends, fields)
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
545
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
546
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def iter_components_by_src(block, src):
    """Yield each component of block.components whose src equals src."""
    matching = (component for component in block.components if component.src == src)
    for component in matching:
        yield component
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
551
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
552
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_components_by_src(block, src):
    """Return a list of the components of block.components whose src equals src."""
    return [component for component in block.components if component.src == src]
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
555
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
556
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def iter_components_by_src_start(block, src):
    """Yield each component of block.components whose src starts with src."""
    matching = (component for component in block.components if component.src.startswith(src))
    for component in matching:
        yield component
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
561
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
562
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_components_by_src_start(block, src):
    """Return a list of the components of block.components whose src starts with src."""
    return [component for component in block.components if component.src.startswith(src)]
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
565
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
566
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def sort_block_components_by_block(block1, block2):
    """Order block1.components in place by their position in block2.components.

    Every component of block1 must also be present in block2.components;
    list.index raises ValueError for one that is missing.

    Returns None, like list.sort().
    """
    # key= is accepted by list.sort on both Python 2 and Python 3, whereas the
    # cmp= keyword no longer exists on Python 3; sorting by index gives the
    # same order as comparing index differences.
    return block1.components.sort(key=block2.components.index)
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
572
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
573
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_species_in_maf(maf_filename):
    """Return the species seen in a MAF file, in order of first appearance.

    Every block read from maf_filename is passed to get_species_in_block();
    each species is added to the result list the first time it is seen.
    """
    species = []
    # the context manager closes the file once reading is done; it was
    # previously opened inline and never closed
    with open(maf_filename) as maf_file:
        for block in bx.align.maf.Reader(maf_file):
            for spec in get_species_in_block(block):
                if spec not in species:
                    species.append(spec)
    return species
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
581
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
582
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def parse_species_option(species):
    """Split a comma-separated species option into a list.

    Returns None when the option is empty/None or when any of its
    comma-separated entries is the literal string 'None'.
    """
    if not species:
        return None  # provided species was '' or None
    names = species.split(',')
    if 'None' in names:
        return None  # provided species had 'None' in it
    return names
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
589
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
590
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def remove_temp_index_file(index_filename):
    """Delete index_filename, silently ignoring any error (best effort)."""
    try:
        os.remove(index_filename)
    except Exception:
        # cleanup is best effort: a failure to delete is deliberately ignored
        pass
0
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
596
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
597 # Below are methods to deal with FASTA files
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
598
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
599
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_fasta_header(component, attributes=None, suffix=None):
    """Build a FASTA header line for an alignment component.

    The header has the form
    ">src(strand):start-end|key=value|...|suffix", using the component's
    forward-strand start and end.  Each entry of attributes adds one
    "key=value|" segment.  When suffix is empty or None, the first element of
    src_split(component.src) is used as the final segment instead.

    attributes may be omitted or None, which is treated as no attributes.
    Returns the header string (without a trailing newline).
    """
    header = ">%s(%s):%i-%i|" % (component.src, component.strand, component.get_forward_strand_start(), component.get_forward_strand_end())
    # .items() is available on both Python 2 and Python 3 dicts, while
    # .iteritems() only exists on Python 2
    for key, value in (attributes or {}).items():
        header = "%s%s=%s|" % (header, key, value)
    if suffix:
        header = "%s%s" % (header, suffix)
    else:
        header = "%s%s" % (header, src_split(component.src)[0])
    return header
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
609
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
610
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def get_attributes_from_fasta_header(header):
    """Parse a '|'-separated FASTA header into a dict of attributes.

    The first field is read as "species.chrom(strand):start-end" and fills
    the 'species', 'chrom', 'strand', 'start' and 'end' keys; parsing stops
    quietly at the first part that does not fit, keeping whatever was
    already extracted.  Middle fields of the form "key=value" are stored as
    given, and when there is more than one field the last one is stored
    under '__suffix__'.

    Returns an empty dict for an empty header.
    """
    attributes = {}
    if not header:
        return attributes
    fields = header.lstrip('>').strip().split('|')
    try:
        name_part = fields[0].split('(', 1)
        species_chrom = name_part[0].split('.', 1)
        attributes['species'] = species_chrom[0]
        # without a '.' separator the whole name is used as the chrom as well
        attributes['chrom'] = species_chrom[-1]
        strand_part = name_part[1].split(')', 1)
        attributes['strand'] = strand_part[0]
        coords = strand_part[1].lstrip(':').split('-')
        attributes['start'] = int(coords[0])
        attributes['end'] = int(coords[1])
    except Exception:
        # fields 0 is not a region coordinate
        pass
    # everything between the first and the last field may carry key=value pairs
    for field in fields[1:-1]:
        key_value = field.split('=', 1)
        if len(key_value) == 2:
            attributes[key_value[0]] = key_value[1]
    if len(fields) > 1:
        attributes['__suffix__'] = fields[-1]
    return attributes
2126e1b833a2 Imported from capsule None
devteam
parents:
diff changeset
643
2
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
644
16df616b39e5 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_concatenate_by_species commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 1
diff changeset
def iter_fasta_alignment(filename):
    """
    Iterate over the alignment blocks stored in a FASTA file.

    Blocks are separated by blank (or whitespace-only) lines. For every block
    that contains at least one '>' header line, a list of fastaComponent
    objects is yielded, one per header and in file order.

    Each component has two attributes:
      species -- the 'species' entry of the dict returned by
                 get_attributes_from_fasta_header() for the header line
      text    -- the concatenation of the sequence lines that follow the
                 header, with surrounding whitespace and line breaks removed

    Sequence lines that appear before the first header of a block are
    ignored, and blocks without any header are not yielded.

    A header for which get_attributes_from_fasta_header() provides no
    'species' entry raises KeyError.
    """
    class fastaComponent:
        """One sequence of an alignment block: a species name and its text."""

        def __init__(self, species, text=""):
            self.species = species
            self.text = text

        def extend(self, text):
            # Append another chunk of sequence, dropping any line breaks and
            # surrounding whitespace so that text stays one contiguous string.
            self.text = self.text + text.replace('\n', '').replace('\r', '').strip()

    with open(filename, 'r') as f:
        components = []
        # Iterating the file object directly reads line by line and stops at
        # end of file, so no explicit readline()/EOF check is needed.
        for line in f:
            line = line.strip()
            if not line:
                # A blank line terminates the current block (if there is one).
                if components:
                    yield components
                components = []
            elif line.startswith('>'):
                attributes = get_attributes_from_fasta_header(line)
                components.append(fastaComponent(attributes['species']))
            elif components:
                # Sequence data belongs to the most recently seen header.
                components[-1].extend(line)
        # The last block is usually not followed by a blank line.
        if components:
            yield components