annotate genetrack_util.py @ 4:b41a4bb828a3 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 2772547f531819d3f6d892ed041fa39b82e3550f
author iuc
date Wed, 05 Jul 2017 11:56:54 -0400
parents 41887967ef14
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
1 import bisect
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
2 import math
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
3 import re
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
4 import subprocess
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
5 import sys
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
6 import tempfile
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
7
3
41887967ef14 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
iuc
parents: 1
diff changeset
8 import numpy
4
b41a4bb828a3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 2772547f531819d3f6d892ed041fa39b82e3550f
iuc
parents: 3
diff changeset
9 from six import Iterator
3
41887967ef14 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
iuc
parents: 1
diff changeset
10
0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
# Identifiers for the two input layouts this module recognises.  One of them
# is stored in ChromosomeManager.format once a line has been sniffed.
GFF_EXT, SCIDX_EXT = 'gff', 'scidx'
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
13
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
14
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def noop(data):
    """
    Identity conversion: return ``data`` exactly as it was passed in
    """
    unchanged = data
    return unchanged
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
17
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
18
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def zeropad_to_numeric(data):
    """
    Strip the padding zero from chromosome names: every ``chr0<digit>``
    occurrence in ``data`` becomes ``chr<digit>``
    """
    padded_name = re.compile(r'chr0(\d)')
    return padded_name.sub(r'chr\1', data)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
21
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
22
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def numeric_to_zeropad(data):
    """
    Pad single-digit chromosome names with a zero: ``chr<digit>`` followed
    by a non-digit or the end of the string becomes ``chr0<digit>``
    """
    single_digit_name = re.compile(r'chr(\d([^\d]|$))')
    return single_digit_name.sub(r'chr0\1', data)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
25
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
26
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
# Names of the chromosome-naming styles that have converters below.
FORMATS = ['zeropad', 'numeric']
# Converter applied to incoming data, keyed by the style the data is in.
IN_CONVERT = dict(zeropad=zeropad_to_numeric, numeric=noop)
# Converter applied to outgoing data, keyed by the style to produce.
OUT_CONVERT = dict(zeropad=numeric_to_zeropad, numeric=noop)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
30
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
31
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def conversion_functions(in_fmt, out_fmt):
    """
    Returns the proper list of functions to apply to perform a conversion

    The list holds the input converter for ``in_fmt`` followed by the output
    converter for ``out_fmt``.  An unknown format name raises ``KeyError``.
    """
    incoming = IN_CONVERT[in_fmt]
    outgoing = OUT_CONVERT[out_fmt]
    return [incoming, outgoing]
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
37
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
38
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def convert_data(data, in_fmt, out_fmt):
    """
    Convert ``data`` from the ``in_fmt`` naming style to ``out_fmt`` by
    running it through each function from conversion_functions() in order
    """
    pipeline = conversion_functions(in_fmt, out_fmt)
    converted = data
    for step in pipeline:
        converted = step(converted)
    return converted
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
43
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
44
4
b41a4bb828a3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 2772547f531819d3f6d892ed041fa39b82e3550f
iuc
parents: 3
diff changeset
45 class ChromosomeManager(Iterator):
0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
46 """
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
47 Manages a CSV reader of an index file to only load one chrom at a time
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
48 """
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
49
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
50 def __init__(self, reader):
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
51 self.done = False
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
52 self.reader = reader
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
53 self.processed_chromosomes = []
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
54 self.current_index = 0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
55 self.next_valid()
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
56
4
b41a4bb828a3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 2772547f531819d3f6d892ed041fa39b82e3550f
iuc
parents: 3
diff changeset
57 def __next__(self):
b41a4bb828a3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 2772547f531819d3f6d892ed041fa39b82e3550f
iuc
parents: 3
diff changeset
58 self.line = next(self.reader)
0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
59
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
60 def is_valid(self, line):
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
61 if len(line) not in [4, 5, 9]:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
62 return False
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
63 try:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
64 [int(i) for i in line[1:]]
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
65 self.format = SCIDX_EXT
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
66 return True
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
67 except ValueError:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
68 try:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
69 if len(line) < 6:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
70 return False
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
71 [int(line[4]), int(line[5])]
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
72 self.format = GFF_EXT
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
73 return True
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
74 except ValueError:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
75 return False
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
76
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
77 def next_valid(self):
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
78 """
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
79 Advance to the next valid line in the reader
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
80 """
4
b41a4bb828a3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 2772547f531819d3f6d892ed041fa39b82e3550f
iuc
parents: 3
diff changeset
81 self.line = next(self.reader)
0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
82 s = 0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
83 while not self.is_valid(self.line):
4
b41a4bb828a3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 2772547f531819d3f6d892ed041fa39b82e3550f
iuc
parents: 3
diff changeset
84 self.line = next(self.reader)
0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
85 s += 1
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
86 if s > 0:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
87 # Skip initial line(s) of file
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
88 pass
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
89
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def parse_line(self, line):
    """
    Turn a split line into a three-item read, according to ``self.format``

    SCIDX lines give columns 1-3 as integers.  Any other format gives
    column 3 as an integer index followed by columns 6 and 5 unconverted
    (add_read() unpacks these as index, strand, value).
    """
    if self.format != SCIDX_EXT:
        return [int(line[3]), line[6], line[5]]
    return [int(line[1]), int(line[2]), int(line[3])]
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
95
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
96 def chromosome_name(self):
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
97 """
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
98 Return the name of the chromosome about to be loaded
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
99 """
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
100 return self.line[0]
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
101
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
102 def load_chromosome(self, collect_data=True):
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
103 """
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
104 Load the current chromosome into an array and return it
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
105 """
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
106 cname = self.chromosome_name()
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
107 if cname in self.processed_chromosomes:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
108 stop_err('File is not grouped by chromosome')
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
109 self.data = []
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
110 while self.line[0] == cname:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
111 if collect_data:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
112 read = self.parse_line(self.line)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
113 if read[0] < self.current_index:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
114 msg = 'Reads in chromosome %s are not sorted by index. (At index %d)' % (cname, self.current_index)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
115 stop_err(msg)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
116 self.current_index = read[0]
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
117 self.add_read(read)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
118 try:
4
b41a4bb828a3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 2772547f531819d3f6d892ed041fa39b82e3550f
iuc
parents: 3
diff changeset
119 next(self)
0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
120 except StopIteration:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
121 self.done = True
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
122 break
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
123 self.processed_chromosomes.append(cname)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
124 self.current_index = 0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
125 data = self.data
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
126 # Don't retain reference anymore to save memory
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
127 del self.data
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
128 return data
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
129
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def add_read(self, read):
    """
    Append ``read`` to ``self.data``

    SCIDX reads are stored unchanged.  For any other format ``read`` is
    unpacked as (index, strand, value) and accumulated into an
    ``[index, plus_total, minus_total]`` entry, so consecutive reads at the
    same index are merged.  An empty or '.' value counts as 1.  stop_err()
    is called for an index lower than the last stored one or for a strand
    other than '+' or '-'.
    """
    if self.format == SCIDX_EXT:
        self.data.append(read)
        return
    index, strand, value = read
    value = 1 if value in ('', '.') else int(value)
    if not self.data:
        self.data.append([index, 0, 0])
    last_index = self.data[-1][0]
    if last_index < index:
        # First read at a new, higher index: open a fresh entry
        self.data.append([index, 0, 0])
    elif last_index != index:
        msg = 'Reads in chromosome %s are not sorted by index. (At index %d)' % (self.chromosome_name(), index)
        stop_err(msg)
    entry = self.data[-1]
    # Slot of the entry that accumulates each strand's values
    slot = {'+': 1, '-': 2}.get(strand)
    if slot is None:
        msg = 'Strand "%s" at chromosome "%s" index %d is not valid.' % (strand, self.chromosome_name(), index)
        stop_err(msg)
    else:
        entry[slot] += value
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
157
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def skip_chromosome(self):
    """
    Skip over the current chromosome: it is loaded with collect_data=False
    so that its data is discarded
    """
    self.load_chromosome(collect_data=False)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
163
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
164
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
class Peak(object):
    """
    A peak located at a read index, together with the start/end span
    around it
    """
    def __init__(self, index, pos_width, neg_width):
        # The span reaches neg_width below and pos_width above the index
        self.start, self.end = index - neg_width, index + pos_width
        self.index = index
        # value starts at 0; the deleted and safe flags start out False
        self.value = 0
        self.deleted = self.safe = False

    def __repr__(self):
        summary = (self.index, self.value)
        return '[%d] %d' % summary
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
176
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
177
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def gff_row(cname, start, end, score, source, type='.', strand='.', phase='.', attrs=None):
    """
    Build one GFF row as a 9-tuple.

    The fields are returned in the order (cname, source, type, start, end,
    score, strand, phase, attributes), where attributes is the string
    produced by gff_attrs(attrs). attrs is an optional mapping; when it is
    omitted, None or empty, gff_attrs renders it as '.'.
    """
    # None replaces the former shared mutable {} default; gff_attrs treats
    # every falsy value as "no attributes", so the output is unchanged.
    return (cname, source, type, start, end, score, strand, phase, gff_attrs(attrs))
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
180
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
181
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def gff_attrs(d):
    """
    Render a mapping as a GFF attribute string of ';'-separated key=value
    pairs, or '.' when there is nothing to render
    """
    if d:
        pairs = []
        for key, val in d.items():
            pairs.append('%s=%s' % (key, val))
        return ';'.join(pairs)
    return '.'
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
186
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
187
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def stop_err(msg):
    """
    Write msg to standard error, then terminate with exit status 1
    """
    err_stream = sys.stderr
    err_stream.write(msg)
    # Same effect as sys.exit(1), which raises SystemExit(1)
    raise SystemExit(1)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
191
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
192
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def is_int(i):
    """
    Return True if int(i) succeeds, False otherwise.

    Values int() cannot parse (ValueError, e.g. 'abc') and values of a type
    int() does not accept (TypeError, e.g. None) both count as "not an int"
    instead of letting the TypeError escape to the caller.
    """
    try:
        int(i)
    except (TypeError, ValueError):
        return False
    return True
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
199
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
200
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def make_keys(data):
    """
    Collect the first element of every read in data, preserving order
    """
    keys = []
    for read in data:
        keys.append(read[0])
    return keys
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
203
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
204
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def make_peak_keys(peaks):
    """
    Collect the index attribute of every peak in peaks, preserving order
    """
    keys = []
    for peak in peaks:
        keys.append(peak.index)
    return keys
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
207
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
208
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def get_window(data, start, end, keys):
    """
    Slice out the reads whose key lies between start and end, both bounds
    included. keys is searched with bisect, and the positions found are
    used to slice data.
    """
    # bisect_left keeps keys equal to start; bisect_right keeps keys equal to end
    first, past_last = bisect.bisect_left(keys, start), bisect.bisect_right(keys, end)
    return data[first:past_last]
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
216
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
217
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def get_index(value, keys):
    """
    Locate value in keys with bisect_left: the result is the leftmost
    position at which value could be inserted into the sorted keys
    """
    position = bisect.bisect_left(keys, value)
    return position
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
223
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
224
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def get_range(data):
    """
    Return (lo, hi): the smallest and largest first element found among
    the items of data.

    The first elements are collected in a single pass, so data may be any
    iterable (including a one-shot iterator) and is only traversed once.
    Raises ValueError if data is empty.
    """
    indexes = [item[0] for item in data]
    return min(indexes), max(indexes)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
229
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
230
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def get_chunks(lo, hi, size, overlap=500):
    """
    Split the range lo..hi into consecutive pieces of at most size each.
    The result is a list of (slice_range, process_range) pairs, each range
    being a (start, end) 2-tuple. process_range pieces do not overlap and
    should be handed to process_chromosome unchanged; slice_range is the
    same piece widened by overlap on both sides and should be used to slice
    the data (via get_window) that is handed to process_chromosome.
    """
    def chunk_for(process_start):
        # Both ends are clamped so that no range leaves lo..hi
        process_end = min(process_start + size, hi)
        widened = (max(process_start - overlap, lo), min(process_end + overlap, hi))
        return widened, (process_start, process_end)

    return [chunk_for(begin) for begin in range(lo, hi, size)]
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
250
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
251
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def allocate_array(data, width):
    """
    Allocates a new zero-filled float array large enough to fit all reads
    in data plus a margin of width on each side. Returns the array and the
    shift (number to add to a read index to get the position in the array it
    should be at).
    """
    lo, hi = get_range(data)
    rng = hi - lo
    shift = width - lo
    # The builtin float replaces numpy.float: that alias was deprecated in
    # NumPy 1.20 and removed in 1.24, and it was only ever the builtin float.
    return numpy.zeros(rng + width * 2, float), shift
0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
263
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
264
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def normal_array(width, sigma, normalize=True):
    """
    Returns a float array sampling a zero-mean normal curve with standard
    deviation sigma at the integer offsets -width .. width - 1
    (2 * width values).

    With normalize=False the values are exp(-x^2 / (2 * sigma^2)), so the
    value at offset 0 is 1. With normalize=True they are additionally
    scaled by 1 / sqrt(2 * pi * sigma^2).
    """
    sigma2 = float(sigma)**2

    def normal_func(x):
        return math.exp(-x * x / (2 * sigma2))

    # width is the half of the distribution
    # The builtin float replaces numpy.float, an alias removed in NumPy 1.24
    values = numpy.array([normal_func(x) for x in range(-width, width)], float)
    # normalization
    if normalize:
        values = 1.0 / math.sqrt(2 * numpy.pi * sigma2) * values
    return values
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
281
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
282
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
283 def call_peaks(array, shift, data, keys, direction, down_width, up_width, exclusion):
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
284 peaks = []
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
285
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
286 def find_peaks():
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
287 # Go through the array and call each peak
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
288 results = (array > numpy.roll(array, 1)) & (array > numpy.roll(array, -1))
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
289 indexes = numpy.where(results)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
290 for index in indexes[0]:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
291 pos = down_width or exclusion // 2
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
292 neg = up_width or exclusion // 2
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
293 # Reverse strand
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
294 if direction == 2:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
295 # Swap positive and negative widths
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
296 pos, neg = neg, pos
1
df7ac50ade5d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 6cd0f93f4eb8649802683ae4c189c8ad48827a49
iuc
parents: 0
diff changeset
297 peaks.append(Peak(int(index) - shift, pos, neg))
0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
298 find_peaks()
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
299
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
300 def calculate_reads():
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
301 # Calculate the number of reads in each peak
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
302 for peak in peaks:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
303 reads = get_window(data, peak.start, peak.end, keys)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
304 peak.value = sum([read[direction] for read in reads])
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
305 # Flat list of indexes with frequency
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
306 indexes = [r for read in reads for r in [read[0]] * read[direction]]
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
307 peak.stddev = numpy.std(indexes)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
308 calculate_reads()
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
309
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
310 def perform_exclusion():
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
311 # Process the exclusion zone
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
312 peak_keys = make_peak_keys(peaks)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
313 peaks_by_value = peaks[:]
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
314 peaks_by_value.sort(key=lambda peak: -peak.value)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
315 for peak in peaks_by_value:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
316 peak.safe = True
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
317 window = get_window(peaks,
1
df7ac50ade5d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 6cd0f93f4eb8649802683ae4c189c8ad48827a49
iuc
parents: 0
diff changeset
318 peak.index - exclusion // 2,
df7ac50ade5d planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit 6cd0f93f4eb8649802683ae4c189c8ad48827a49
iuc
parents: 0
diff changeset
319 peak.index + exclusion // 2,
0
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
320 peak_keys)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
321 for excluded in window:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
322 if excluded.safe:
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
323 continue
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
324 i = get_index(excluded.index, peak_keys)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
325 del peak_keys[i]
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
326 del peaks[i]
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
327 perform_exclusion()
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
328 return peaks
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
329
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
330
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def process_chromosome(cname, data, writer, process_bounds, width, sigma, down_width, up_width, exclusion, filter):
    """
    Call peaks on both strands of a single chromosome and write them out.

    cname is the chromosome name, data is the list of reads (each read
    unpacks to an index, a forward count and a reverse count), writer is
    the object whose writerow() receives each GFF row, and process_bounds
    is a 2-item sequence: only peaks whose index lies strictly between its
    two values are written.  width and sigma shape the normal curve added
    for every read; down_width, up_width and exclusion are handed to
    call_peaks unchanged.  Only peaks whose value is greater than filter
    are written.  Returns None; does nothing when data is empty.
    """
    if not data:
        return
    keys = make_keys(data)
    # One accumulator per strand, each holding the sum of the normals.
    forward_array, forward_shift = allocate_array(data, width)
    reverse_array, reverse_shift = allocate_array(data, width)
    normal = normal_array(width, sigma)

    # Add each read's normal, scaled by its count, around the read position.
    for index, forward, reverse in data:
        if forward:
            centre = index + forward_shift
            forward_array[centre - width:centre + width] += normal * forward
        if reverse:
            centre = index + reverse_shift
            reverse_array[centre - width:centre + width] += normal * reverse

    # The fifth argument selects the strand: 1 is forward, 2 is reverse.
    forward_peaks = call_peaks(forward_array, forward_shift, data, keys, 1, down_width, up_width, exclusion)
    reverse_peaks = call_peaks(reverse_array, reverse_shift, data, keys, 2, down_width, up_width, exclusion)
    # Convert chromosome name in preparation for writing output
    cname = convert_data(cname, 'zeropad', 'numeric')

    def emit(strand, peak):
        # Start coordinates are clamped so they never fall below 1.
        start, end, score, stddev = max(peak.start, 1), peak.end, peak.value, peak.stddev
        if not score > filter:
            return
        # This version of genetrack outputs only gff files.
        row = gff_row(cname=cname,
                      source='genetrack',
                      start=start,
                      end=end,
                      score=score,
                      strand=strand,
                      attrs={'stddev': stddev})
        writer.writerow(row)

    # Forward-strand peaks are written first, then reverse-strand peaks.
    for strand, strand_peaks in (('+', forward_peaks), ('-', reverse_peaks)):
        for peak in strand_peaks:
            if process_bounds[0] < peak.index < process_bounds[1]:
                emit(strand, peak)
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
381
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
382
25cd59a002d9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/genetrack commit e96df94dba60050fa28aaf55b5bb095717a5f260
iuc
parents:
diff changeset
def sort_chromosome_reads_by_index(input_path):
    """
    Return the path of a new file holding the lines of input_path sorted
    by chromosome (field 1) and then numerically by index (field 4).

    The sorting is delegated to the external ``sort`` utility.  The output
    is a temporary file that is NOT deleted automatically; the caller is
    responsible for removing it.

    Raises subprocess.CalledProcessError if ``sort`` exits with a non-zero
    status (for example when input_path cannot be read), instead of
    silently returning a path to an empty file.
    """
    # Will this sort produce different results across platforms?
    # The arguments are passed as a list, without a shell, so characters
    # such as quotes, "$" or backticks in input_path are taken literally;
    # "--" stops a path beginning with "-" from being read as an option.
    command = ['sort', '-k', '1,1', '-k', '4,4n', '--', input_path]
    # delete=False because the file has to outlive this function; the
    # with-block only closes our handle once sort has finished writing.
    with tempfile.NamedTemporaryFile(delete=False) as output:
        subprocess.check_call(command, stdout=output)
    return output.name