annotate smRtools.py @ 1:dce695815b0f draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit 87fad8a906abdb9dc2ecaa2457fb26ab2b09895d
author artbio
date Tue, 11 Jul 2017 13:10:00 -0400
parents 234b83159ea8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
1 #!/usr/bin/python
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
2 # version 1 7-5-2012 unification of the SmRNAwindow class
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
3
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
4 import sys, subprocess
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
5 from collections import defaultdict
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
6 from numpy import mean, median, std
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
7 ##Disable scipy import temporarily, as no working scipy on toolshed.
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
8 ##from scipy import stats
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
9
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def get_fasta(index="/home/galaxy/galaxy-dist/bowtie/5.37_Dmel/5.37_Dmel"):
    '''Return a dictionary with fasta identifiers as keys and sequences as values.

    The records are obtained by running ``bowtie-inspect -a 0 <index>`` and
    parsing its standard output, so ``index`` is whatever path bowtie-inspect
    accepts for its index argument.

    Only the first whitespace-delimited word of each header is used as the
    identifier, because bowtie splits headers on spaces.

    Returns an empty dictionary when bowtie-inspect prints no record.
    Raises RuntimeError when bowtie-inspect exits with a non-zero status.
    '''
    # universal_newlines=True makes the pipes text-mode, so the ">" test
    # below works on Python 3 as well as Python 2.  stderr is captured
    # separately so that diagnostics never end up inside a sequence.
    p = subprocess.Popen(args=["bowtie-inspect", "-a", "0", index],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         universal_newlines=True)
    output, errors = p.communicate()  # reads both pipes and waits for exit
    if p.returncode != 0:
        raise RuntimeError("bowtie-inspect failed on %s (exit status %s): %s"
                           % (index, p.returncode, errors.strip()))
    item_dic = {}
    current_item = None  # identifier of the record being accumulated
    stringlist = []      # sequence chunks of the current record
    for line in output.splitlines():
        if line.startswith(">"):
            if current_item is not None:
                # dump the sequence of the previous item
                item_dic[current_item] = "".join(stringlist)
            words = line[1:].split()
            # take the first word before space because bowtie splits headers
            current_item = words[0] if words else ""
            item_dic[current_item] = ""
            stringlist = []
        elif current_item is not None:
            # lines seen before the first header belong to no record
            stringlist.append(line.rstrip())
    if current_item is not None:
        item_dic[current_item] = "".join(stringlist)  # for the last item
    return item_dic
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
29
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def get_fasta_headers(index):
    '''Return a dictionary whose keys are the sequence names of a bowtie index.

    The names are obtained by running ``bowtie-inspect -n <index>``; every
    key maps to the constant 1.  Only the first whitespace-delimited word of
    each output line is kept, because bowtie splits headers on spaces.

    Raises RuntimeError when bowtie-inspect exits with a non-zero status.
    '''
    # Text-mode pipes keep the keys as str on Python 3; stderr is captured
    # separately so diagnostics are not mistaken for headers.
    p = subprocess.Popen(args=["bowtie-inspect", "-n", index],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         universal_newlines=True)
    output, errors = p.communicate()  # reads both pipes and waits for exit
    if p.returncode != 0:
        raise RuntimeError("bowtie-inspect failed on %s (exit status %s): %s"
                           % (index, p.returncode, errors.strip()))
    item_dic = {}
    for line in output.splitlines():
        words = line.split()
        if words:  # skip blank lines instead of failing on words[0]
            item_dic[words[0]] = 1
    return item_dic
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
39
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
40
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def get_file_sample(file, numberoflines):
    '''Return ``numberoflines`` lines drawn at random from the file at path ``file``.

    The whole file is read into memory and split into lines (without line
    terminators); the sample is drawn without replacement with
    ``random.sample``.

    If the file has fewer lines than requested, the string
    "sample size exceeds file size" is returned instead of a list.
    '''
    # Imported locally: random is not among the imports visible at the top of
    # this file, and the original docstring asked callers to import it.
    import random
    with open(file) as handle:  # closed even if reading fails
        fullfile = handle.read().splitlines()
    if len(fullfile) < numberoflines:
        return "sample size exceeds file size"
    return random.sample(fullfile, numberoflines)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
49
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def get_fasta_from_history(file):
    '''Parse the fasta file at path ``file`` into {identifier: sequence}.

    The identifier is the first whitespace-delimited word following ">" on a
    header line (an empty string if the header has no word).  Sequence lines
    of a record are concatenated after removing their line terminator.
    Lines found before the first header are ignored, and an empty file gives
    an empty dictionary.
    '''
    item_dic = {}
    current_item = None  # identifier of the record being accumulated
    stringlist = []      # sequence chunks of the current record
    with open(file, "r") as F:  # closed even if parsing fails
        for line in F:
            if line.startswith(">"):
                if current_item is not None:
                    # dump the sequence of the previous item
                    item_dic[current_item] = "".join(stringlist)
                words = line[1:].split()
                current_item = words[0] if words else ""
                item_dic[current_item] = ""
                stringlist = []
            elif current_item is not None:
                # Strip only the terminator: slicing off the last character
                # would lose a base when the file has no final newline.
                stringlist.append(line.rstrip("\r\n"))
    if current_item is not None:
        item_dic[current_item] = "".join(stringlist)  # for the last item
    return item_dic
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
65
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def antipara(sequence):
    '''Return the reverse complement of a DNA sequence.

    A<->T, G<->C and N->N are complemented; lower-case bases are accepted
    and keep their case.  Any other character raises KeyError.
    '''
    antidict = {"A": "T", "T": "A", "G": "C", "C": "G", "N": "N",
                "a": "t", "t": "a", "g": "c", "c": "g", "n": "n"}
    return "".join([antidict[base] for base in reversed(sequence)])
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
70
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def RNAtranslate(sequence):
    '''Return ``sequence`` with every character other than A, G, C or N replaced by "U".'''
    translated = []
    for base in sequence:
        if base in "AGCN":
            translated.append(base)
        else:
            # anything else (T, but also any other symbol) becomes U
            translated.append("U")
    return "".join(translated)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def DNAtranslate(sequence):
    '''Return ``sequence`` with every character other than A, G, C or N replaced by "T".'''
    translated = []
    for base in sequence:
        if base in "AGCN":
            translated.append(base)
        else:
            # anything else (U, but also any other symbol) becomes T
            translated.append("T")
    return "".join(translated)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
75
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def RNAfold(sequence_list):
    '''Run ``RNAfold --noPS`` on the sequences and return {sequence: energy}.

    The sequences are written to RNAfold's standard input, one per line.
    In the output, a line starting with N, A, T, U, G or C is taken as a
    sequence and passed through DNAtranslate to build the key.  A line
    starting with "(", "." or ")" is taken as the structure line, and the
    text after its last "(" is parsed as the energy.  Energies are stored
    as strings, as produced by ``str(float(...))``.

    A structure line is paired with the sequence line most recently seen;
    blank lines and structure lines with no pending sequence are ignored.
    '''
    thestring = "\n".join(sequence_list)
    # Text-mode pipes so a str can be sent and read back on Python 3 too;
    # stderr is kept apart from the output that gets parsed.
    p = subprocess.Popen(args=["RNAfold", "--noPS"], stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         universal_newlines=True)
    output = p.communicate(thestring)[0]  # also waits for the process to exit
    energies = {}
    pending_sequence = None  # sequence line still waiting for its structure line
    for line in output.splitlines():
        if not line:
            continue
        first = line[0]
        if first in "NATUGC":
            pending_sequence = DNAtranslate(line)
        elif first in "(.)" and pending_sequence is not None:
            # the energy is what follows the last "(" minus the closing ")"
            energy = float(line.rstrip().split("(")[-1].rstrip(")"))
            energies[pending_sequence] = str(energy)
            pending_sequence = None
    return energies
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
94
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def extractsubinstance(start, end, instance):
    '''Build a SmRNAwindow restricted to positions start..end of ``instance``.

    Kept as a function external to the class (the original note says this
    was to test whether it saves memory).

    The new window receives ``instance.sequence[start-1:end]`` and ``start``
    as its window offset, so start and end are used as 1-based, inclusive
    coordinates.  Its ``gene`` label is extended with the first and last
    coordinates of the window.  Every readDict entry of ``instance`` whose
    key is a position in start..end, or the negative of such a position, is
    assigned to the new window's readDict; the values are assigned as-is,
    not copied.
    NOTE(review): negative keys presumably hold reverse-strand reads;
    confirm against SmRNAwindow.
    '''
    subinstance = SmRNAwindow(instance.gene, instance.sequence[start-1:end], start)
    subinstance.gene = "%s %s %s" % (subinstance.gene, subinstance.windowoffset, subinstance.windowoffset + subinstance.size - 1)
    # "in" replaces dict.has_key(), which no longer exists in Python 3; a
    # membership test also never creates a key, even on a defaultdict.
    upcoordinate = [i for i in range(start, end + 1) if i in instance.readDict]
    downcoordinate = [-i for i in range(start, end + 1) if -i in instance.readDict]
    for i in upcoordinate:
        subinstance.readDict[i] = instance.readDict[i]
    for i in downcoordinate:
        subinstance.readDict[i] = instance.readDict[i]
    return subinstance
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
106
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
107 class HandleSmRNAwindows:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def __init__(self, alignmentFile="~", alignmentFileFormat="tabular", genomeRefFile="~", genomeRefFormat="bowtieIndex", biosample="undetermined", size_inf=None, size_sup=1000, norm=1.0):
    '''Load the reference sequences, create one SmRNAwindow per item and read the alignments.

    alignmentFile / alignmentFileFormat: path of the alignment file and its
        format ("tabular" or "sam").
    genomeRefFile / genomeRefFormat: reference and its kind; "bowtieIndex"
        is loaded with get_fasta, "fastaSource" with get_fasta_from_history.
    biosample, norm: stored and passed on to every SmRNAwindow.
    size_inf, size_sup: stored on the instance; they are not used here.

    Raises ValueError for any other genomeRefFormat.  Without this check
    self.itemDict was never assigned and the loop below failed with an
    AttributeError that did not name the cause.
    '''
    self.biosample = biosample
    self.alignmentFile = alignmentFile
    self.alignmentFileFormat = alignmentFileFormat  # can be "tabular" or "sam"
    self.genomeRefFile = genomeRefFile
    self.genomeRefFormat = genomeRefFormat  # can be "bowtieIndex" or "fastaSource"
    self.alignedReads = 0
    self.instanceDict = {}
    self.size_inf = size_inf
    self.size_sup = size_sup
    self.norm = norm
    if genomeRefFormat == "bowtieIndex":
        self.itemDict = get_fasta(genomeRefFile)
    elif genomeRefFormat == "fastaSource":
        self.itemDict = get_fasta_from_history(genomeRefFile)
    else:
        raise ValueError("unsupported genomeRefFormat %r: expected \"bowtieIndex\" or \"fastaSource\"" % (genomeRefFormat,))
    # create as many instances as there are items
    for item, sequence in self.itemDict.items():
        self.instanceDict[item] = SmRNAwindow(item, sequence=sequence, windowoffset=1, biosample=self.biosample, norm=self.norm)
    self.readfile()
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
126
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def readfile(self):
    '''Parse self.alignmentFile and hand every aligned read to its SmRNAwindow.

    Supported values of self.alignmentFileFormat:
      "tabular"      whitespace-separated lines read as: column 1 = polarity,
                     column 2 = reference name, column 3 = 0-based offset,
                     column 4 = read sequence (column 0 is ignored).
      "bam" / "sam"  read through pysam; unaligned reads (tid == -1) are skipped.

    When self.size_inf is truthy, only reads whose length lies in
    [self.size_inf, self.size_sup] are kept; otherwise every read is kept.
    Each kept read is forwarded to self.instanceDict[reference].addread()
    and self.alignedReads is incremented.

    Returns self.instanceDict, or None when the format is not recognised.
    Raises KeyError when a read maps to a reference absent from
    self.instanceDict, and IndexError/ValueError on a malformed tabular line.
    '''
    def register(gene, polarity, offset, size):
        # A falsy size_inf disables size filtering altogether.
        if self.size_inf and not (self.size_inf <= size <= self.size_sup):
            return
        # offset + 1: convert to the 1-based coordinates used by SmRNAwindow
        self.instanceDict[gene].addread(polarity, offset + 1, size)
        self.alignedReads += 1

    if self.alignmentFileFormat == "tabular":
        # "with" guarantees the handle is released even if a line fails to parse
        with open(self.alignmentFile, "r") as alignments:
            for line in alignments:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines instead of raising IndexError
                polarity = fields[1]
                gene = fields[2]
                offset = int(fields[3])
                size = len(fields[4])
                register(gene, polarity, offset, size)
        return self.instanceDict
    elif self.alignmentFileFormat == "bam" or self.alignmentFileFormat == "sam":
        import pysam
        samfile = pysam.Samfile(self.alignmentFile)
        try:
            for read in samfile:
                if read.tid == -1:
                    continue  # filter out unaligned reads
                polarity = "-" if read.is_reverse else "+"
                register(samfile.getrname(read.tid), polarity, read.pos, read.qlen)
        finally:
            samfile.close()  # the original never closed the alignment file
        return self.instanceDict
    # Unrecognised format: nothing is parsed and None is returned, as before.
    return None
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
166
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def size_histogram(self):  # in HandleSmRNAwindows
    '''Sum the per-window size histograms of every SmRNAwindow instance.

    Returns a dict with the keys 'F', 'R' and 'both'; each value is a
    defaultdict(float) mapping a read size to its accumulated value.
    'F' and 'R' are plain sums of the per-window values, while 'both'
    accumulates (F value - R value) for every size listed under the
    per-window "both" entry.
    '''
    totals = dict((label, defaultdict(float)) for label in ('F', 'R', 'both'))
    for window in self.instanceDict.values():
        per_window = window.size_histogram()
        for strand in ("F", "R"):
            strand_counts = per_window[strand]
            for length in strand_counts:
                totals[strand][length] += strand_counts[length]
        forward = per_window["F"]
        reverse = per_window["R"]
        for length in per_window["both"]:
            totals["both"][length] += forward[length] - reverse[length]
    return totals
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
181
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def CountFeatures(self, GFF3="path/to/file"):
    '''Count reads per feature type for the annotations listed in a GFF3 file.

    GFF3: path to the annotation file. Lines starting with "#" and blank
        lines are skipped; the remaining lines are split on whitespace and
        column 0 (reference name), column 2 (feature type) and columns 3-4
        (left/right coordinates) are used.

    For every annotation line, readcount() of the matching SmRNAwindow is
    called with polarity="both" and method="destructive", and the result is
    added to the tally of that feature type.
    NOTE(review): "destructive" presumably removes counted reads so that
    overlapping features do not count them twice - confirm in readcount().

    Returns a defaultdict(int) mapping feature type -> accumulated count.
    Raises KeyError when a reference name is absent from self.instanceDict.
    '''
    featureDict = defaultdict(int)
    # "with" closes the file even when a line cannot be processed
    with open(GFF3, "r") as annotations:
        for line in annotations:
            if line.startswith("#"):
                continue
            # split() already discards the trailing newline; slicing off the
            # last character would damage a final line that has no newline.
            fields = line.split()
            if not fields:
                continue  # blank line
            chrom, feature, leftcoord, rightcoord = fields[0], fields[2], fields[3], fields[4]
            featureDict[feature] += self.instanceDict[chrom].readcount(upstream_coord=int(leftcoord), downstream_coord=int(rightcoord), polarity="both", method="destructive")
    return featureDict
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
192
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
193 class SmRNAwindow:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
194
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def __init__(self, gene, sequence="ATGC", windowoffset=1, biosample="Undetermined", norm=1.0):
    '''Set up an empty read window over one reference sequence.

    gene: name of the reference item covered by the window.
    sequence: reference sequence; its length becomes self.size.
    windowoffset: coordinate of the first position of the window.
    biosample: label of the sample the reads come from.
    norm: normalisation factor, stored as given.
    '''
    # identity of the window
    self.gene = gene
    self.biosample = biosample
    self.norm = norm
    # geometry of the window
    self.sequence = sequence
    self.size = len(sequence)
    self.windowoffset = windowoffset
    # read storage: {+/-offset: [size1, size2, ...], ...}
    self.readDict = defaultdict(list)
    # read counters, incremented by addread
    self.matchedreadsUp = self.matchedreadsDown = 0
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
205
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def addread(self, polarity, offset, size):
    '''Store one read in self.readDict and update the strand counters.

    ATTENTION: offset must already be 1-based; the 0-to-1 based conversion
    is no longer done here but during read parsing.

    A read with polarity "+" is keyed by its offset. Any other polarity is
    treated as reverse and keyed by minus the coordinate of its last base,
    -(offset + size - 1). The read size is appended to the list under that key.
    '''
    if polarity != "+":
        reverse_key = -(offset + size - 1)
        self.readDict[reverse_key].append(size)
        self.matchedreadsDown += 1
        return
    self.readDict[offset].append(size)
    self.matchedreadsUp += 1
    return
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
216
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def barycenter(self, upstream_coord=None, downstream_coord=None):
    '''Return the relative barycenters of forward and reverse reads in a window.

    Every occupied offset of self.readDict is weighted by the number of
    reads stored at it. Forward reads are the positive keys, reverse reads
    the negative keys (compared through their absolute value). For each
    strand the weighted mean offset is shifted by upstream_coord and divided
    by the window length.

    upstream_coord: first offset of the window; a falsy value (None, 0)
        selects self.windowoffset.
    downstream_coord: last offset of the window; a falsy value selects
        self.windowoffset + self.size - 1.

    Returns a (Fbarycenter, Rbarycenter) tuple of floats. A strand without
    any read in the window has its weighted mean taken as 0, so its value
    is -upstream_coord / window_size.
    Raises ZeroDivisionError when the window length evaluates to 0.
    '''
    upstream_coord = upstream_coord or self.windowoffset
    downstream_coord = downstream_coord or self.windowoffset + self.size - 1
    # float divisor: same true division for empty and populated strands,
    # under Python 2 as well as Python 3
    window_size = float(downstream_coord - upstream_coord + 1)

    forward_weighted = forward_weight = 0
    reverse_weighted = reverse_weight = 0
    for offset, sizes in self.readDict.items():
        position = abs(offset)
        if position < upstream_coord or position > downstream_coord:
            continue  # outside the requested offset window
        weight = len(sizes)
        if offset > 0:
            forward_weighted += position * weight
            forward_weight += weight
        elif offset < 0:
            reverse_weighted += position * weight
            reverse_weight += weight

    def weighted_mean(weighted_sum, weight_sum):
        # 0 is the historical fallback when no read contributes
        if weight_sum > 0:
            return weighted_sum / float(weight_sum)
        return 0

    Fbarycenter = (weighted_mean(forward_weighted, forward_weight) - upstream_coord) / window_size
    Rbarycenter = (weighted_mean(reverse_weighted, reverse_weight) - upstream_coord) / window_size
    return Fbarycenter, Rbarycenter
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
238
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
239 def correlation_mapper (self, reference, window_size):
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
240 '''to map correlation with a sliding window 26-2-2013'''
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
241 from scipy import stats
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
242
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
243 if window_size > self.size:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
244 return []
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
245 F=open(reference, "r")
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
246 reference_forward = []
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
247 reference_reverse = []
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
248 for line in F:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
249 fields=line.split()
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
250 reference_forward.append(int(float(fields[1])))
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
251 reference_reverse.append(int(float(fields[2])))
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
252 F.close()
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
253 local_object_forward=[]
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
254 local_object_reverse=[]
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
255 ## Dict to list for the local object
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
256 for i in range(1, self.size+1):
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
257 local_object_forward.append(len(self.readDict[i]))
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
258 local_object_reverse.append(len(self.readDict[-i]))
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
259 ## start compiling results by slides
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
260 results=[]
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
261 for coordinate in range(self.size - window_size):
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
262 local_forward=local_object_forward[coordinate:coordinate + window_size]
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
263 local_reverse=local_object_reverse[coordinate:coordinate + window_size]
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
264 if sum(local_forward) == 0 or sum(local_reverse) == 0:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
265 continue
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
266 try:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
267 reference_to_local_cor_forward = stats.spearmanr(local_forward, reference_forward)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
268 reference_to_local_cor_reverse = stats.spearmanr(local_reverse, reference_reverse)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
269 if (reference_to_local_cor_forward[0] > 0.2 or reference_to_local_cor_reverse[0]>0.2):
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
270 results.append([coordinate+1, reference_to_local_cor_forward[0], reference_to_local_cor_reverse[0]])
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
271 except:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
272 pass
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
273 return results
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
274
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def readcount(self, size_inf=0, size_sup=1000, upstream_coord=None, downstream_coord=None, polarity="both", method="conservative"):
    '''Count the reads of the window, filtered by size, region and strand.

    size_inf, size_sup -- inclusive bounds on the read size.
    upstream_coord, downstream_coord -- inclusive bounds on the read offset.
        Defaults cannot be expressed relative to the instance in the
        signature, so None is passed and tested here: a missing (falsy)
        bound falls back on self.windowoffset, respectively
        self.windowoffset + self.size - 1.
    polarity -- "both", "forward" or "reverse". Forward reads are looked up
        under positive offsets of self.readDict, reverse reads under the
        negated offsets.
    method -- with polarity "both", any value other than "conservative"
        deletes every visited offset from self.readDict while counting.
        Careful: this precludes re-use of self.readDict afterwards.

    Returns the number of matching reads.
    Raises ValueError if polarity is not one of the three accepted values.
    '''
    if polarity not in ("both", "forward", "reverse"):
        raise ValueError('polarity must be "both", "forward" or "reverse", got %r' % (polarity,))
    window_end = self.windowoffset + self.size - 1
    upstream_coord = upstream_coord or self.windowoffset
    downstream_coord = downstream_coord or window_end
    # Shortcut on the precomputed totals. It is only valid when no size
    # filtering is requested: the totals are not broken down by read size.
    unfiltered = size_inf <= 0 and size_sup >= 1000
    if unfiltered and upstream_coord == 1 and downstream_coord == window_end:
        if polarity == "both":
            return self.matchedreadsUp + self.matchedreadsDown
        if polarity == "forward":
            return self.matchedreadsUp
        return self.matchedreadsDown
    count_forward = polarity != "reverse"
    count_reverse = polarity != "forward"
    # Offsets are only discarded in the "both" polarity, as before.
    destructive = polarity == "both" and method != "conservative"
    n = 0
    for offset in range(upstream_coord, downstream_coord + 1):
        keys = []
        if count_forward:
            keys.append(offset)
        if count_reverse:
            keys.append(-offset)
        for key in keys:
            # Membership test (not indexing) so that no empty entry is
            # created should readDict be a defaultdict.
            if key in self.readDict:
                n += sum(1 for read in self.readDict[key] if size_inf <= read <= size_sup)
                if destructive:
                    del self.readDict[key]
    return n
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
318
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def readsizes(self):
    '''Return a dictionary of the number of reads by size (the keys).

    Reads of both strands (every offset of self.readDict) are pooled. Each
    size between the smallest and the largest observed size is present in
    the result, with a count of 0 when no read has that size. An empty
    dictionary is returned when the window holds no read.
    '''
    dicsize = {}
    for offset in self.readDict:
        for size in self.readDict[offset]:
            dicsize[size] = dicsize.get(size, 0) + 1
    if not dicsize:
        # nothing to pad, and min()/max() would raise on an empty sequence
        return dicsize
    # fill the unobserved sizes of the range with null values
    for size in range(min(dicsize), max(dicsize) + 1):
        dicsize.setdefault(size, 0)
    return dicsize
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
328
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
329
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def size_histogram(self, minquery=None, maxquery=None): # in SmRNAwindow
    '''Return the normalized read size distributions of the window.

    The result is a dictionary with three defaultdict(float) values keyed by
    read size:
      'F'    -- forward reads (offset >= 0), each counted as +self.norm
      'R'    -- reverse reads (offset < 0), each counted as -self.norm,
                hence negative values
      'both' -- F - R for every size seen on either strand
    After the call 'F' and 'R' hold the same set of sizes (missing ones are
    filled with 0.0).

    minquery, maxquery -- when minquery is truthy, every size of the
        inclusive range minquery..maxquery missing from a distribution is
        added with a value of 0; maxquery must then be provided.
    '''
    norm = self.norm
    size_dict = {}
    size_dict['F'] = defaultdict(float)
    size_dict['R'] = defaultdict(float)
    size_dict['both'] = defaultdict(float)
    for offset in self.readDict:
        if offset < 0:
            strand, weight = 'R', -norm
        else:
            strand, weight = 'F', norm
        for size in self.readDict[offset]:
            size_dict[strand][size] += weight
    ## patch to avoid missing graphs when parsed by R-lattice:
    ## an empty window still reports one (null) size class
    if not size_dict['F'] and not size_dict['R']:
        size_dict['F'][21] = 0
        size_dict['R'][21] = 0
    # The key set is frozen before the loop: the defaultdict look-ups below
    # insert the sizes missing from 'F' or 'R' as 0.0 on purpose.
    allSizeKeys = set(size_dict['F']) | set(size_dict['R'])
    for size in allSizeKeys:
        size_dict['both'][size] = size_dict['F'][size] - size_dict['R'][size]
    if minquery:
        for polarity in size_dict:
            for size in range(minquery, maxquery + 1):
                if size not in size_dict[polarity]:
                    size_dict[polarity][size] = 0
    return size_dict
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
357
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def statsizes(self, upstream_coord=None, downstream_coord=None):
    '''Return (mean, median, standard deviation) of the read sizes.

    Memory saving version working on optional subcoordinates (see the
    readcount method for further discussion).

    upstream_coord, downstream_coord -- inclusive bounds applied to the
        absolute value of the offsets, so both strands are pooled. A missing
        (falsy) bound falls back on self.windowoffset, respectively
        self.windowoffset + self.size - 1.

    The three statistics are computed with numpy. When no read falls in the
    region, (nan, nan, nan) is returned directly, which avoids the numpy
    runtime warnings triggered by empty input.
    '''
    upstream_coord = upstream_coord or self.windowoffset
    downstream_coord = downstream_coord or self.windowoffset + self.size - 1
    sizes = [size
             for offset in self.readDict
             if upstream_coord <= abs(offset) <= downstream_coord
             for size in self.readDict[offset]]
    if not sizes:
        nan = float('nan')
        return nan, nan, nan
    return mean(sizes), median(sizes), std(sizes)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
372
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def foldEnergy(self, upstream_coord=None, downstream_coord=None):
    '''Return, as a float, the RNAfold value of the window (sub)sequence.

    Memory saving version working on optional subcoordinates (see the
    readcount method for further discussion).

    upstream_coord, downstream_coord -- inclusive 1-based bounds of the
        region; a missing (falsy) bound falls back on self.windowoffset,
        respectively self.windowoffset + self.size - 1.

    RNAfold is called with a one-element list and its result is indexed by
    the submitted sequence (presumably a sequence -> energy mapping).
    '''
    start = upstream_coord or self.windowoffset
    end = downstream_coord or self.windowoffset + self.size - 1
    # NOTE(review): self.sequence is sliced with absolute coordinates here,
    # whereas Ufreq indexes it relative to self.windowoffset -- confirm which
    # convention self.sequence follows when windowoffset != 1.
    subsequence = self.sequence[start - 1:end]
    energies = RNAfold([subsequence])
    return float(energies[subsequence])
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
380
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def Ufreq(self, size_scope, upstream_coord=None, downstream_coord=None):
    '''Percentage of T among the start bases of the selected reads.

    size_scope must support "in" tests; only read sizes found in it are counted.
    upstream_coord / downstream_coord restrict the offsets taken into account
    (inclusive); they default to the first and last coordinate of the window.
    The start base is read from self.sequence at the read offset and is
    complemented when the offset is negative.
    Returns "." when no read was counted, otherwise a float between 0 and 100.'''
    first_coord = upstream_coord or self.windowoffset
    last_coord = downstream_coord or self.windowoffset + self.size - 1
    complement = {"A":"T","T":"A","G":"C","C":"G","N":"N"}
    base_counts = {"A":0,"T":0,"G":0,"C":0, "N":0}
    for position, read_sizes in self.readDict.items():
        coordinate = abs(position)
        if not (first_coord <= coordinate <= last_coord):
            continue
        # every read sharing this offset starts with the same base,
        # so count the matching sizes first and add them in one go
        hits = sum(1 for read_size in read_sizes if read_size in size_scope)
        if hits == 0:
            continue
        base = self.sequence[coordinate - self.windowoffset]
        if position < 0:
            base = complement[base]
        base_counts[base] += hits
    total = float(sum(base_counts.values()))
    if total == 0:
        return "."
    return base_counts["T"] / total * 100
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
401
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def Ufreq_stranded(self, size_scope, upstream_coord=None, downstream_coord=None):
    '''Strand-aware variant of Ufreq.

    size_scope must support "in" tests; only read sizes found in it are counted.
    upstream_coord / downstream_coord restrict the offsets taken into account
    (inclusive); they default to the first and last coordinate of the window.
    Reads stored under a positive offset are tallied as forward, reads stored
    under a negative offset as reverse (their base is complemented).
    Returns the string "<forward> | <reverse>" where each side is the
    percentage of T start bases on that strand, or "." when the strand has
    no counted read.'''
    first_coord = upstream_coord or self.windowoffset
    last_coord = downstream_coord or self.windowoffset + self.size - 1
    complement = {"A":"T","T":"A","G":"C","C":"G","N":"N"}
    tally = {"Afor":0,"Tfor":0,"Gfor":0,"Cfor":0, "Nfor":0,"Arev":0,"Trev":0,"Grev":0,"Crev":0, "Nrev":0}
    for position, read_sizes in self.readDict.items():
        coordinate = abs(position)
        if not (first_coord <= coordinate <= last_coord):
            continue
        # all reads at one offset share the same start base and strand
        hits = sum(1 for read_size in read_sizes if read_size in size_scope)
        if hits == 0:
            continue
        base = self.sequence[coordinate - self.windowoffset]
        if position < 0:
            key = complement[base] + "rev"
        else:
            key = base + "for"
        tally[key] += hits
    forward_total = float(tally["Afor"] + tally["Tfor"] + tally["Gfor"] + tally["Cfor"] + tally["Nfor"])
    reverse_total = float(tally["Arev"] + tally["Trev"] + tally["Grev"] + tally["Crev"] + tally["Nrev"])
    # a strand without any counted read is reported as "."
    if forward_total == 0:
        forward_part = "."
    else:
        forward_part = tally["Tfor"] / forward_total * 100
    if reverse_total == 0:
        reverse_part = "."
    else:
        reverse_part = tally["Trev"] / reverse_total * 100
    return "%s | %s" % (forward_part, reverse_part)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
430
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def readplot(self):
    '''Build the tab-separated rows describing the read map of the item.

    For every absolute offset present in self.readDict (ascending order), the
    number of reads stored under the positive offset and under the negative
    offset are each multiplied by self.norm. A non-zero forward value gives a
    row "gene, offset, value, F"; a non-zero reverse value gives a row
    "gene, offset, -value, R".
    Returns the list of rows; when no row was produced, a single placeholder
    row (offset 1, value 0, "F") is returned instead.'''
    scale = self.norm
    row_template = "%s\t%s\t%s\t%s"
    rows = []
    for position in sorted(set(abs(key) for key in self.readDict)):
        reverse_value = len(self.readDict.get(-position, [])) * scale
        forward_value = len(self.readDict.get(position, [])) * scale
        if forward_value != 0:
            rows.append(row_template % (self.gene, position, forward_value, "F"))
        if reverse_value != 0:
            rows.append(row_template % (self.gene, position, -reverse_value, "R"))
    ## patch to avoid missing graphs when parsed by R-lattice. 27-08-2014. Test and validate !
    if not rows:
        rows.append(row_template % (self.gene, 1, 0, "F"))
    return rows
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
447
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def readcoverage(self, upstream_coord=None, downstream_coord=None, windowName=None):
    '''Per-position coverage of the major strand (used by the MirParser tool).

    upstream_coord / downstream_coord delimit the window; they default to 1
    and self.size. windowName defaults to "<gene>_<upstream>_<downstream>".
    self.readcount is asked for the forward and the reverse read totals of the
    window; coverage is computed for forward reads when they are strictly more
    numerous, otherwise for reverse reads.
    Returns one tab-separated line per window-relative position, joined with
    newlines: biosample, window name, position, position / window length,
    coverage, coverage / maximum coverage, major strand. An empty string is
    returned when the window is too narrow to hold any position.'''
    upstream_coord = upstream_coord or 1
    downstream_coord = downstream_coord or self.size
    windowName = windowName or "%s_%s_%s" % (self.gene, upstream_coord, downstream_coord)
    window_length = downstream_coord - upstream_coord + 1
    # NOTE(review): positions run from 1 to window_length - 1, so the last
    # position of the window is never reported - kept as is, confirm intent.
    # range() instead of xrange() keeps the method usable on Python 2 and 3.
    forORrev_coverage = dict.fromkeys(range(1, window_length), 0)
    totalforward = self.readcount(upstream_coord=upstream_coord, downstream_coord=downstream_coord, polarity="forward")
    totalreverse = self.readcount(upstream_coord=upstream_coord, downstream_coord=downstream_coord, polarity="reverse")
    if totalforward > totalreverse:
        majorcoverage = "forward"
        for offset in self.readDict:
            start = offset - upstream_coord + 1
            # membership is tested on the dict itself (hash lookup) rather than
            # on a .keys() list rebuilt for every offset
            if offset > 0 and start in forORrev_coverage:
                for read in self.readDict[offset]:
                    for i in range(read):
                        position = start + i
                        # a sense read may span over the downstream limit
                        if position in forORrev_coverage:
                            forORrev_coverage[position] += 1
    else:
        majorcoverage = "reverse"
        for offset in self.readDict:
            if offset < 0 and (-offset - upstream_coord + 1) in forORrev_coverage:
                for read in self.readDict[offset]:
                    for i in range(read):
                        # NOTE(review): counting starts one position below the
                        # one tested just above (no "+1" here) - kept as is,
                        # confirm against the offset convention of readDict.
                        position = -offset - upstream_coord - i
                        # an antisense read may span over the upstream limit
                        if position in forORrev_coverage:
                            forORrev_coverage[position] += 1
    # max() of an empty sequence raises; "or 1" avoids dividing by zero
    maximum = (max(forORrev_coverage.values()) if forORrev_coverage else 0) or 1
    output_list = []
    for n in sorted(forORrev_coverage):
        output_list.append("%s\t%s\t%s\t%s\t%s\t%s\t%s" % (self.biosample, windowName, n, float(n)/window_length, forORrev_coverage[n], float(forORrev_coverage[n])/maximum, majorcoverage))
    return "\n".join(output_list)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
481
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
482
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def signature(self, minquery, maxquery, mintarget, maxtarget, scope, zscore="no", upstream_coord=None, downstream_coord=None):
    '''Overlap signature between query-sized and target-sized reads.

    minquery..maxquery and mintarget..maxtarget are the inclusive read size
    ranges of the query and target populations.
    scope is any iterable of *relative* offsets to be computed; it is
    materialised once, so a one-shot iterator is accepted as well.
    upstream_coord / downstream_coord restrict the read offsets taken into
    account (inclusive); they default to the first and last coordinate of the
    window (see the readcount method for further discussion).
    For every query offset and every i in scope, the smaller of the query
    count at that offset and the target count at offset (-offset - i + 1) is
    added to the entry i of the result.
    Returns a dict {relative offset: value}. Values are pair counts, halved
    (integer division) when the query and target ranges are identical, or
    z-scores when zscore == "yes" (all 0 if the standard deviation is 0).'''
    upstream_coord = upstream_coord or self.windowoffset
    downstream_coord = downstream_coord or self.windowoffset + self.size - 1
    # scope is walked once per query offset: freeze it so that generators
    # are not exhausted after the first pass
    scope = list(scope)
    query_range = range(minquery, maxquery + 1)
    target_range = range(mintarget, maxtarget + 1)
    Query_table = {}
    Target_table = {}
    frequency_table = dict.fromkeys(scope, 0)
    for offset in self.readDict:
        if abs(offset) < upstream_coord or abs(offset) > downstream_coord:
            continue
        for size in self.readDict[offset]:
            if size in query_range:
                Query_table[offset] = Query_table.get(offset, 0) + 1
            if size in target_range:
                Target_table[offset] = Target_table.get(offset, 0) + 1
    for offset, query_count in Query_table.items():
        for i in scope:
            frequency_table[i] += min(query_count, Target_table.get(-offset - i + 1, 0))
    if minquery == mintarget and maxquery == maxtarget:
        ## added to incorporate the division by 2 in the method (26/11/2013), see signature_options.py and lattice_signature.py
        # "//" keeps the integer division the original "/" performed on
        # Python 2, whatever the interpreter version
        frequency_table = dict((i, count // 2) for i, count in frequency_table.items())
    if zscore == "yes":
        # numpy needs a real sequence, not a dict view
        counts = list(frequency_table.values())
        z_mean = mean(counts)
        z_std = std(counts)
        if z_std == 0:
            frequency_table = dict.fromkeys(frequency_table, 0)
        else:
            frequency_table = dict((i, (count - z_mean) / z_std) for i, count in frequency_table.items())
    return frequency_table
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
514
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def hannon_signature(self, minquery, maxquery, mintarget, maxtarget, scope, upstream_coord=None, downstream_coord=None):
    '''Compute a per-offset normalised overlap signature on a sub-window.

    Reads are taken from self.readDict (offset -> iterable of read sizes).
    Only offsets whose absolute value lies between upstream_coord and
    downstream_coord (both inclusive) are considered; a falsy bound falls
    back to self.windowoffset / self.windowoffset + self.size - 1.

    minquery..maxquery and mintarget..maxtarget are inclusive size ranges.
    scope must be a re-iterable (list or tuple) of the relative overlaps
    to evaluate.

    Returns a dict {overlap: score}.  For every query offset, the products
    query_count * target_count are weighted by
    1 / (targets seen for that offset) / (total number of query reads)
    and summed over all query offsets.
    '''
    lower_bound = upstream_coord or self.windowoffset
    upper_bound = downstream_coord or self.windowoffset + self.size - 1
    query_sizes = range(minquery, maxquery + 1)
    target_sizes = range(mintarget, maxtarget + 1)
    query_counts = {}
    target_counts = {}
    query_total = 0
    signature = dict.fromkeys(scope, 0)

    # Keep only the reads located inside the requested coordinates.
    for offset in self.readDict:
        if not lower_bound <= abs(offset) <= upper_bound:
            continue
        for size in self.readDict[offset]:
            if size in query_sizes:
                query_counts[offset] = query_counts.get(offset, 0) + 1
                query_total += 1
            if size in target_sizes:
                target_counts[offset] = target_counts.get(offset, 0) + 1

    # Per query offset: local table first, then weighted accumulation.
    for offset, query_count in query_counts.items():
        local = dict.fromkeys(scope, 0)
        partner_total = 0
        for i in scope:
            partner_count = target_counts.get(-offset - i + 1, 0)
            local[i] += query_count * partner_count
            partner_total += partner_count
        if not partner_total:
            # No partner at any overlap: nothing to add for this offset
            # (this is the case that would otherwise divide by zero).
            continue
        weight = 1. / partner_total / query_total
        for i in scope:
            signature[i] += weight * local[i]
    return signature
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
547
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def phasing(self, size_range, scope):
    '''Compute an autocorrelation-like phasing signal.

    size_range is a container of accepted read sizes; scope must be a
    re-iterable (list or tuple) of the distances to evaluate.

    Reads come from self.readDict (offset -> iterable of read sizes).
    For each occupied offset, the partner position is offset + i when the
    offset is positive and offset - i otherwise.  Products of read counts
    are weighted by 1 / (partners seen for that offset) / (total number of
    retained reads) and summed over all offsets.

    Returns a dict {distance: score}.
    '''
    counts = {}
    kept_reads = 0
    signal = dict.fromkeys(scope, 0)

    # Select the reads of the requested sizes.
    for offset in self.readDict:
        for size in self.readDict[offset]:
            if size not in size_range:
                continue
            counts[offset] = counts.get(offset, 0) + 1
            kept_reads += 1

    # Per-offset computation, then merge into the overall signal.
    for offset, here in counts.items():
        direction = 1 if offset > 0 else -1
        local = dict.fromkeys(scope, 0)
        neighbours = 0
        for i in scope:
            there = counts.get(offset + direction * i, 0)
            local[i] += here * there
            neighbours += there
        if not neighbours:
            # Nothing found at any distance: this offset contributes nothing
            # (this is the case that would otherwise divide by zero).
            continue
        weight = 1. / neighbours / kept_reads
        for i in scope:
            signal[i] += weight * local[i]
    return signal
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
577
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
578
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
579
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def z_signature(self, minquery, maxquery, mintarget, maxtarget, scope):
    '''Return the overlap signature expressed as z-scores.

    The raw table is obtained from self.signature(minquery, maxquery,
    mintarget, maxtarget, scope); each value is then centred on the mean
    and divided by the standard deviation of all values (numpy mean/std
    must be in scope).  When the standard deviation is zero every key maps
    to 0.  scope must be a python iterable of relative offsets.
    '''
    raw = self.signature(minquery, maxquery, mintarget, maxtarget, scope)
    offsets = sorted(raw)
    values = [raw[key] for key in offsets]
    spread = std(values)
    if not spread:
        # Flat signature: a z-score is undefined, report zeros instead.
        return dict.fromkeys(offsets, 0)
    centre = mean(values)
    return dict((key, (raw[key] - centre) / spread) for key in offsets)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
592
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def percent_signature(self, minquery, maxquery, mintarget, maxtarget, scope):
    '''Return the overlap signature as a percentage of the reads considered.

    The raw table comes from self.signature(minquery, maxquery, mintarget,
    maxtarget, scope).  Each value is divided by the number of reads whose
    size falls in the query or the target size range and multiplied by 100.

    minquery..maxquery and mintarget..maxtarget are INCLUSIVE ranges, as in
    the other signature methods; sizes present in both ranges are counted
    once.  scope must be a re-iterable (list or tuple) of relative offsets.

    Returns a dict {offset: percentage}; every value is 0 when no read of
    the requested sizes exists.
    '''
    frequency_table = self.signature(minquery, maxquery, mintarget, maxtarget, scope)
    # Upper bounds are included (max + 1): with exclusive bounds a request
    # such as 21-21 selected no size at all and always produced zeros.
    # A set union is used because range objects cannot be concatenated
    # with "+" on Python 3.
    sizes = set(range(minquery, maxquery + 1)) | set(range(mintarget, maxtarget + 1))
    # NOTE(review): readsizes() is assumed to return a size -> count mapping
    # (only .get is used here) and to be side-effect free, so it is called once.
    size_counts = self.readsizes()
    total = float(sum(size_counts.get(size, 0) for size in sizes))
    if total == 0:
        return dict((i, 0) for i in scope)
    return dict((i, frequency_table[i] / total * 100) for i in scope)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
599
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def pairer(self, overlap, minquery, maxquery, mintarget, maxtarget):
    '''Return the sequences of query/target read pairs for a given overlap.

    Reads come from self.readDict (offset -> iterable of read sizes); sizes
    in minquery..maxquery (inclusive) are queries, sizes in
    mintarget..maxtarget (inclusive) are targets.  A query offset is
    matched with the target offset -offset - overlap + 1, and as many
    pairs are produced as the smaller of the two read lists allows.

    The result is a flat list of strings, query first then target for each
    pair.  Reads at offsets >= 0 are prefixed with "+" and sliced directly
    from self.sequence; the others are prefixed with "-" and passed through
    antipara.  All sequences go through RNAtranslate.
    '''
    query_sizes = range(int(minquery), int(maxquery) + 1)
    target_sizes = range(int(mintarget), int(maxtarget) + 1)
    queries_at = defaultdict(list)
    targets_at = defaultdict(list)
    pairs = []

    # Sort the read sizes of every offset into query and/or target lists.
    for offset in self.readDict:
        for size in self.readDict[offset]:
            if size in query_sizes:
                queries_at[offset].append(size)
            if size in target_sizes:
                targets_at[offset].append(size)

    for offset, query_reads in queries_at.items():
        partner = -offset - overlap + 1
        target_reads = targets_at.get(partner)
        if not target_reads:
            continue
        # zip stops at the shorter list, i.e. min(len(queries), len(targets)) pairs.
        for query_size, target_size in zip(query_reads, target_reads):
            if offset >= 0:
                forward = self.sequence[offset:offset + query_size]
                reverse = self.sequence[-partner - target_size + 1:-partner + 1]
                pairs.append("+%s" % RNAtranslate(forward))
                pairs.append("-%s" % RNAtranslate(antipara(reverse)))
            else:
                reverse = self.sequence[-offset - query_size + 1:-offset + 1]
                forward = self.sequence[partner:partner + target_size]
                pairs.append("-%s" % RNAtranslate(antipara(reverse)))
                pairs.append("+%s" % RNAtranslate(forward))
    return pairs
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
626
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
627 def pairable (self, overlap, minquery, maxquery, mintarget, maxtarget):
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
628 queryhash = defaultdict(list)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
629 targethash = defaultdict(list)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
630 query_range = range (int(minquery), int(maxquery)+1)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
631 target_range = range (int(mintarget), int(maxtarget)+1)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
632 paired_sequences = []
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
633
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
634 for offset in self.readDict: # selection of data
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
635 for size in self.readDict[offset]:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
636 if size in query_range:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
637 queryhash[offset].append(size)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
638 if size in target_range:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
639 targethash[offset].append(size)
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
640
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
641 for offset in queryhash:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
642 matched_offset = -offset - overlap + 1
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
643 if targethash[matched_offset]:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
644 if offset >= 0:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
645 for i in queryhash[offset]:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
646 paired_sequences.append("+%s" % RNAtranslate (self.sequence[offset:offset+i]) )
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
647 for i in targethash[matched_offset]:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
648 paired_sequences.append( "-%s" % RNAtranslate (antipara (self.sequence[-matched_offset-i+1:-matched_offset+1]) ) )
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
649 if offset < 0:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
650 for i in queryhash[offset]:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
651 paired_sequences.append("-%s" % RNAtranslate (antipara (self.sequence[-offset-i+1:-offset+1]) ) )
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
652 for i in targethash[matched_offset]:
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
653 paired_sequences.append("+%s" % RNAtranslate (self.sequence[matched_offset:matched_offset+i] ) )
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
654 return paired_sequences
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
655
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def newpairable_bowtie(self, overlap, minquery, maxquery, mintarget, maxtarget):
    '''Return bowtie-like lines for the query reads that have a pairing partner.

    Revision of pairable (3-12-2012) written around the offset shift problem:
    the offsets held in self.readDict are handled as 1-based positions while
    python strings are indexed from 0.

    overlap            -- overlap used to derive the partner offset
                          (partner = -offset - overlap + 1)
    minquery, maxquery -- inclusive size bounds of the query reads (passed to int())
    mintarget, maxtarget -- inclusive size bounds of the target reads (passed to int())

    A query read is reported only when the partner offset carries at least one
    read whose size is in the target range. Each reported line has five
    tab-separated columns: running counter (starting at 1), strand ("+" or "-"),
    self.gene, 0-based start position, and the matching slice of self.sequence.
    Reads at negative offsets are reported on the "-" strand; their slice is
    taken from self.sequence as-is (it is not reverse-complemented here).
    '''
    query_sizes = range(int(minquery), int(maxquery) + 1)
    target_sizes = range(int(mintarget), int(maxtarget) + 1)

    # Selection of data: query-sized reads grouped by offset, plus the set of
    # offsets that hold at least one target-sized read.
    query_reads = defaultdict(list)
    target_offsets = set()
    for position in self.readDict:
        for length in self.readDict[position]:
            if length in query_sizes:
                query_reads[position].append(length)
            if length in target_sizes:
                target_offsets.add(position)

    row_format = "%s\t%s\t%s\t%s\t%s"
    rows = []
    for position, lengths in query_reads.items():
        partner = -position - overlap + 1
        if partner not in target_offsets:
            continue
        for length in lengths:
            # Mind the 1-based -> 0-based shift of the offset.
            if position >= 0:
                strand = "+"
                start = position - 1
                stop = position - 1 + length
            else:
                strand = "-"
                start = -position - length
                stop = -position
            rows.append(row_format % (len(rows) + 1, strand, self.gene, start, self.sequence[start:stop]))
    return rows
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
683
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
684
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
def __main__(bowtie_index_path, bowtie_output_path):
    '''Load bowtie alignments into per-gene SmRNAwindow objects and print their pairings.

    bowtie_index_path  -- handed to get_fasta(), whose result is indexed by gene name
    bowtie_output_path -- whitespace-delimited alignment file read line by line;
                          column 2 is the polarity, column 3 the gene, column 4 the
                          integer offset and column 5 the read (only its length is used)

    For every gene met in the alignment file, prints the gene followed by the
    result of pairer(19, 19, 23, 19, 23) on its window.

    Raises KeyError when an aligned gene is missing from the get_fasta() result,
    and IndexError / ValueError on malformed non-blank lines.
    '''
    sequenceDic = get_fasta(bowtie_index_path)
    objDic = {}
    # "with" guarantees the alignment file is closed even if a line fails to parse.
    with open(bowtie_output_path, "r") as alignments:
        for line in alignments:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline) instead of crashing.
                continue
            polarity = fields[1]
            gene = fields[2]
            offset = int(fields[3])
            size = len(fields[4])
            # Explicit membership test: a KeyError raised from inside addread()
            # must not be mistaken for "gene not seen yet", which would replace
            # the existing window and lose the reads already recorded.
            if gene not in objDic:
                objDic[gene] = SmRNAwindow(gene, sequenceDic[gene])
            objDic[gene].addread(polarity, offset, size)
    for gene in objDic:
        # Single pre-formatted string: same output as "print gene, result" on
        # Python 2, and also valid on Python 3.
        print("%s %s" % (gene, objDic[gene].pairer(19, 19, 23, 19, 23)))
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
703
234b83159ea8 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: the first two command-line arguments are forwarded
    # as bowtie_index_path and bowtie_output_path.
    __main__(sys.argv[1], sys.argv[2])