Mercurial > repos > artbio > small_read_size_histograms
annotate smRtools.py @ 0:234b83159ea8 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
author | artbio |
---|---|
date | Tue, 11 Jul 2017 11:44:36 -0400 |
parents | |
children |
rev | line source |
---|---|
0
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
1 #!/usr/bin/python |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
2 # version 1 7-5-2012 unification of the SmRNAwindow class |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
3 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
4 import sys, subprocess |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
5 from collections import defaultdict |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
6 from numpy import mean, median, std |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
7 ##Disable scipy import temporarily, as no working scipy on toolshed. |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
8 ##from scipy import stats |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
9 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def get_fasta (index="/home/galaxy/galaxy-dist/bowtie/5.37_Dmel/5.37_Dmel"):
    '''Return a dictionary with fasta identifiers as keys and sequences as values.

    index is passed as the last argument to "bowtie-inspect -a 0", so it has to
    be something bowtie-inspect accepts (the default value is the base path of
    a bowtie index).

    Only the first whitespace-delimited word of each header is used as key.
    An output without any header yields an empty dictionary.

    Raises subprocess.CalledProcessError if bowtie-inspect exits with a
    non-zero status.
    '''
    cmd = ["bowtie-inspect", "-a", "0", index]
    # universal_newlines=True makes the pipe deliver text rather than bytes, so
    # the ">" comparison below behaves the same under Python 2 and Python 3.
    p = subprocess.Popen(args=cmd, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT, universal_newlines=True)
    outputlines = p.stdout.readlines()
    returncode = p.wait()
    if returncode:
        # stderr is merged into stdout, so the captured text holds the message
        raise subprocess.CalledProcessError(returncode, cmd, "".join(outputlines))
    item_dic = {}
    current_item = None
    stringlist = []
    for line in outputlines:
        if line.startswith(">"):
            if current_item is not None:
                # dump the sequence of the previous item
                item_dic[current_item] = "".join(stringlist)
            # take the first word before space because bowtie splits headers
            current_item = line[1:].rstrip().split()[0]
            item_dic[current_item] = ""
            stringlist = []
        elif current_item is not None:
            # lines seen before the first header belong to no item: skip them
            stringlist.append(line.rstrip())
    if current_item is not None:
        item_dic[current_item] = "".join(stringlist)  # for the last item
    return item_dic
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
29 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def get_fasta_headers (index):
    '''Return a dictionary whose keys are the names reported by "bowtie-inspect -n".

    index is passed as the last argument to bowtie-inspect. Each output line
    contributes its first whitespace-delimited word as a key; every value is 1.
    Blank lines are ignored.

    Raises subprocess.CalledProcessError if bowtie-inspect exits with a
    non-zero status.
    '''
    cmd = ["bowtie-inspect", "-n", index]
    # universal_newlines=True: read text, not bytes, under Python 2 and 3 alike
    p = subprocess.Popen(args=cmd, stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT, universal_newlines=True)
    outputlines = p.stdout.readlines()
    returncode = p.wait()
    if returncode:
        # stderr is merged into stdout; without this check an error message
        # would be parsed as if it were a list of headers
        raise subprocess.CalledProcessError(returncode, cmd, "".join(outputlines))
    item_dic = {}
    for line in outputlines:
        words = line.split()
        if words:
            # take the first word before space because bowtie splits headers
            item_dic[words[0]] = 1
    return item_dic
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
39 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
40 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def get_file_sample (file, numberoflines):
    '''Return numberoflines lines picked at random, without replacement, from file.

    file is the path of a text file; lines are returned without their line
    terminators. If the file holds fewer than numberoflines lines, the string
    "sample size exceeds file size" is returned instead of a list.
    '''
    # Imported here because random is not among the imports visible at the top
    # of the module; without it the call below raises NameError.
    import random
    with open(file) as handle:
        fullfile = handle.read().splitlines()
    if len(fullfile) < numberoflines:
        return "sample size exceeds file size"
    return random.sample(fullfile, numberoflines)
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
49 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def get_fasta_from_history (file):
    '''Return a dictionary with fasta identifiers as keys and sequences as values.

    file is the path of a fasta file. Only the first whitespace-delimited word
    of each header is used as key; the sequence lines that follow a header are
    concatenated. An empty file yields an empty dictionary, and lines found
    before the first header are ignored.
    '''
    item_dic = {}
    current_item = None
    stringlist = []
    with open(file, "r") as F:
        for line in F:
            # Strip the line terminator only. Slicing off the last character
            # would lose a base (or a header letter) when the file does not
            # end with a newline.
            line = line.rstrip("\r\n")
            if line.startswith(">"):
                if current_item is not None:
                    # dump the sequence of the previous item
                    item_dic[current_item] = "".join(stringlist)
                # take the first word before space, as done for bowtie headers
                current_item = line[1:].split()[0]
                item_dic[current_item] = ""
                stringlist = []
            elif current_item is not None:
                stringlist.append(line)
    if current_item is not None:
        item_dic[current_item] = "".join(stringlist)  # for the last item
    return item_dic
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
65 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def antipara (sequence):
    '''Return the reverse complement of sequence.

    sequence is read from its last character to its first and each character
    is replaced by its partner (A<->T, G<->C, N->N). Any other character,
    lowercase letters included, raises KeyError.
    '''
    antidict = {"A":"T", "T":"A", "G":"C", "C":"G", "N":"N"}
    return "".join(antidict[base] for base in reversed(sequence))
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
70 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def RNAtranslate (sequence):
    '''Return sequence with every character other than A, G, C or N replaced by U.'''
    return "".join(base if base in "AGCN" else "U" for base in sequence)
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def DNAtranslate (sequence):
    '''Return sequence with every character other than A, G, C or N replaced by T.'''
    return "".join(base if base in "AGCN" else "T" for base in sequence)
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
75 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def RNAfold (sequence_list):
    '''Run "RNAfold --noPS" on sequence_list and return the reported energies.

    The sequences are joined with newlines and written to the standard input
    of RNAfold. In its output, a line starting with N, A, T, U, G or C is taken
    as a sequence (converted with DNAtranslate), and a line starting with
    "(", "." or ")" is taken as a structure line whose energy is the text
    after the last "(" minus its final character.

    Returns a dictionary pairing each sequence with the string form of the
    energy found on the following structure line.
    '''
    thestring = "\n".join(sequence_list)
    # universal_newlines=True opens the pipes in text mode, so a str can be
    # written to stdin and read from stdout under both Python 2 and Python 3.
    p = subprocess.Popen(args=["RNAfold","--noPS"], stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                         universal_newlines=True)
    # communicate() already waits for the process to end
    output = p.communicate(thestring)[0]
    records = []
    for line in output.split("\n"):
        if not line:
            # blank lines (the trailing one in particular) carry no data
            continue
        if line[0] in ("N", "A", "T", "U", "G", "C"):
            records.append(DNAtranslate(line))
        if line[0] in ("(", ".", ")"):
            energy = line.split("(")[-1]
            energy = energy[:-1]  # remove the ) parenthesis
            records.append(str(float(energy)))
    # records alternates sequence, energy, sequence, energy, ...
    return dict(zip(records[::2], records[1::2]))
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
94 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def extractsubinstance (start, end, instance):
    '''Build a SmRNAwindow restricted to positions start..end of instance.

    Kept as a function external to the class; the original note says this was
    a test of whether that saves memory.

    The new window gets instance.sequence[start-1:end] as sequence and start
    as window offset. Its gene attribute is then rewritten as
    "<gene> <windowoffset> <windowoffset + size - 1>". The entries of
    instance.readDict whose key is i or -i, for i from start to end inclusive,
    are copied into the new window's readDict.
    '''
    subinstance = SmRNAwindow (instance.gene, instance.sequence[start-1:end], start)
    subinstance.gene = "%s %s %s" % (subinstance.gene, subinstance.windowoffset, subinstance.windowoffset + subinstance.size - 1)
    # "in" replaces dict.has_key(), which no longer exists in Python 3
    upcoordinate = [i for i in range(start, end+1) if i in instance.readDict]
    downcoordinate = [-i for i in range(start, end+1) if -i in instance.readDict]
    for i in upcoordinate:
        subinstance.readDict[i] = instance.readDict[i]
    for i in downcoordinate:
        subinstance.readDict[i] = instance.readDict[i]
    return subinstance
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
106 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
107 class HandleSmRNAwindows: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def __init__(self, alignmentFile="~", alignmentFileFormat="tabular", genomeRefFile="~", genomeRefFormat="bowtieIndex", biosample="undetermined", size_inf=None, size_sup=1000, norm=1.0):
    '''Load the reference items, create one SmRNAwindow per item, then read the alignments.

    alignmentFile       -- path of the alignment file, read by self.readfile()
    alignmentFileFormat -- can be "tabular" or "sam"
    genomeRefFile       -- reference, handed to get_fasta() or
                           get_fasta_from_history() depending on genomeRefFormat
    genomeRefFormat     -- can be "bowtieIndex" or "fastaSource"
    biosample           -- label passed on to every SmRNAwindow
    size_inf, size_sup  -- stored on the instance; not used in this method
    norm                -- passed on to every SmRNAwindow

    Raises ValueError if genomeRefFormat is not one of the two accepted values.
    '''
    self.biosample = biosample
    self.alignmentFile = alignmentFile
    self.alignmentFileFormat = alignmentFileFormat
    self.genomeRefFile = genomeRefFile
    self.genomeRefFormat = genomeRefFormat
    self.alignedReads = 0
    self.instanceDict = {}
    self.size_inf = size_inf
    self.size_sup = size_sup
    self.norm = norm
    if genomeRefFormat == "bowtieIndex":
        self.itemDict = get_fasta (genomeRefFile)
    elif genomeRefFormat == "fastaSource":
        self.itemDict = get_fasta_from_history (genomeRefFile)
    else:
        # Without this branch self.itemDict stays unset and the loop below
        # fails with an AttributeError that does not name the real cause.
        raise ValueError("genomeRefFormat must be \"bowtieIndex\" or \"fastaSource\", got %r" % (genomeRefFormat,))
    for item in self.itemDict:
        # create as many instances as there are items
        self.instanceDict[item] = SmRNAwindow(item, sequence=self.itemDict[item], windowoffset=1, biosample=self.biosample, norm=self.norm)
    self.readfile()
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
126 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def readfile(self):
    '''Parse the alignment file and register every retained read.

    Reads self.alignmentFile according to self.alignmentFileFormat
    ("tabular", "bam" or "sam"). Each retained read is passed to the
    addread() method of self.instanceDict[<reference name>] and
    self.alignedReads is incremented by one.

    When self.size_inf is truthy, only reads whose size lies within
    [self.size_inf, self.size_sup] are retained; otherwise all reads are.

    Offsets found in the file are incremented by one before being handed to
    addread() (correction to the 1-based coordinates of SmRNAwindow).

    Returns self.instanceDict for the three known formats, None for any
    other format (nothing is read in that case).
    Raises KeyError if a read maps to a reference absent from
    self.instanceDict.
    '''
    def size_is_kept(size):
        # A falsy size_inf (None or 0) switches the size filter off.
        return not self.size_inf or (size >= self.size_inf and size <= self.size_sup)

    if self.alignmentFileFormat == "tabular":
        # "with" guarantees the handle is released even if a line fails to parse.
        with open(self.alignmentFile, "r") as alignments:
            for line in alignments:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines instead of raising IndexError
                polarity = fields[1]
                gene = fields[2]
                offset = int(fields[3])
                size = len(fields[4])  # read size is the length of the sequence column
                if size_is_kept(size):
                    # +1: correct to 1-based coordinates of SmRNAwindow
                    self.instanceDict[gene].addread(polarity, offset + 1, size)
                    self.alignedReads += 1
        return self.instanceDict

    if self.alignmentFileFormat in ("bam", "sam"):
        import pysam  # imported lazily: only needed for bam/sam inputs
        samfile = pysam.Samfile(self.alignmentFile)
        try:
            for read in samfile:
                if read.tid == -1:
                    continue  # filter out unaligned reads
                polarity = "-" if read.is_reverse else "+"
                gene = samfile.getrname(read.tid)
                size = read.qlen
                if size_is_kept(size):
                    # +1: correct to 1-based coordinates of SmRNAwindow
                    self.instanceDict[gene].addread(polarity, read.pos + 1, size)
                    self.alignedReads += 1
        finally:
            samfile.close()
        return self.instanceDict

    # Unknown format: nothing is read (unchanged historical behaviour).
    return None
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
166 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def size_histogram(self):  # in HandleSmRNAwindows
    '''Sum the size histograms of every item of self.instanceDict.

    Each item's own size_histogram() is expected to return a dictionary with
    the keys "F", "R" and "both", each mapping a read size to a value.

    Returns a dictionary with the same three keys, each a
    defaultdict(float) keyed by read size:
      - "F" and "R" hold the sums of the per-item "F" and "R" values;
      - "both" holds, for every size listed in an item's "both" entry, the
        sum of that item's (F value - R value).
    '''
    totals = dict((label, defaultdict(float)) for label in ("F", "R", "both"))
    for window in self.instanceDict.values():
        per_item = window.size_histogram()
        for strand in ("F", "R"):
            for read_size, value in per_item[strand].items():
                totals[strand][read_size] += value
        for read_size in per_item["both"]:
            # "both" is rebuilt from the F and R entries, not copied.
            totals["both"][read_size] += per_item["F"][read_size] - per_item["R"][read_size]
    return totals
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
181 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def CountFeatures(self, GFF3="path/to/file"):
    '''Count reads per feature type listed in a GFF3 file.

    For every non-comment, non-blank line of the GFF3 file, the readcount()
    method of self.instanceDict[<column 1>] is called on the interval
    [<column 4>, <column 5>] with polarity="both" and method="destructive",
    and the result is added to the running total of the feature type found
    in column 3.

    GFF3 -- path to the annotation file; columns are split on whitespace.

    Returns a defaultdict(int) mapping feature type -> summed read count.
    Raises KeyError if column 1 names a reference absent from
    self.instanceDict.
    '''
    featureDict = defaultdict(int)
    # "with" guarantees the handle is released even if a line fails to parse.
    with open(GFF3, "r") as annotations:
        for line in annotations:
            if line.startswith("#"):
                continue
            # split() already discards the trailing newline; slicing the last
            # character off would eat real data on an unterminated last line.
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines
            chrom, feature, leftcoord, rightcoord = fields[0], fields[2], fields[3], fields[4]
            featureDict[feature] += self.instanceDict[chrom].readcount(
                upstream_coord=int(leftcoord),
                downstream_coord=int(rightcoord),
                polarity="both",
                method="destructive")
    return featureDict
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
192 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
193 class SmRNAwindow: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
194 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def __init__(self, gene, sequence="ATGC", windowoffset=1, biosample="Undetermined", norm=1.0):
    '''Set up an empty read container for one reference item.

    gene         -- identifier of the item, stored as self.gene
    sequence     -- sequence of the item; its length is stored as self.size
    windowoffset -- stored as self.windowoffset
    biosample    -- stored as self.biosample
    norm         -- stored as self.norm
    '''
    self.gene = gene
    self.biosample = biosample
    self.norm = norm
    self.sequence = sequence
    self.size = len(sequence)
    self.windowoffset = windowoffset
    # {+/-offset: [size1, size2, ...], ...}
    self.readDict = defaultdict(list)
    # Read counters, both starting at zero.
    self.matchedreadsUp = self.matchedreadsDown = 0
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
205 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def addread(self, polarity, offset, size):
    '''Record one read of the given size in self.readDict.

    ATTENTION: the offset is stored as received; no conversion from 0-based
    to 1-based coordinates is done here, as this is now done during read
    parsing.

    A "+" read is stored under the key offset and counted in
    self.matchedreadsUp. Any other polarity is stored under the negative key
    -(offset + size - 1) and counted in self.matchedreadsDown.
    '''
    is_forward = polarity == "+"
    key = offset if is_forward else -(offset + size - 1)
    self.readDict[key].append(size)
    if is_forward:
        self.matchedreadsUp += 1
    else:
        self.matchedreadsDown += 1
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
216 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def barycenter(self, upstream_coord=None, downstream_coord=None):
    '''Return the relative barycenters of forward and reverse reads in a window.

    upstream_coord   -- first coordinate of the window; a falsy value
                        (None or 0) selects self.windowoffset
    downstream_coord -- last coordinate of the window; a falsy value selects
                        self.windowoffset + self.size - 1

    Only keys of self.readDict whose absolute value lies within
    [upstream_coord, downstream_coord] are used. Positive keys feed the
    forward barycenter, negative keys (taken as absolute values) the reverse
    one; each key is weighted by the number of reads stored under it.

    Returns the tuple (Fbarycenter, Rbarycenter), each computed as
    (weighted mean key - upstream_coord) / window size. When a strand has no
    read in the window its weighted mean is taken as 0, so the value is
    -upstream_coord / window size. The division is always a float division,
    so the result is the same under Python 2 and Python 3.
    '''
    upstream_coord = upstream_coord or self.windowoffset
    downstream_coord = downstream_coord or self.windowoffset + self.size - 1
    window_size = downstream_coord - upstream_coord + 1

    def weighted_average(pairs):
        # pairs: iterable of (coordinate, number of reads) couples
        total_weight = 0
        weighted_sum = 0
        for coordinate, count in pairs:
            weighted_sum += coordinate * count
            total_weight += count
        if total_weight > 0:
            return weighted_sum / float(total_weight)
        return 0.0

    forward_pairs = []
    reverse_pairs = []
    for key, sizes in self.readDict.items():
        if not (abs(key) >= upstream_coord and abs(key) <= downstream_coord):
            continue  # outside the requested offset window
        if key > 0:
            forward_pairs.append((key, len(sizes)))
        elif key < 0:
            reverse_pairs.append((-key, len(sizes)))

    # float() keeps Python 2 from flooring the quotient.
    Fbarycenter = (weighted_average(forward_pairs) - upstream_coord) / float(window_size)
    Rbarycenter = (weighted_average(reverse_pairs) - upstream_coord) / float(window_size)
    return Fbarycenter, Rbarycenter
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
238 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
239 def correlation_mapper (self, reference, window_size): |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
240 '''to map correlation with a sliding window 26-2-2013''' |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
241 from scipy import stats |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
242 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
243 if window_size > self.size: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
244 return [] |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
245 F=open(reference, "r") |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
246 reference_forward = [] |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
247 reference_reverse = [] |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
248 for line in F: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
249 fields=line.split() |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
250 reference_forward.append(int(float(fields[1]))) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
251 reference_reverse.append(int(float(fields[2]))) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
252 F.close() |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
253 local_object_forward=[] |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
254 local_object_reverse=[] |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
255 ## Dict to list for the local object |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
256 for i in range(1, self.size+1): |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
257 local_object_forward.append(len(self.readDict[i])) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
258 local_object_reverse.append(len(self.readDict[-i])) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
259 ## start compiling results by slides |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
260 results=[] |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
261 for coordinate in range(self.size - window_size): |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
262 local_forward=local_object_forward[coordinate:coordinate + window_size] |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
263 local_reverse=local_object_reverse[coordinate:coordinate + window_size] |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
264 if sum(local_forward) == 0 or sum(local_reverse) == 0: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
265 continue |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
266 try: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
267 reference_to_local_cor_forward = stats.spearmanr(local_forward, reference_forward) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
268 reference_to_local_cor_reverse = stats.spearmanr(local_reverse, reference_reverse) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
269 if (reference_to_local_cor_forward[0] > 0.2 or reference_to_local_cor_reverse[0]>0.2): |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
270 results.append([coordinate+1, reference_to_local_cor_forward[0], reference_to_local_cor_reverse[0]]) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
271 except: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
272 pass |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
273 return results |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
274 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def readcount(self, size_inf=0, size_sup=1000, upstream_coord=None, downstream_coord=None, polarity="both", method="conservative"):
    '''Count the reads of the window that pass size, coordinate and strand filters.

    Defaults that depend on the instance cannot be written in the signature
    (they are evaluated before instantiation), so the coordinates default to
    None and are resolved here: upstream_coord falls back to
    self.windowoffset and downstream_coord to the last coordinate of the
    window, self.windowoffset + self.size - 1.

    size_inf, size_sup -- inclusive bounds on the read size.
    upstream_coord, downstream_coord -- inclusive bounds on the coordinate.
    polarity -- "both", "forward" or "reverse". Forward reads are looked up
        under the positive coordinate in self.readDict, reverse reads under
        the negated coordinate.
    method -- any value other than "conservative" makes a "both" count
        delete every visited entry of self.readDict to save memory.
        Careful: this precludes any later re-use of self.readDict.

    Returns the number of matching reads, or None when polarity is not one
    of the three recognised values.
    '''
    window_end = self.windowoffset + self.size - 1
    upstream_coord = upstream_coord or self.windowoffset
    downstream_coord = downstream_coord or window_end

    # Shortcut through the precomputed counters (set outside this method;
    # presumably the forward and reverse totals of the window).  It is only
    # valid when no size filtering was requested: the counters know nothing
    # about size_inf / size_sup, so a size-filtered query must fall through
    # to the explicit scan below.
    covers_window = upstream_coord == 1 and downstream_coord == window_end
    no_size_filter = size_inf == 0 and size_sup == 1000
    if covers_window and no_size_filter:
        if polarity == "both":
            return self.matchedreadsUp + self.matchedreadsDown
        if polarity == "forward":
            return self.matchedreadsUp
        if polarity == "reverse":
            return self.matchedreadsDown

    # Sign applied to the coordinate to build the readDict key(s) to visit.
    if polarity == "both":
        signs = (1, -1)
    elif polarity == "forward":
        signs = (1,)
    elif polarity == "reverse":
        signs = (-1,)
    else:
        return None

    # Entries are only consumed when both strands are counted.
    consume = polarity == "both" and method != "conservative"
    n = 0
    for offset in range(upstream_coord, downstream_coord + 1):
        for sign in signs:
            key = sign * offset
            # Membership test (not indexing) so that a defaultdict-like
            # readDict is not populated with empty entries.
            if key not in self.readDict:
                continue
            n += sum(1 for read in self.readDict[key] if size_inf <= read <= size_sup)
            if consume:
                del self.readDict[key]
    return n
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
318 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def readsizes(self):
    '''Return a dictionary of number of reads by size (the keys).

    Every read size stored in self.readDict, whatever its coordinate or
    strand, is tallied.  Sizes lying between the smallest and the largest
    observed size that have no read are reported with a count of 0, so the
    keys form a contiguous range.  An empty dictionary is returned when the
    window holds no read.
    '''
    dicsize = {}
    for offset in self.readDict:
        for size in self.readDict[offset]:
            dicsize[size] = dicsize.get(size, 0) + 1
    if not dicsize:
        # Nothing to tally; min()/max() below would raise on an empty dict.
        return dicsize
    # Fill the gaps of the observed size range with null values.
    for size in range(min(dicsize), max(dicsize) + 1):
        dicsize.setdefault(size, 0)
    return dicsize
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
328 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
329 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def size_histogram(self, minquery=None, maxquery=None):  # in SmRNAwindow
    '''Build the normalised read-size histograms of the window.

    Returns a dictionary with three defaultdict(float) entries keyed by read
    size:
      'F'    -- forward reads (non-negative keys of self.readDict), each read
                adding self.norm;
      'R'    -- reverse reads (negative keys), each read subtracting
                self.norm, so the values are negative;
      'both' -- 'F' minus 'R' for every size seen on either strand.
    Every size seen on one strand is also present on the other with 0.0.

    minquery, maxquery -- when minquery is truthy, every size of the
        inclusive range minquery..maxquery missing from a histogram is added
        with a value of 0; maxquery must then be an integer.
    '''
    norm = self.norm
    size_dict = {
        'F': defaultdict(float),
        'R': defaultdict(float),
        'both': defaultdict(float),
    }
    forward = size_dict['F']
    reverse = size_dict['R']
    for offset in self.readDict:
        for size in self.readDict[offset]:
            if offset < 0:
                reverse[size] = reverse[size] - 1 * norm
            else:
                forward[size] = forward[size] + 1 * norm
    # Patch (27-08-2014) to avoid missing graphs when parsed by R-lattice:
    # a window without any read still reports one null bar at size 21.
    if not forward and not reverse:
        forward[21] = 0
        reverse[21] = 0
    # Set union instead of concatenating keys(): works on Python 2 and 3.
    # Indexing the defaultdicts also creates the missing size on the other
    # strand with a value of 0.0.
    for size in set(forward) | set(reverse):
        size_dict['both'][size] = forward[size] - reverse[size]
    if minquery:
        for histogram in size_dict.values():
            for size in range(minquery, maxquery + 1):
                histogram.setdefault(size, 0)
    return size_dict
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
357 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def statsizes(self, upstream_coord=None, downstream_coord=None):
    '''Return (mean, median, standard deviation) of the read sizes.

    Only the reads whose coordinate, taken as an absolute value so that both
    strands are included, lies within upstream_coord..downstream_coord
    (inclusive) are considered.  As in readcount, the bounds default to None
    and are resolved against the instance: self.windowoffset for the
    upstream bound and self.windowoffset + self.size - 1 for the downstream
    one.
    '''
    first = upstream_coord or self.windowoffset
    last = downstream_coord or self.windowoffset + self.size - 1
    sizes = [
        size
        for offset in self.readDict
        if first <= abs(offset) <= last
        for size in self.readDict[offset]
    ]
    return mean(sizes), median(sizes), std(sizes)
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
372 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def foldEnergy(self, upstream_coord=None, downstream_coord=None):
    '''Return, as a float, the RNAfold result for a stretch of the window sequence.

    The stretch runs from upstream_coord to downstream_coord, both inclusive
    and 1-based; as in readcount, the bounds default to None and are
    resolved to self.windowoffset and self.windowoffset + self.size - 1.
    RNAfold is given a one-element list and its result is indexed by the
    sequence itself (presumably a sequence -> energy mapping; confirm
    against the RNAfold helper).
    '''
    start = upstream_coord or self.windowoffset
    end = downstream_coord or self.windowoffset + self.size - 1
    # 1-based inclusive coordinates -> 0-based half-open slice.
    subsequence = self.sequence[start - 1:end]
    energies = RNAfold([subsequence])
    return float(energies[subsequence])
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
380 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def Ufreq(self, size_scope, upstream_coord=None, downstream_coord=None):
    '''Percentage of reads whose first base is T, both strands pooled.

    size_scope must be an iterable of read sizes; only reads whose size is
    in it are counted. upstream_coord / downstream_coord restrict the
    offsets considered and default to self.windowoffset and
    self.windowoffset+self.size-1.
    Returns "." when no read qualifies, otherwise a float percentage.'''
    lower = upstream_coord or self.windowoffset
    upper = downstream_coord or self.windowoffset+self.size-1
    complement = {"A":"T","T":"A","G":"C","C":"G","N":"N"}
    counts = {"A":0,"T":0,"G":0,"C":0, "N":0}
    for position, sizes in self.readDict.items():
        coordinate = abs(position)
        if not (lower <= coordinate <= upper):
            continue
        for read_size in sizes:
            if read_size not in size_scope:
                continue
            base = self.sequence[coordinate-self.windowoffset]
            # negative offsets are complemented before being counted
            if position < 0:
                base = complement[base]
            counts[base] += 1
    total = float(sum(counts.values()))
    if total == 0:
        return "."
    return counts["T"] / total * 100
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
401 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def Ufreq_stranded(self, size_scope, upstream_coord=None, downstream_coord=None):
    '''Strand-aware variant of Ufreq.

    size_scope must be an iterable of read sizes; only reads whose size is
    in it are counted. upstream_coord / downstream_coord restrict the
    offsets considered and default to self.windowoffset and
    self.windowoffset+self.size-1.
    Returns the string "<forward> | <reverse>", where each side is the
    percentage of reads starting with T on that strand, or "." when that
    strand has no qualifying read.'''
    lower = upstream_coord or self.windowoffset
    upper = downstream_coord or self.windowoffset+self.size-1
    complement = {"A":"T","T":"A","G":"C","C":"G","N":"N"}
    tally = {
        "Afor":0, "Tfor":0, "Gfor":0, "Cfor":0, "Nfor":0,
        "Arev":0, "Trev":0, "Grev":0, "Crev":0, "Nrev":0,
    }
    for position, sizes in self.readDict.items():
        coordinate = abs(position)
        if not (lower <= coordinate <= upper):
            continue
        for read_size in sizes:
            if read_size not in size_scope:
                continue
            base = self.sequence[coordinate-self.windowoffset]
            # negative offsets are complemented and counted as reverse
            if position < 0:
                tally[complement[base]+"rev"] += 1
            else:
                tally[base+"for"] += 1
    forward_total = float(sum(tally[base+"for"] for base in "ATGCN"))
    reverse_total = float(sum(tally[base+"rev"] for base in "ATGCN"))
    # a strand without reads is reported as "." instead of a percentage
    if forward_total == 0:
        forward_part = "."
    else:
        forward_part = tally["Tfor"] / forward_total * 100
    if reverse_total == 0:
        reverse_part = "."
    else:
        reverse_part = tally["Trev"] / reverse_total * 100
    return "%s | %s" % (forward_part, reverse_part)
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
430 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def readplot(self):
    '''Build the per-offset read count table as a list of tab-separated lines.

    Each line is "gene<TAB>offset<TAB>count<TAB>F|R": the number of reads
    stored under an offset multiplied by self.norm, written as a positive
    value for forward ("F") and a negated value for reverse ("R").
    Offsets are listed in increasing order of absolute value and zero
    counts are omitted.'''
    scale = self.norm
    row = "%s\t%s\t%s\t%s"
    lines = []
    for position in sorted(set(abs(offset) for offset in self.readDict)):
        forward = len(self.readDict.get(position, [])) * scale
        reverse = len(self.readDict.get(-position, [])) * scale
        if forward != 0:
            lines.append(row % (self.gene, position, forward, "F"))
        if reverse != 0:
            lines.append(row % (self.gene, position, -reverse, "R"))
    ## patch to avoid missing graphs when parsed by R-lattice: an otherwise
    ## empty table still gets one zero-count forward line.
    if not lines:
        lines.append(row % (self.gene, 1, 0, "F"))
    return lines
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
447 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def readcoverage(self, upstream_coord=None, downstream_coord=None, windowName=None):
    '''Per-position read coverage of the dominant strand (used by the MirParser tool).

    upstream_coord defaults to 1, downstream_coord to self.size and
    windowName to "<gene>_<upstream>_<downstream>".
    The strand with the larger self.readcount() total is selected
    ("forward" only when strictly greater, otherwise "reverse") and only
    its reads contribute to the coverage.
    Returns newline-joined, tab-separated lines holding: biosample,
    window name, position, position divided by the window length,
    coverage, coverage divided by the maximum coverage (or by 1 when the
    maximum is 0), and the selected strand. An empty string is returned
    when the coverage table has no position.'''
    upstream_coord = upstream_coord or 1
    downstream_coord = downstream_coord or self.size
    windowName = windowName or "%s_%s_%s" % (self.gene, upstream_coord, downstream_coord)
    window_length = downstream_coord-upstream_coord+1
    # NOTE(review): positions run from 1 to window_length-1, one fewer than
    # the window_length used as denominator below -- confirm this is intended.
    coverage = dict((position, 0) for position in range(1, window_length))
    totalforward = self.readcount(upstream_coord=upstream_coord, downstream_coord=downstream_coord, polarity="forward")
    totalreverse = self.readcount(upstream_coord=upstream_coord, downstream_coord=downstream_coord, polarity="reverse")
    forward_major = totalforward > totalreverse
    majorcoverage = "forward" if forward_major else "reverse"
    for offset in self.readDict:
        if forward_major:
            if offset <= 0:
                continue
            anchor = offset-upstream_coord+1
            first, step = anchor, 1
        else:
            if offset >= 0:
                continue
            anchor = -offset-upstream_coord+1
            # NOTE(review): reverse coverage starts one position below the
            # anchor tested for membership (forward starts on it) -- kept
            # as in the original, confirm this is intended.
            first, step = anchor-1, -1
        # dict membership instead of a scan of .keys()
        if anchor not in coverage:
            continue
        for read in self.readDict[offset]:
            for i in range(read):
                position = first + step*i
                # a read may span over the window limits: ignore the overhang
                if position in coverage:
                    coverage[position] += 1
    # max() of an empty table would raise; an empty table yields no output line
    peak = max(coverage.values()) if coverage else 0
    maximum = peak or 1
    output_list = []
    for n in sorted(coverage):
        output_list.append("%s\t%s\t%s\t%s\t%s\t%s\t%s" % (self.biosample, windowName, n, float(n)/window_length, coverage[n], float(coverage[n])/maximum, majorcoverage))
    return "\n".join(output_list)
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
481 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
482 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
483 def signature (self, minquery, maxquery, mintarget, maxtarget, scope, zscore="no", upstream_coord=None, downstream_coord=None): |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
484 ''' migration to memory saving by specifying possible subcoordinates |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
485 see the readcount method for further discussion |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
486 scope must be a python iterable; scope define the *relative* offset range to be computed''' |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
487 upstream_coord = upstream_coord or self.windowoffset |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
488 downstream_coord = downstream_coord or self.windowoffset+self.size-1 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
489 query_range = range (minquery, maxquery+1) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
490 target_range = range (mintarget, maxtarget+1) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
491 Query_table = {} |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
492 Target_table = {} |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
493 frequency_table = dict ([(i, 0) for i in scope]) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
494 for offset in self.readDict: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
495 if (abs(offset) < upstream_coord or abs(offset) > downstream_coord): continue |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
496 for size in self.readDict[offset]: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
497 if size in query_range: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
498 Query_table[offset] = Query_table.get(offset, 0) + 1 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
499 if size in target_range: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
500 Target_table[offset] = Target_table.get(offset, 0) + 1 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
501 for offset in Query_table: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
502 for i in scope: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
503 frequency_table[i] += min(Query_table[offset], Target_table.get(-offset -i +1, 0)) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
504 if minquery==mintarget and maxquery==maxtarget: ## added to incorporate the division by 2 in the method (26/11/2013), see signature_options.py and lattice_signature.py |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
505 frequency_table = dict([(i,frequency_table[i]/2) for i in frequency_table]) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
506 if zscore == "yes": |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
507 z_mean = mean(frequency_table.values() ) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
508 z_std = std(frequency_table.values() ) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
509 if z_std == 0: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
510 frequency_table = dict([(i,0) for i in frequency_table] ) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
511 else: |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
512 frequency_table = dict([(i, (frequency_table[i]- z_mean)/z_std) for i in frequency_table] ) |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
513 return frequency_table |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
514 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def hannon_signature(self, minquery, maxquery, mintarget, maxtarget, scope, upstream_coord=None, downstream_coord=None):
    '''Compute a normalised query/target overlap signature over ``scope``.

    Reads are taken from ``self.readDict`` (offset -> list of read sizes).
    Only offsets whose absolute value lies between ``upstream_coord`` and
    ``downstream_coord`` (both inclusive) are considered; when a bound is
    not given (or is falsy) it defaults to the window limits
    ``self.windowoffset`` and ``self.windowoffset + self.size - 1``
    (see the readcount method for the discussion of sub-coordinates).

    Reads whose size is in [minquery, maxquery] are counted as queries and
    reads whose size is in [mintarget, maxtarget] as targets (a read may be
    both).  For every query offset and every ``i`` in ``scope`` the target
    count found at offset ``-offset - i + 1`` is multiplied by the query
    count; the products of one query offset are divided by the number of
    targets found for that offset and by the total number of query reads
    before being summed into the result.

    ``scope`` must be a re-iterable (a list or a tuple) of relative offsets.
    Returns a dict mapping each element of ``scope`` to its score.
    '''
    if not upstream_coord:
        upstream_coord = self.windowoffset
    if not downstream_coord:
        downstream_coord = self.windowoffset + self.size - 1
    query_sizes = range(minquery, maxquery + 1)
    target_sizes = range(mintarget, maxtarget + 1)
    query_counts = {}
    target_counts = {}
    total_queries = 0
    result = dict.fromkeys(scope, 0)

    # Keep only the reads located in the requested coordinate interval and
    # count, per offset, how many are queries and how many are targets.
    for position in self.readDict:
        if not (upstream_coord <= abs(position) <= downstream_coord):
            continue
        read_sizes = self.readDict[position]
        n_query = sum(1 for length in read_sizes if length in query_sizes)
        n_target = sum(1 for length in read_sizes if length in target_sizes)
        if n_query:
            query_counts[position] = n_query
            total_queries += n_query
        if n_target:
            target_counts[position] = n_target

    for position in query_counts:
        n_query = query_counts[position]
        local_scores = dict.fromkeys(scope, 0)
        targets_seen = 0
        for shift in scope:
            partners = target_counts.get(-position - shift + 1, 0)
            local_scores[shift] += n_query * partners
            targets_seen += partners
        if not targets_seen:
            # No target for this query offset: it contributes nothing
            # (the normalisation would otherwise divide by zero).
            continue
        weight = 1. / targets_seen / total_queries
        for shift in scope:
            result[shift] += weight * local_scores[shift]
    return result
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
547 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def phasing(self, size_range, scope):
    '''Compute an autocorrelation-like phasing signal over ``scope``.

    Reads of ``self.readDict`` (offset -> list of read sizes) whose size is
    in ``size_range`` are counted per offset.  For each retained offset and
    each distance ``i`` in ``scope``, the read count at that offset is
    multiplied by the read count found ``i`` positions further away from
    zero (``offset + i`` for positive offsets, ``offset - i`` otherwise).
    The products of one offset are divided by the number of partner reads
    found for that offset and by the total number of retained reads, then
    summed over all offsets.

    ``scope`` must be a re-iterable Python iterable of distances.
    Returns a dict mapping each element of ``scope`` to its score.
    '''
    counts_by_offset = {}
    n_reads = 0
    result = dict.fromkeys(scope, 0)

    # Read input filtering: keep the number of in-range reads per offset.
    for position in self.readDict:
        hits = sum(1 for length in self.readDict[position] if length in size_range)
        if hits:
            counts_by_offset[position] = hits
            n_reads += hits

    # Per-offset phasing computation.
    for position in counts_by_offset:
        here = counts_by_offset[position]
        direction = 1 if position > 0 else -1
        local_scores = dict.fromkeys(scope, 0)
        partners_seen = 0
        for distance in scope:
            partners = counts_by_offset.get(position + direction * distance, 0)
            local_scores[distance] += here * partners
            partners_seen += partners
        if not partners_seen:
            # Nothing in phase with this offset: skip it rather than
            # dividing by zero in the normalisation below.
            continue
        # Fold the local table into the general one (average over offsets).
        weight = 1. / partners_seen / n_reads
        for distance in scope:
            result[distance] += weight * local_scores[distance]
    return result
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
577 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
578 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
579 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def z_signature(self, minquery, maxquery, mintarget, maxtarget, scope):
    '''Return the z-score transform of ``self.signature(...)``.

    Requires ``mean`` and ``std`` from numpy to be in scope.  ``scope`` must
    be a Python iterable defining the relative offsets to compute; it is
    passed unchanged to ``self.signature`` together with the size bounds.

    Each value of the signature table is replaced by
    ``(value - mean) / std`` computed over all values of the table.  When
    the standard deviation is zero every key is mapped to 0 instead.
    '''
    counts = self.signature(minquery, maxquery, mintarget, maxtarget, scope)
    ordered_keys = sorted(counts)
    values = [counts[key] for key in ordered_keys]
    spread = std(values)
    if not spread:
        # Flat signature: z-scores are undefined, report zeros.
        return {key: 0 for key in ordered_keys}
    centre = mean(values)
    return {key: (value - centre) / spread for key, value in zip(ordered_keys, values)}
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
592 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def percent_signature(self, minquery, maxquery, mintarget, maxtarget, scope):
    '''Return ``self.signature(...)`` expressed as a percentage of reads.

    The signature table obtained for the given size bounds and ``scope`` is
    divided by the number of reads (as reported by ``self.readsizes()``)
    whose size falls in the query or in the target size range, and
    multiplied by 100.

    Both size ranges are inclusive of their upper bound, consistently with
    the other signature methods of this class (``maxquery + 1`` /
    ``maxtarget + 1``).  The previous implementation excluded the upper
    bounds, so a single-size range (``minquery == maxquery``) produced a
    total of 0 and therefore an all-zero result.

    Returns a dict mapping each element of ``scope`` to its percentage, or
    to 0 when no read falls in the size ranges.
    '''
    frequency_table = self.signature(minquery, maxquery, mintarget, maxtarget, scope)
    # Union of the two inclusive size ranges; built with sets so that it
    # also works where range() objects cannot be concatenated with "+".
    sizes = set(range(minquery, maxquery + 1)) | set(range(mintarget, maxtarget + 1))
    # NOTE(review): readsizes() is assumed to be side-effect free, so it is
    # called once here instead of once per size - confirm.
    size_counts = self.readsizes()
    total = float(sum(size_counts.get(size, 0) for size in sizes))
    if total == 0:
        return dict((i, 0) for i in scope)
    return dict((i, frequency_table[i] / total * 100) for i in scope)
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
599 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def pairer(self, overlap, minquery, maxquery, mintarget, maxtarget):
    '''Return the sequences of the query/target read pairs found in the window.

    Reads of ``self.readDict`` (offset -> list of read sizes) are split
    into queries (size in [minquery, maxquery]) and targets (size in
    [mintarget, maxtarget]); the bounds are converted with ``int``.  A query
    offset is matched with the target offset ``-offset - overlap + 1``, and
    as many pairs are produced as the smaller of the two read lists allows.

    For each pair two strings are appended to the result, query first: the
    read taken from ``self.sequence`` and passed through ``RNAtranslate``,
    prefixed with "+" when its offset is >= 0, or with "-" (and passed
    through ``antipara`` first) otherwise.
    '''
    query_sizes = range(int(minquery), int(maxquery) + 1)
    target_sizes = range(int(mintarget), int(maxtarget) + 1)
    queries = defaultdict(list)
    targets = defaultdict(list)
    # Selection of data: sort read sizes into query and target tables.
    for position in self.readDict:
        for length in self.readDict[position]:
            if length in query_sizes:
                queries[position].append(length)
            if length in target_sizes:
                targets[position].append(length)

    pairs = []
    for position, query_lengths in queries.items():
        partner = -position - overlap + 1
        target_lengths = targets.get(partner)
        if not target_lengths:
            continue
        # zip() stops at the shorter list, i.e. min(len(query), len(target)) pairs.
        for q_len, t_len in zip(query_lengths, target_lengths):
            if position >= 0:
                pairs.append("+%s" % RNAtranslate(self.sequence[position:position + q_len]))
                pairs.append("-%s" % RNAtranslate(antipara(self.sequence[-partner - t_len + 1:-partner + 1])))
            else:
                pairs.append("-%s" % RNAtranslate(antipara(self.sequence[-position - q_len + 1:-position + 1])))
                pairs.append("+%s" % RNAtranslate(self.sequence[partner:partner + t_len]))
    return pairs
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
626 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def pairable(self, overlap, minquery, maxquery, mintarget, maxtarget):
    """Return the sequences of query reads and of the target reads they pair with.

    Every read size stored in ``self.readDict`` (keyed by signed offset) is
    kept as a query candidate when it lies in ``minquery..maxquery`` and as
    a target candidate when it lies in ``mintarget..maxtarget`` (both ranges
    inclusive, bounds are converted with ``int``).

    For each query offset, the mate offset is ``-offset - overlap + 1``.
    When target reads exist at the mate offset, the query sequences are
    appended first, followed by the target sequences.  Sequences sliced at
    a non-negative offset are prefixed with "+"; sequences belonging to a
    negative offset are passed through ``antipara`` and prefixed with "-".
    All sequences go through ``RNAtranslate``.

    Returns:
        list of "+<sequence>" / "-<sequence>" strings.
    """
    query_sizes = range(int(minquery), int(maxquery) + 1)
    target_sizes = range(int(mintarget), int(maxtarget) + 1)
    queries = defaultdict(list)
    targets = defaultdict(list)

    # Sort every stored read size into the query and/or target tables.
    for position, lengths in self.readDict.items():
        for length in lengths:
            if length in query_sizes:
                queries[position].append(length)
            if length in target_sizes:
                targets[position].append(length)

    paired = []
    for position in queries:
        mate = -position - overlap + 1
        mate_lengths = targets[mate]
        if not mate_lengths:
            continue
        if position >= 0:
            # Query sliced directly, mates sliced from the mirrored window.
            paired.extend(
                ["+%s" % RNAtranslate(self.sequence[position:position + length])
                 for length in queries[position]]
            )
            stop = -mate + 1
            paired.extend(
                ["-%s" % RNAtranslate(antipara(self.sequence[stop - length:stop]))
                 for length in mate_lengths]
            )
        else:
            # Query sliced from the mirrored window, mates sliced directly.
            stop = -position + 1
            paired.extend(
                ["-%s" % RNAtranslate(antipara(self.sequence[stop - length:stop]))
                 for length in queries[position]]
            )
            paired.extend(
                ["+%s" % RNAtranslate(self.sequence[mate:mate + length])
                 for length in mate_lengths]
            )
    return paired
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
655 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def newpairable_bowtie(self, overlap, minquery, maxquery, mintarget, maxtarget):
    """Return bowtie-like records for the query reads that have a paired target.

    Revision of ``pairable`` (3-12-2012) focused on the offset shift problem:
    bowtie uses 1-based coordinates whereas Python strings are 0-based.

    A read size from ``self.readDict`` is a query candidate when it lies in
    ``minquery..maxquery`` and marks its offset as a target offset when it
    lies in ``mintarget..maxtarget`` (inclusive bounds, converted with
    ``int``).  A query offset is reported only when the mate offset
    ``-offset - overlap + 1`` is a target offset.

    Each record is a tab-separated string:
    ``counter, polarity, self.gene, 0-based start, sequence slice`` where
    ``counter`` starts at 1 and increases with every record, and polarity
    is "+" for offsets >= 0 and "-" for negative offsets.

    Returns:
        list of record strings (empty when nothing pairs).
    """
    min_query, max_query = int(minquery), int(maxquery)
    min_target, max_target = int(mintarget), int(maxtarget)

    queryhash = defaultdict(list)
    # Only the presence of a target read at an offset matters, so a set of
    # offsets is enough (and lookups never insert phantom entries).
    target_offsets = set()
    for offset in self.readDict:
        for size in self.readDict[offset]:
            if min_query <= size <= max_query:
                queryhash[offset].append(size)
            if min_target <= size <= max_target:
                target_offsets.add(offset)

    bowtie_output = []
    counter = 0
    for offset, sizes in queryhash.items():
        matched_offset = -offset - overlap + 1
        if matched_offset not in target_offsets:
            continue
        for size in sizes:
            counter += 1
            # Mind the 1-based (bowtie) versus 0-based (Python) offset.
            if offset >= 0:
                polarity, start = "+", offset - 1
            else:
                polarity, start = "-", -offset - size
            bowtie_output.append(
                "%s\t%s\t%s\t%s\t%s"
                % (counter, polarity, self.gene, start,
                   self.sequence[start:start + size])
            )
    return bowtie_output
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
683 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
684 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
def __main__(bowtie_index_path, bowtie_output_path):
    """Load bowtie alignments per gene and print the paired reads of each gene.

    Args:
        bowtie_index_path: path handed to ``get_fasta`` to obtain the
            gene -> sequence dictionary.
        bowtie_output_path: path of the bowtie output file read as input.
            Whitespace-separated columns are used as follows: 2nd = polarity,
            3rd = gene, 4th = offset (integer), 5th = read sequence (only
            its length is kept).

    Raises:
        KeyError: if an alignment names a gene missing from the sequence
            dictionary.
    """
    sequenceDic = get_fasta(bowtie_index_path)
    objDic = {}
    # The context manager closes the file even if a line fails to parse.
    with open(bowtie_output_path, "r") as F:
        for line in F:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines (e.g. a trailing newline)
            polarity = fields[1]
            gene = fields[2]
            offset = int(fields[3])
            size = len(fields[4])
            # Explicit membership test: a KeyError raised inside addread must
            # not be mistaken for "gene not seen yet" and reset the window.
            if gene not in objDic:
                objDic[gene] = SmRNAwindow(gene, sequenceDic[gene])
            objDic[gene].addread(polarity, offset, size)
    for gene in objDic:
        # NOTE(review): arguments presumably are overlap, minquery, maxquery,
        # mintarget, maxtarget -- confirm against pairer's signature.
        # Single-string form prints the same text on Python 2 and Python 3.
        print("%s %s" % (gene, objDic[gene].pairer(19, 19, 23, 19, 23)))
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
703 |
234b83159ea8
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_read_size_histograms commit ab983b2e57321e8913bd4d5f8fc89c3223c69869
artbio
parents:
diff
changeset
|
# Script entry point: first argument is the bowtie index (fasta) path,
# second argument is the bowtie output path.
if __name__ == "__main__":
    __main__(sys.argv[1], sys.argv[2])