annotate mirgene_functions.py @ 34:66e4d57f11c9 draft

Uploaded
author glogobyte
date Thu, 02 Dec 2021 14:12:59 +0000
parents 810e789ffeab
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
1 import itertools
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
2 import urllib.request
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
3 from collections import OrderedDict
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
4 import copy
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
5
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
6 ########################################################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
7
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
8 """ Read a file and return it as a list """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
9
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def read(path, flag):
    """Read a text file and return its lines as a list.

    Args:
        path: Path of the file to read.
        flag: ``0`` keeps the line terminators (``readlines``); ``1`` strips
            them (``read().splitlines()``).

    Returns:
        A list of strings, or ``None`` when ``flag`` is neither 0 nor 1
        (in that case the file is not opened at all).
    """
    if flag not in (0, 1):
        return None

    # The context manager closes the file; no explicit close() is needed.
    with open(path) as fp:
        if flag == 0:
            return fp.readlines()
        return fp.read().splitlines()
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
22
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
23 # Write a list to a txt file
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def write(path, list):
    """Write the inner fields of every row to a text file.

    For each row, the elements between the first and the last one
    (``row[1:-1]``) are joined with tabs and written as-is; no separator or
    newline is inserted between rows.

    Args:
        path: Destination file; it is created or truncated.
        list: Iterable of rows, each a sequence of strings. (The parameter
            name shadows the builtin but is kept for caller compatibility.)
    """
    # The context manager closes the file; no explicit close() is needed.
    with open(path, 'w') as fp:
        fp.writelines("\t".join(row[1:-1]) for row in list)
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
29
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
30 ########################################################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
31
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
32 """ Detect the longest common substring sequence between two mirnas """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
33
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def longestSubstring(str1, str2):
    """Return the longest common contiguous substring of two sequences.

    Args:
        str1: First sequence; the returned substring is sliced from it.
        str2: Second sequence.

    Returns:
        The longest block shared by ``str1`` and ``str2``, or ``None`` (after
        printing a notice) when they have no character in common.
    """
    from difflib import SequenceMatcher

    # autojunk must be disabled: with the default heuristic, any character
    # that is "popular" in a second sequence of 200+ items is ignored, which
    # for a 4-letter nucleotide alphabet means nothing ever matches.
    seqMatch = SequenceMatcher(None, str1, str2, autojunk=False)

    # Result looks like Match(a=0, b=0, size=5)
    match = seqMatch.find_longest_match(0, len(str1), 0, len(str2))

    if match.size != 0:
        return str1[match.a: match.a + match.size]

    print('No longest common sub-string found')
    return None
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
50
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
51
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
52 #################################################################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
53
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
54 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
55
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
56 Read the sam files from the alignment tool and do the following:
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
57
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
58 1) Keep mapped reads
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
59 2) Keep all sequences with length between 18 and 26 nucleotides
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
60 3) Detects the ref and templated miRNAs
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
61 4) Gives names to templated miRNAs based on ref miRNAs
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
62
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
63 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
64
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def sam_edit(mature_mirnas,path,file,case,l,samples,data,file_order,unmap_seq,names_n_seqs,deseq,mirna_names,ini_sample,unmap_counts):
    """Parse one SAM file, name the templated isomiRs and publish the results.

    Records are split on tabs; the code reads field 0 as ``<id>-<count>``,
    field 1 as the flag, field 2 as the reference name and field 9 as the
    read sequence. Reads with flag ``'0'`` and a length of 18-26 are kept.
    For every kept read whose reference name equals ``mature_mirnas[i]``,
    the read is compared with ``mature_mirnas[i+1]`` and its name is suffixed
    with ``_t_<5' shift>_<3' shift>`` when it differs from the reference.

    Args:
        mature_mirnas: Flat list alternating reference name and sequence.
        path: Path of the SAM file.
        file: Label of the sample, appended to ``file_order`` and ``data``.
        case: Group tag; only ``"c"`` and ``"t"`` feed the name/count outputs.
        l: Lock-like object with ``acquire()``/``release()``.
        samples, data, file_order, names_n_seqs, deseq, mirna_names,
        ini_sample: Output lists, appended to/extended in place
            (presumably shared multiprocessing lists - confirm with caller).
        unmap_seq, unmap_counts: Objects with a numeric ``.value`` that
            accumulate the number of flag-``'4'`` records and their counts.

    Returns:
        None. All results are delivered through the output arguments.
    """

    def signed(offset):
        # "0" for no shift, otherwise an explicitly signed integer ("+2", "-1")
        return "0" if offset == 0 else '{:+d}'.format(offset)

    # Read the SAM file; lines whose first field contains "@" are headers
    ini_sam = read(path, 0)
    main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
    unique_seq = [x for x in main_sam if x[1] == '0' and 18 <= len(x[9]) <= 26]
    filter_sam = [[x[0], x[1], x[2], len(x[9])] for x in main_sam]
    sorted_uni_arms = []

    # Detection of differences between the canonical miRNA and the detected miRNA
    for i in range(len(mature_mirnas)):
        tmp_count_reads = 0   # total number of reads for this reference
        tmp_count_seq = 0     # total number of distinct sequences for this reference

        for j in range(len(unique_seq)):
            hit = unique_seq[j]
            if mature_mirnas[i] != hit[2]:
                continue

            temp_mature = mature_mirnas[i+1]
            off_part = longestSubstring(temp_mature, hit[9])

            # Flanks left on each side of the shared core, in the reference...
            mat_parts = temp_mature.split(off_part)
            mat_diff = [len(mat_parts[0]), len(mat_parts[1])]

            # ...and in the read
            read_parts = hit[9].split(off_part)
            unique_diff = [len(read_parts[0]), len(read_parts[1])]

            # Special miRNAs (e.g. hsa-miR-8485): both sequences overhang on
            # the same side, so the read is not a plain shift of the
            # reference. Mark it for removal and skip naming/counting.
            if (mat_diff[1] != 0 and unique_diff[1] != 0) or (mat_diff[0] != 0 and unique_diff[0] != 0):
                unique_seq[j] = None
                continue

            # Keep the findings
            pre_pos = mat_diff[0] - unique_diff[0]
            post_pos = unique_diff[1] - mat_diff[1]
            tmp_count_reads += int(hit[0].split("-")[1])
            tmp_count_seq += 1

            # Store the detected miRNA under a new name according to the findings
            if pre_pos != 0 or post_pos != 0:
                hit[2] = hit[2] + "_t_" + signed(pre_pos) + "_" + signed(post_pos)

        # Drop the reads marked above before the next reference is examined
        unique_seq = [x for x in unique_seq if x is not None]

        # Metrics for the production of the database
        if tmp_count_reads != 0 and tmp_count_seq != 0:
            sorted_uni_arms.append([mature_mirnas[i], tmp_count_seq, tmp_count_reads])

    # Sorting of the metrics for the database (by number of sequences)
    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)

    # Correction of the metrics after collapsing/removal: recount per arm,
    # where the arm is the first two "_"-separated parts of the read name.
    if sorted_uni_arms:
        qnames_by_arm = {}
        for x in unique_seq:
            parts = x[2].split("_")
            qnames_by_arm.setdefault(parts[0] + "_" + parts[1], []).append(x[0])
        for y in sorted_uni_arms:
            qnames = qnames_by_arm.get(y[0], [])
            y[1] = len(qnames)
            y[2] = sum(int(q.split("-")[1]) for q in qnames)

    # Output variables; the lock is released even if publishing fails
    l.acquire()
    try:
        if case == "c" or case == "t":
            names_n_seqs.extend([[y[2], y[9]] for y in unique_seq])
            deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq])
            mirna_names.extend([z[2] for z in unique_seq])
            unmap_seq.value += sum([1 for x in main_sam if x[1] == '4'])
            unmap_counts.value += sum([int(x[0].split("-")[1]) for x in main_sam if x[1] == '4'])
        file_order.append(file)
        samples.append(unique_seq)
        data.append([case, file, unique_seq, sorted_uni_arms])
        ini_sample.append(filter_sam)
    finally:
        l.release()
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
157
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
158
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
159 ######################################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
160 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
161
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
162 Read a sam file from Bowtie and do the following:
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
163
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
164 1) Keep unmapped reads
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
165 2) Keep all sequences with length between 18 and 26 nucleotides
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
166 3) Detects the non-template isomirs
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
167 4) Gives names to isomir's based on ref miRNAs
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
168
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
169 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
170
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def non_sam_edit(mature_mirnas,path,file,case,l,data,file_order,n_deseq,names_n_seqs):
    """Parse one SAM file and name the non-templated isomiRs among unmapped reads.

    Records with flag ``'4'`` and a sequence length of 18-26 are examined.
    A read is reported when it starts with a reference sequence (with U
    replaced by T) and extends it by fewer than 4 extra 3' nucleotides. It
    is then renamed ``<ref name>_nont_0_<+n>_<extra nucleotides>``.

    Args:
        mature_mirnas: Flat list alternating reference name and sequence
            (sequences are read from the odd indices).
        path: Path of the SAM file.
        file: Label of the sample, appended to ``file_order`` and ``data``.
        case: Group tag; only ``"c"`` and ``"t"`` feed the name/count outputs.
        l: Lock-like object with ``acquire()``/``release()``.
        data, file_order, n_deseq, names_n_seqs: Output lists, appended
            to/extended in place (presumably shared multiprocessing lists -
            confirm with caller).

    Returns:
        None. All results are delivered through the output arguments.
    """
    # Read the SAM file; lines whose first field contains "@" are headers
    ini_sam = read(path, 0)
    main_sam = [x.rstrip("\n").split("\t") for x in ini_sam if "@" not in x.split("\t")[0]]
    unique_seq = [x for x in main_sam if x[1] == '4' and 18 <= len(x[9]) <= 26]
    uni_seq = []

    # Calculate the shifted positions for every isomiR and add them to its name
    sorted_uni_arms = []
    for i in range(1, len(mature_mirnas), 2):
        tmp_count_reads = 0   # total number of reads for this reference
        tmp_count_seq = 0     # total number of distinct sequences for this reference

        # Loop-invariant: the reference sequence in DNA alphabet
        temp_mature = mature_mirnas[i].strip().replace("U", "T")

        for j in range(len(unique_seq)):
            candidate = unique_seq[j]

            # Only reads that contain the whole canonical miRNA are considered
            if temp_mature not in candidate[9]:
                continue

            off_part = longestSubstring(temp_mature, candidate[9])

            mat_parts = temp_mature.split(off_part)
            mat_diff = [len(mat_parts[0]), len(mat_parts[1])]

            # More than two pieces means the core occurs several times in
            # the read; such reads are skipped.
            read_parts = candidate[9].split(off_part)
            if len(read_parts) > 2:
                continue
            unique_diff = [len(read_parts[0]), len(read_parts[1])]

            pre_pos = mat_diff[0] - unique_diff[0]
            post_pos = unique_diff[1] - mat_diff[1]

            # Keep reads with an unchanged 5' end and fewer than 4 added 3' bases
            if pre_pos == 0 and post_pos < 4:
                tmp_count_reads += int(candidate[0].split("-")[1])
                tmp_count_seq += 1

                # Rows are flat lists of strings, so a shallow copy suffices
                t_name = list(candidate)
                t_name[2] = mature_mirnas[i - 1] + "_nont_" + str(pre_pos) + "_" + '{:+d}'.format(post_pos) + "_" + str(candidate[9][len(off_part):])
                uni_seq.append(t_name)

        # Metrics for the production of the database
        if tmp_count_reads != 0 and tmp_count_seq != 0:
            sorted_uni_arms.append([mature_mirnas[i-1], tmp_count_seq, tmp_count_reads])

    sorted_uni_arms = sorted(sorted_uni_arms, key=lambda x: x[1], reverse=True)

    # Remove exact duplicate rows while keeping first-seen order
    unique_seq = list(map(list, OrderedDict.fromkeys(map(tuple, uni_seq))))

    # Output variables; the lock is released even if publishing fails
    l.acquire()
    try:
        if case == "c" or case == "t":
            names_n_seqs.extend([[y[2], y[9]] for y in unique_seq if y[2] != "*"])
            n_deseq.append([[x[2], x[0].split('-')[1], x[9]] for x in unique_seq if x[2] != "*"])
        file_order.append(file)
        data.append([case, file, unique_seq, sorted_uni_arms])
    finally:
        l.release()
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
231
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
232 #################################################################################################################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
233
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
234 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
235
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
236 This function detects the differences between the two groups (control, treated).
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
237 Then copy the undetected miRNAs from one group to other and add zeros as counts.
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
238 With this way the two groups will have the same number of miRNAs.
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
239
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
240 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
241
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def black_white(mirna_names_1,mirna_names_2,group,manager):
    """Pad a group's count matrix with the entries it is missing.

    Rows of ``mirna_names_1`` that do not occur in ``mirna_names_2`` are
    de-duplicated, sorted, padded with ``"0"`` counts and appended after the
    de-duplicated, sorted rows of ``group``. The result is pushed into
    ``manager``.

    Args:
        mirna_names_1: Rows (lists of strings, e.g. ``[name, sequence]``) of
            the reference collection. The rows are not modified.
        mirna_names_2: Rows already present in ``group``'s collection.
        group: Count rows ``[name, sequence, count, ...]``; sorted in place.
        manager: Output list; extended in place with the padded matrix.

    Returns:
        None.
    """
    # Set-based difference instead of a list scan per row; rows are turned
    # into tuples only for hashing.
    known = {tuple(row) for row in mirna_names_2}
    missing = sorted({tuple(row) for row in mirna_names_1} - known)

    group.sort()
    distinct_rows = [row for row, _ in itertools.groupby(group)]

    # Every row has two label columns followed by one column per sample.
    # With no rows at all there are no sample columns to pad.
    zeros = ["0"] * (len(distinct_rows[0]) - 2) if distinct_rows else []

    # Build fresh rows so the caller's mirna_names_1 rows are left untouched
    padded = [list(row) + zeros for row in missing]

    manager.extend(distinct_rows + padded)
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
256
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
257 ########################################################################################################>
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
258
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
259 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
260
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
261 This function collapses the miRNAs with same sequences and different names into one entry
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
262 by merging all the different names into one and are separated with the character "/"
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
263
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
264 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
265
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def merging_dupes(group,f_dupes):
    """Collapse entries that share a sequence into one "/"-joined name.

    A row ``[name, sequence, ...]`` marks its sequence as duplicated when
    either its sequence or its name has already been seen in an earlier row.
    For each duplicated sequence the names of all rows carrying it are
    gathered. Names with the same arm (first two ``_``-separated parts)
    are represented once, by the shortest name encountered. The gathered
    names are merged into a single string: the first name, followed by the
    remaining ones in reverse order, separated by ``/``.

    Args:
        group: Rows whose first two items are name and sequence (strings).
        f_dupes: Output list; extended in place with ``[sequence, names]``
            pairs, ordered by first appearance of the duplicated sequence
            (deterministic across runs).

    Returns:
        None.
    """

    def arm_key(name):
        # First two "_"-separated parts; a name without "_" is its own key
        return "_".join(name.split("_")[:2])

    # Pass 1: find duplicated sequences. One set holds both names and
    # sequences, mirroring the single "seen" collection of the algorithm.
    seen = set()
    by_seq = {}   # sequence -> [sequence, name, name, ...], insertion-ordered
    for row in group:
        name, seq = row[0], row[1]
        if seq not in seen and name not in seen:
            seen.add(seq)
            seen.add(name)
        else:
            by_seq.setdefault(seq, [seq])

    # Pass 2: collect the names attached to every duplicated sequence
    for row in group:
        entry = by_seq.get(row[1])
        if entry is None:
            continue

        name = row[0]
        if len(entry) == 1:
            entry.append(name)
            continue

        same_arm = False
        for i in range(1, len(entry)):
            if arm_key(entry[i]) == arm_key(name):
                same_arm = True
                # Prefer the shorter name for the same arm
                if len(name) < len(entry[i]):
                    del entry[i]
                    entry.append(name)
                    break

        if not same_arm:
            entry.append(name)

    # Pass 3: merge all names of a sequence into one "/"-separated string
    dupes = list(by_seq.values())
    for entry in dupes:
        if len(entry) > 2:
            entry[1:] = ["/".join([entry[1]] + entry[:1:-1])]

    f_dupes.extend(dupes)
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
309
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
310
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
311 ########################################################################################################>
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
312
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
313 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
314
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
315 This function removes the duplications of sequences based on output from the function merging_dupes
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
316
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
317 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
318
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def apply_merging_dupes(group,dupes,managger):
    """Rename duplicated sequences with their merged name and drop repeats.

    Every row of ``group`` whose sequence (item 1) appears in ``dupes`` gets
    its name (item 0) replaced by the merged name. The rows are then sorted,
    identical rows are collapsed, and the result is pushed into ``managger``.

    Args:
        group: Rows ``[name, sequence, ...]``; renamed and sorted in place.
        dupes: ``[sequence, merged_name]`` pairs. If a sequence is listed
            more than once, the last pair wins.
        managger: Output list; extended in place with the distinct rows.

    Returns:
        None.
    """
    # One dictionary lookup per row instead of scanning all dupes per row
    merged_name = {entry[0]: entry[1] for entry in dupes if len(entry) > 1}

    for row in group:
        if row[1] in merged_name:
            row[0] = merged_name[row[1]]

    group.sort()
    distinct_rows = [row for row, _ in itertools.groupby(group)]
    managger.extend(distinct_rows)
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
329
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
330 ########################################################################################################>
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
331
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
332 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
333
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
334 This function is optional and performs a filter for low counts miRNAs based on
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
335 number of counts and the percentage of the samples, according to user preferences
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
336
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
337 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
338
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def filter_low_counts(c_group,t_group,fil_c_group,fil_t_group,per,counts):
    """Keep the miRNAs that are sufficiently expressed in either group.

    Rows of ``c_group`` and ``t_group`` are paired by position. Each row is
    ``[name, sequence, count, count, ...]`` with one count per sample. A pair
    is kept when, in the control row OR in the treated row, the number of
    samples with at least ``counts`` reads reaches ``per`` percent of that
    group's samples (rounded with ``round``).

    Args:
        c_group: Count rows of the control group.
        t_group: Count rows of the treated group, aligned with ``c_group``.
        fil_c_group: Output list; extended with the kept control rows.
        fil_t_group: Output list; extended with the kept treated rows.
        per: Percentage of samples (anything accepted by ``int``).
        counts: Minimum read count (anything accepted by ``int``).

    Raises:
        IndexError: If ``t_group`` has fewer rows than ``c_group``.

    Returns:
        None.
    """
    # Nothing to filter; also avoids indexing into an empty matrix
    if not c_group:
        return
    if len(t_group) < len(c_group):
        raise IndexError("t_group has fewer rows than c_group")

    min_count = int(counts)
    percent = int(per) / 100

    # The sample count comes from the first row, so a matrix with a single
    # row works too; rows are expected to share one length.
    c_col_filter = round(percent * (len(c_group[0]) - 2))
    t_col_filter = round(percent * (len(t_group[0]) - 2))

    def expressed_samples(row):
        # Number of sample columns (everything after name and sequence)
        # reaching the minimum count
        return sum(1 for value in row[2:] if int(value) >= min_count)

    c_group_new = []
    t_group_new = []
    for i, c_row in enumerate(c_group):
        t_row = t_group[i]
        if expressed_samples(c_row) >= c_col_filter or expressed_samples(t_row) >= t_col_filter:
            t_group_new.append(t_row)
            c_group_new.append(c_row)

    fil_c_group.extend(c_group_new)
    fil_t_group.extend(t_group_new)
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
361
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
362 ##################################################################################################################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
363
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
364 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
365
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
366 This function exports the count matrices for every group (controls, treated)
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
367 and condition (ref and templated miRNAs, non-templated miRNAs)
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
368
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
369 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
370
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def _write_count_matrix(path, sample_names, rows):
    """Write one tab-separated count matrix (header line plus one line per row)."""
    with open(path, 'w') as fp:
        fp.write("Name\t")
        fp.write("Sequence")
        for sample in sample_names:
            fp.write("\t"+sample)

        # Each row starts with a newline, so the file has no trailing newline.
        for row in rows:
            fp.write("\n%s" % "\t".join(row))


def write_main(raw_con, raw_tre, fil_con, fil_tre, con_file_order, tre_file_order, flag, n1, n2, per):
    """Export the raw (and optionally filtered) count matrices of both groups.

    Files are written into the ``Counts/`` directory relative to the current
    working directory; the directory is not created here.

    Parameters:
        raw_con, raw_tre: unfiltered rows (lists of strings) written to the
            ``n1`` and ``n2`` files respectively.
        fil_con, fil_tre: filtered rows written to the ``n1`` and ``n2``
            files respectively.
        con_file_order, tre_file_order: column names appended to the header
            of the ``n1`` and ``n2`` files respectively.
        flag: 1 writes the "Templated" files, 2 writes the "Non-Templated"
            files; any other value writes nothing.
        n1, n2: group names used in the output file names.
        per: when ``int(per)`` equals -1 the "Filtered" files are skipped.

    Returns:
        None.
    """

    if flag == 1:
        kind = 'Templated'
    elif flag == 2:
        kind = 'Non-Templated'
    else:
        return

    if int(per)!=-1:
        _write_count_matrix('Counts/Filtered '+n2+' '+kind+' Counts', tre_file_order, fil_tre)
        _write_count_matrix('Counts/Filtered '+n1+' '+kind+' Counts', con_file_order, fil_con)

    _write_count_matrix('Counts/Raw '+n2+' '+kind+' Counts', tre_file_order, raw_tre)
    _write_count_matrix('Counts/Raw '+n1+' '+kind+' Counts', con_file_order, raw_con)
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
460
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
461 ####################################################################################################################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
462
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
463 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
464
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
465 This function exports the files of the database with all the info
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
466 about every type of the detected miRNAs for every sample
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
467
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
468 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
469
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def DB_write(con,name,unique_seq,sorted_uni_arms,f):
    """Write the per-sample report that lists the detected sequences per arm.

    The output folder is chosen from ``f`` and ``con``:
    ``f == 1`` -> ``split1/`` ("c") or ``split2/`` ("t");
    ``f == 2`` -> ``split3/`` ("c") or ``split4/`` ("t").
    Any other value of ``f`` writes nothing.

    Parameters:
        con: "c" or "t"; selects the output folder (see above).
        name: file name inside the chosen folder.
        unique_seq: sequence records; item 0 is a string whose part after
            the first "-" is the read count, item 2 is the name and item 9
            is the sequence.
        sorted_uni_arms: arm records ``[arm name, sequence count, total reads]``.
        f: 1 or 2; selects the matching rule and the folder pair.

    Raises:
        ValueError: if ``f`` is 1 or 2 and ``con`` is neither "c" nor "t"
            (previously this surfaced as an UnboundLocalError).
    """

    if f==1:
        folders = {"c": 'split1/', "t": 'split2/'}
    elif f==2:
        folders = {"c": 'split3/', "t": 'split4/'}
    else:
        return

    if con not in folders:
        raise ValueError("con must be 'c' or 't', got %r" % (con,))

    banner = "*********************************************************************************************************"

    def read_count(record):
        return int(record[0].split('-')[1])

    def belongs_to(arm_name, record):
        if f==1:
            parts = record[2].split("_")
            # NOTE: substring test against the first two "_"-separated
            # fields of the name, kept exactly as in the original code.
            return arm_name in (parts[0]+"_"+parts[1])
        return arm_name==record[2].split("_nont_")[0]

    with open(folders[con]+name, 'w') as fp:
        fp.write("%s\t%-42s\t%s\n\n" % ("Number of Reads","Name of isomir","Sequence"))

        for arm in sorted_uni_arms:
            members = [record for record in unique_seq if belongs_to(arm[0], record)]

            # With f == 2 arms without any matching sequence are skipped;
            # with f == 1 the arm header is written even when nothing matches.
            if f==2 and not members:
                continue

            members.sort(key=read_count, reverse=True)
            fp.write(banner+"\n")
            fp.write("%-8s\t%-22s\t%-25s\t%-30s\t%s\n" % ("|",str(arm[0]),"Sequence count = "+str(arm[1]),"Total reads = "+str(arm[2]),"|"))
            fp.write(banner+"\n\n")
            for record in members:
                fp.write("%-8s\t%-40s\t%s\n" % (record[0].split("-")[1], record[2],record[9]))
            fp.write("\n" + "\n")
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
519
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
520
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
521 #########################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
522
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
523 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
524
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
525 This function merges the different names for the same mirna sequence per group (controls, treated) to avoid duplicates
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
526
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
527 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
528
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def merging_names(ini_mat,new):
    """Give every row that shares a sequence one common, merged name.

    Each row of ``ini_mat`` is a list whose item 0 is the name and item 1 is
    the sequence.  A sequence is treated as duplicated when a row's sequence
    or name has already been seen in an earlier row.  For every duplicated
    sequence the names of all rows carrying it are collected; names that
    agree on their first two "_"-separated fields are represented by the
    shortest one.  The collected names are joined with "/" (first collected
    name first, then the remaining ones in reverse collection order) and
    written back to item 0 of every row with that sequence.

    Side effects (kept from the original implementation): rows of
    ``ini_mat`` are renamed in place and ``ini_mat`` itself is sorted in
    place.  The de-duplicated rows are appended to ``new``.

    Parameters:
        ini_mat: list of rows ``[name, sequence, ...]``.
        new: output list, extended in place with the merged, unique rows.

    Returns:
        None.
    """

    def arm_prefix(name):
        # First two "_"-separated fields; raises IndexError without a "_".
        parts = name.split("_")
        return parts[0]+"_"+parts[1]

    # Pass 1: find the duplicated sequences.  Names and sequences share one
    # "seen" pool, as in the original list-based membership test.
    seen = set()
    dupe_seqs = set()
    for row in ini_mat:
        if row[1] not in seen and row[0] not in seen:
            seen.add(row[1])
            seen.add(row[0])
        else:
            dupe_seqs.add(row[1])

    # Pass 2: collect the names of every duplicated sequence in row order.
    names_by_seq = {seq: [] for seq in dupe_seqs}
    for row in ini_mat:
        names = names_by_seq.get(row[1])
        if names is None:
            continue

        name = row[0]
        if not names:
            names.append(name)
            continue

        prefix = arm_prefix(name)
        matched = False
        for i in range(len(names)):
            if arm_prefix(names[i])==prefix:
                matched = True
                if len(name)<len(names[i]):
                    # A shorter name replaces the longer one with the same prefix.
                    del names[i]
                    names.append(name)
                    break
        if not matched:
            names.append(name)

    # Pass 3: build the merged name and write it back to the rows.
    merged = {seq: "/".join(names[:1]+names[:0:-1]) for seq, names in names_by_seq.items()}
    for row in ini_mat:
        if row[1] in merged:
            row[0] = merged[row[1]]

    # Sort so that identical rows become adjacent, then drop the repeats.
    ini_mat.sort()
    unique_rows = [row for row, _ in itertools.groupby(ini_mat)]
    new.extend(unique_rows)
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
580
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
581 ####################################################################################################################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
582
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
583 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
584
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
585 This function exports the count matrices for differential expression
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
586 if user chose analysis with non-templated miRNAs detection
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
587
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
588 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
589
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def nontemp_counts_to_diff(tem_names,tem_samp,non_names,non_samp,folder,pro):
    """Write one two-column count file per sample (templated + non-templated).

    For every count column of ``tem_samp`` (columns 2 and up) a file
    ``folder + <sample name> + '.txt'`` is written.  It holds a header line,
    then item 0 and that column of every ``tem_samp`` row, followed by
    item 0 and the matching column of every ``non_samp`` row for each entry
    of ``non_names`` equal to the sample name.

    Parameters:
        tem_names: sample names, one per count column of ``tem_samp``.
        tem_samp: rows whose item 0 is the id and items 2+ are the counts
            (strings); must contain at least one row.
        non_names: sample names, one per count column of ``non_samp``.
        non_samp: rows laid out like ``tem_samp``.
        folder: path prefix, concatenated as-is with the file name.
        pro: unused; kept for interface compatibility.

    Returns:
        None.
    """

    for column in range(2,len(tem_samp[0])):
        sample_name = tem_names[column-2]

        # Count columns of non_samp that belong to the same sample.
        non_columns = [j+2 for j, non_name in enumerate(non_names) if non_name==sample_name]

        with open(folder+sample_name+'.txt','w') as fp:
            fp.write("miRNA id"+"\t"+sample_name+"\n")

            for row in tem_samp:
                fp.write("%s" % "\t".join([row[0],row[column]])+"\n")

            for non_column in non_columns:
                for row in non_samp:
                    fp.write("%s" % "\t".join([row[0],row[non_column]])+"\n")
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
605
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
606 #################################################################################################################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
607
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
608 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
609
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
610 This function exports the count matrices for differential expression
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
611 if user chose analysis only with templated miRNAs detection
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
612
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
613 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
614
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def temp_counts_to_diff(names,samp,folder,pro):
    """Write one two-column count file per sample (templated counts only).

    For every count column of ``samp`` (columns 2 and up) a file
    ``folder + <sample name> + '.txt'`` is written with a header line
    followed by item 0 and that column of every row.

    Parameters:
        names: sample names, one per count column of ``samp``.
        samp: rows whose item 0 is the id and items 2+ are the counts
            (strings); must contain at least one row.
        folder: path prefix, concatenated as-is with the file name.
        pro: unused; kept for interface compatibility.

    Returns:
        None.
    """

    for column in range(2,len(samp[0])):
        sample_name = names[column-2]

        with open(folder+sample_name+'.txt','w') as fp:
            fp.write("miRNA id"+"\t"+sample_name+"\n")

            for row in samp:
                fp.write("%s" % "\t".join([row[0],row[column]])+"\n")
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
625
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
626 #################################################################################################################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
627
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
628 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
629
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
630 This function downloads the fasta files from MirGene site
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
631 with ref miRNAs and star miRNAs sequences and merges them
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
632 into one list
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
633
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
634 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
635
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def _fetch_fasta_lines(url):
    """Download *url* and return its lines with every ">" character removed."""
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(url) as response:
        text = response.read().decode('utf-8')

    lines = [line.replace(">","") for line in text.split("\n")]

    # Drop the empty string produced by a trailing newline.  Only an empty
    # last item is removed, so a payload without a final newline keeps its
    # last record.
    if lines and lines[-1]=="":
        del lines[-1]
    return lines


def download_matures(matures,org_name):
    """Download the mature and star fasta files and merge them into one list.

    Two requests are sent to ``http://mirgenedb.org/fasta/<org_name>``
    (``?mat=1`` and ``?star=1``).  The lines of both responses are
    concatenated (mature first) and, at every odd index of the combined
    list, "U" is replaced by "T".  This assumes that lines alternate
    header/sequence, so that odd indices hold the sequences.

    Parameters:
        matures: output list, extended in place with the merged lines.
        org_name: identifier inserted into the request URL as-is.

    Returns:
        None.

    Raises:
        Whatever ``urllib.request.urlopen`` raises on network/HTTP errors.
    """

    mat_url = 'http://mirgenedb.org/fasta/'+org_name+'?mat=1'
    star_url = 'http://mirgenedb.org/fasta/'+org_name+'?star=1'

    mature_mir = _fetch_fasta_lines(mat_url)
    mature_mir.extend(_fetch_fasta_lines(star_url))

    for i in range(1,len(mature_mir),2):
        mature_mir[i]=mature_mir[i].replace("U","T")

    matures.extend(mature_mir)
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
661
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
662 ###################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
663
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
664 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
665
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
666 This function detects the templated isoforms from the 1st part of analysis
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
667 These isoforms and ref miRNAs will be used for the detection of non-templated miRNAs
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
668
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
669 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
670
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def non_template_ref(sc,st,all_isoforms):
    """Collect the names and sequences of the templated isoforms.

    ``sc`` and ``st`` are iterables of samples and every sample is an
    iterable of records in which item 2 is the name and item 9 is the
    sequence.  For each record whose name contains "_t_" and is not yet
    present in ``all_isoforms``, the name and then the sequence are
    appended to ``all_isoforms``.  ``sc`` is processed before ``st``.

    Parameters:
        sc: samples of the first group.
        st: samples of the second group.
        all_isoforms: output list (flat: name, sequence, name, sequence, ...),
            extended in place.

    Returns:
        None.
    """

    # Mirror of the output list for O(1) membership tests.  It holds
    # everything the list holds, names as well as sequences, exactly like
    # the original "not in all_isoforms" check.
    known = set(all_isoforms)

    for group in (sc, st):
        for sample in group:
            for record in sample:
                name = record[2]
                if name not in known and "_t_" in name:
                    all_isoforms.append(name)
                    all_isoforms.append(record[9])
                    known.add(name)
                    known.add(record[9])
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
687
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
688 ################################################################################################################################################################################################
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
689
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
690 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
691
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
692 This function adds uncommon detected mirnas among the samples with zeros as counts
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
693
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
694 """
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
695
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
def uncommon_mirnas(sample,mir_names,l,new_d,sample_name,sample_order):
    """Add the miRNAs missing from a sample with a count of "0".

    Every entry of ``mir_names`` whose item 0 does not occur as item 0 of a
    ``sample`` row is appended to ``sample`` as
    ``[name, "0", <item 1 of the entry>]``.  ``sample`` is then sorted in
    place by item 0 and adjacent identical rows are collapsed.  The
    resulting list and ``sample_name`` are appended to ``new_d`` and
    ``sample_order`` while ``l`` is held.

    Parameters:
        sample: rows of one sample; modified in place (appended, sorted).
        mir_names: reference entries ``[name, second field]``.
        l: lock-like object providing ``acquire()`` and ``release()``.
        new_d: shared output list receiving the updated sample.
        sample_name: name appended to ``sample_order``.
        sample_order: shared output list receiving ``sample_name``.

    Returns:
        None.
    """

    # Names already present; kept in sync with the rows appended below so
    # that a name repeated in mir_names is only added once.
    present = {row[0] for row in sample}

    for mir in mir_names:
        if mir[0] not in present:
            # add the name of the miRNA with zero counts and its second field
            sample.append([mir[0],"0",mir[1]])
            present.add(mir[0])

    # sorting and remove duplicates
    sample.sort(key=lambda row: row[0])
    unique_rows = [row for row, _ in itertools.groupby(sample)]

    # Publish the updated sample; the lock is released even if an append fails.
    l.acquire()
    try:
        new_d.append(unique_rows)
        sample_order.append(sample_name)
    finally:
        l.release()
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
716
810e789ffeab Uploaded
glogobyte
parents:
diff changeset
717 ###############################################################################################################################################################################################