Mercurial > repos > vandelj > giant_hierarchical_clustering
comparison src/LIMMA_options.py @ 0:14045c80a222 draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
author | vandelj |
---|---|
date | Fri, 26 Jun 2020 09:38:23 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:14045c80a222 |
---|---|
1 import re | |
2 | |
def get_column_names( file_path, toNotConsider=None, toNotConsiderBis=None):
    """Build select options from the header line of a tab-separated file.

    The first header cell is always skipped, and any cell equal to
    ``toNotConsider`` or ``toNotConsiderBis`` is left out as well.

    Args:
        file_path: path of the tab-separated file to read.
        toNotConsider: optional column name to exclude.
        toNotConsiderBis: optional second column name to exclude.

    Returns:
        A list of ``(name, name, False)`` tuples, one per retained column, in
        header order (presumably the (label, value, selected) triples expected
        by the caller -- confirm against the tool wrapper). An empty file
        yields an empty list.
    """
    excluded = (toNotConsider, toNotConsiderBis)
    # The context manager guarantees the handle is released even if reading fails.
    with open(file_path) as inputfile:
        # next(..., "") turns an empty file into an empty header instead of
        # letting StopIteration escape.
        # rstrip() rather than strip(): a leading tab (empty first cell) must be
        # kept, otherwise every column shifts left and the first real column is
        # mistaken for the one to skip.
        header = next(inputfile, "").rstrip().split("\t")
    return [(name, name, False) for name in header[1:] if name not in excluded]
12 | |
def get_row_names( file_path, factorName ):
    """List the distinct values found in one named column of a tab-separated file.

    Args:
        file_path: path of the tab-separated file; its first line is the header.
        factorName: header name of the column whose values are collected.

    Returns:
        A list of ``(value, value, False)`` tuples in order of first appearance.
        The list is empty when the file is empty or ``factorName`` is not in
        the header.
    """
    options = []
    with open(file_path) as inputfile:
        # rstrip() keeps a leading empty cell so column indices stay aligned.
        header = next(inputfile, "").rstrip().split("\t")
        iColumn = -1
        for i, field_component in enumerate(header):
            # No break on purpose: with duplicated header names the last match wins.
            if field_component == factorName:
                iColumn = i
        if iColumn == -1:
            return options
        seen = set()  # O(1) duplicate detection; `options` keeps the order
        for line in inputfile:
            fields = line.rstrip().split("\t")
            # Single-cell/blank lines are ignored, and so are rows too short to
            # hold the requested column (they would otherwise raise IndexError).
            if len(fields) <= 1 or iColumn >= len(fields):
                continue
            value = fields[iColumn]
            if value not in seen:
                seen.add(value)
                options.append((value, value, False))
    return options
29 | |
def get_row_names_interaction( file_path, factorNameA, factorNameB ):
    """Build the "A*B" options crossing the distinct values of two columns.

    Args:
        file_path: path of the tab-separated file; its first line is the header.
        factorNameA: header name of the first column.
        factorNameB: header name of the second column.

    Returns:
        A list of ``(label, label, False)`` tuples where ``label`` is
        ``valueA + "*" + valueB``, iterating over the values of A in the outer
        loop and those of B in the inner loop (each in order of first
        appearance). The list is empty when either column is missing, has no
        value, or has the literal string "None" as its first value.
    """
    possibleValuesA = []
    possibleValuesB = []
    with open(file_path) as inputfile:
        # rstrip() keeps a leading empty cell so column indices stay aligned.
        header = next(inputfile, "").rstrip().split("\t")
        iColumnA = -1
        iColumnB = -1
        for i, field_component in enumerate(header):
            # No break on purpose: with duplicated header names the last match wins.
            if field_component == factorNameA:
                iColumnA = i
            if field_component == factorNameB:
                iColumnB = i
        if iColumnA != -1 and iColumnB != -1:
            widest = max(iColumnA, iColumnB)
            seenA = set()
            seenB = set()
            for line in inputfile:
                fields = line.rstrip().split("\t")
                # Skip blank/single-cell lines and rows that cannot hold both
                # columns (indexing them would raise IndexError).
                if len(fields) <= 1 or widest >= len(fields):
                    continue
                valueA = fields[iColumnA]
                if valueA not in seenA:
                    seenA.add(valueA)
                    possibleValuesA.append(valueA)
                valueB = fields[iColumnB]
                if valueB not in seenB:
                    seenB.add(valueB)
                    possibleValuesB.append(valueB)

    if not possibleValuesA or not possibleValuesB:
        return []
    if possibleValuesA[0] == "None" or possibleValuesB[0] == "None":
        return []
    options = []
    for valueA in possibleValuesA:
        for valueB in possibleValuesB:
            label = valueA + "*" + valueB
            options.append((label, label, False))
    return options
57 | |
def get_comparisonsA( factorA, valuesA ):
    """Build every ordered pairwise contrast "x - y" between the given values.

    Args:
        factorA: unused; kept so existing callers keep working.
        valuesA: the selected values, either as a list/tuple of strings or as
            the string form of such a list (``"[u'a', u'b']"`` or
            ``"['a', 'b']"``). Any other object is converted with ``str()`` and
            treated as a single value.

    Returns:
        A list of ``(label, label, False)`` tuples. For each pair of values
        taken in input order, "first - second" is immediately followed by
        "second - first". Fewer than two values yield an empty list.
    """
    if isinstance(valuesA, (list, tuple)):
        # Use the items directly: parsing str(list) only worked with the
        # Python 2 repr ("[u'a', u'b']") and broke on Python 3 ("['a', 'b']").
        possibleValues = list(valuesA)
    else:
        # Already-stringified list: accept both the u'' and the plain '' repr.
        formatValuesA = re.sub(r"(^\[u?')|('\]$)", "", str(valuesA))
        possibleValues = re.split(r"', u?'", formatValuesA)

    options = []
    for counter, first in enumerate(possibleValues):
        for second in possibleValues[counter + 1:]:
            for label in (first + " - " + second, second + " - " + first):
                options.append((label, label, False))
    return options
68 | |
def get_comparisonsAB(factorA, valuesA, factorB, valuesB, interaction):
    """Build contrast options for two factors, with or without interaction.

    Args:
        factorA: unused; kept so existing callers keep working.
        valuesA: selected values of the first factor, as a list/tuple of
            strings or as the string form of such a list.
        factorB: unused; kept so existing callers keep working.
        valuesB: selected values of the second factor, same accepted forms.
        interaction: the no-interaction mode is used only when
            ``str(interaction) == "False"``; anything else selects the
            interaction mode.

    Returns:
        A list of ``(label, label, False)`` tuples.
        Without interaction: all ordered pairs "x - y" / "y - x" within
        ``valuesA``, followed by the same within ``valuesB``.
        With interaction: every "(a * b) - (a2 * b2)" where the two cells
        differ, or an empty list if either factor has no value or has the
        literal "None" as its first value.
    """
    def _as_list(selection):
        # Use list items directly: parsing str(list) only worked with the
        # Python 2 repr ("[u'a', u'b']") and broke on Python 3 ("['a', 'b']").
        if isinstance(selection, (list, tuple)):
            return list(selection)
        # Already-stringified list: accept both the u'' and the plain '' repr.
        stripped = re.sub(r"(^\[u?')|('\]$)", "", str(selection))
        return re.split(r"', u?'", stripped)

    def _pairwise(values):
        # "x - y" immediately followed by "y - x" for each pair in input order.
        pairs = []
        for counter, first in enumerate(values):
            for second in values[counter + 1:]:
                for label in (first + " - " + second, second + " - " + first):
                    pairs.append((label, label, False))
        return pairs

    possibleValuesA = _as_list(valuesA)
    possibleValuesB = _as_list(valuesB)

    if str(interaction) == "False":
        return _pairwise(possibleValuesA) + _pairwise(possibleValuesB)

    options = []
    if not possibleValuesA or not possibleValuesB:
        return options
    if possibleValuesA[0] == "None" or possibleValuesB[0] == "None":
        return options
    # Loop nesting (A, A', B, B') fixes the order of the generated options.
    for counterA, leftA in enumerate(possibleValuesA):
        for innerCounterA, rightA in enumerate(possibleValuesA):
            for counterB, leftB in enumerate(possibleValuesB):
                for innerCounterB, rightB in enumerate(possibleValuesB):
                    # A cell is never compared with itself (positions, not
                    # values, are compared).
                    if counterA == innerCounterA and counterB == innerCounterB:
                        continue
                    label = "(" + leftA + " * " + leftB + ") - (" + rightA + " * " + rightB + ")"
                    options.append((label, label, False))
    return options
95 | |
def get_row_names_allInteractions( file_path, factorSelected):
    """Build "v1*v2*..." options crossing the values of all selected factors.

    Args:
        file_path: path of the tab-separated file; its first line is the header.
        factorSelected: the selected factor names, as a list/tuple of strings,
            as the string form of such a list, or as a single name.

    Returns:
        A list of ``(label, label, False)`` tuples. Values of each factor are
        taken in order of first appearance; when combining, the values of the
        later factor form the outer loop (e.g. ``a1*b1, a2*b1, a1*b2, a2*b2``).
        The list is empty when no factor is selected or when any selected
        factor is absent from the header.
    """
    if isinstance(factorSelected, (list, tuple)):
        # Use the items directly: parsing str(list) only worked with the
        # Python 2 repr ("[u'a', u'b']") and broke on Python 3 ("['a', 'b']").
        factorsList = list(factorSelected)
    else:
        # Already-stringified list: accept both the u'' and the plain '' repr.
        formatFactors = re.sub(r"(^\[u?')|('\]$)", "", str(factorSelected))
        factorsList = re.split(r"', u?'", formatFactors)

    with open(file_path) as inputfile:
        # rstrip() keeps a leading empty cell so column indices stay aligned.
        header = next(inputfile, "").rstrip().split("\t")
        columnOf = {}
        for iField, fieldComponent in enumerate(header):
            columnOf[fieldComponent] = iField  # duplicated names: last one wins

        # A missing factor (e.g. a "None" selection) used to leave a None index
        # behind and crash with TypeError; there is nothing to combine then.
        if not factorsList:
            return []
        for factorComponent in factorsList:
            if factorComponent not in columnOf:
                return []

        iColumn = [columnOf[factorComponent] for factorComponent in factorsList]
        valuesList = [[] for _ in factorsList]
        seen = [set() for _ in factorsList]  # O(1) duplicate detection
        widest = max(iColumn)
        for line in inputfile:
            fields = line.rstrip().split("\t")
            # Skip blank/single-cell lines and rows too short for every column.
            if len(fields) <= 1 or widest >= len(fields):
                continue
            for iFactor, column in enumerate(iColumn):
                value = fields[column]
                if value not in seen[iFactor]:
                    seen[iFactor].add(value)
                    valuesList[iFactor].append(value)

    allCombinations = valuesList[0]
    for values in valuesList[1:]:
        allCombinations = [
            combination + "*" + valueComponent
            for valueComponent in values
            for combination in allCombinations
        ]
    return [(combination, combination, False) for combination in allCombinations]
134 | |
def get_allrow_names( file_path, factorSelected ):
    """Build "factor:value" options for every value of every selected factor.

    Args:
        file_path: path of the tab-separated file; its first line is the header.
        factorSelected: the selected factor names, as a list/tuple of strings,
            as the string form of such a list, or as a single name.

    Returns:
        A list of ``(label, label, False)`` tuples where ``label`` is
        ``factor + ":" + value``; factors follow the selection order and values
        their order of first appearance. Selected names that are absent from
        the header contribute nothing.
    """
    if isinstance(factorSelected, (list, tuple)):
        # Use the items directly: parsing str(list) only worked with the
        # Python 2 repr ("[u'a', u'b']") and broke on Python 3 ("['a', 'b']").
        factorsList = list(factorSelected)
    else:
        # Already-stringified list: accept both the u'' and the plain '' repr.
        formatFactors = re.sub(r"(^\[u?')|('\]$)", "", str(factorSelected))
        factorsList = re.split(r"', u?'", formatFactors)

    with open(file_path) as inputfile:
        # rstrip() keeps a leading empty cell so column indices stay aligned.
        header = next(inputfile, "").rstrip().split("\t")
        columnOf = {}
        for iField, fieldComponent in enumerate(header):
            columnOf[fieldComponent] = iField  # duplicated names: last one wins

        # Keep only factors that exist in the header; a missing one (e.g. a
        # "None" selection) used to leave a None index and crash with TypeError.
        knownFactors = [f for f in factorsList if f in columnOf]
        iColumn = [columnOf[f] for f in knownFactors]
        valuesList = [[] for _ in knownFactors]
        seen = [set() for _ in knownFactors]  # O(1) duplicate detection
        for line in inputfile:
            fields = line.rstrip().split("\t")
            if len(fields) <= 1:
                continue
            for iFactor, column in enumerate(iColumn):
                # Row too short for this column: nothing to record for it.
                if column >= len(fields):
                    continue
                value = fields[column]
                if value not in seen[iFactor]:
                    seen[iFactor].add(value)
                    valuesList[iFactor].append(value)

    options = []
    for factorComponent, values in zip(knownFactors, valuesList):
        for valueComponent in values:
            label = factorComponent + ":" + valueComponent
            options.append((label, label, False))
    return options
167 | |
def replaceNamesInFiles(expressionFile_name,conditionFile_name,outputExpressionFile,outputConditionFile,ouputDictionnary):
    """Rewrite an expression file and a condition file with generic names.

    Header cells of the expression file (all but the first) become
    "Condition1", "Condition2", ...; header cells of the condition file (all
    but the first) become "Factor1", ...; in the other condition-file rows the
    first cell is mapped to its "ConditionN" name (a new one is created if it
    was not seen before) and the remaining cells to "ValueN" names. Every
    original name maps to exactly one generic name, shared across both files.

    Args:
        expressionFile_name: path of the tab-separated expression file to read.
        conditionFile_name: path of the tab-separated condition file to read.
        outputExpressionFile: path where the renamed expression file is written.
        outputConditionFile: path where the renamed condition file is written.
        ouputDictionnary: path where "original<TAB>generic" lines are written.

    Returns:
        0 on success. 1 if an original name whose generic name does not start
        with "Condition" contains one of ``* : , |``; in that case both output
        files have already been written but the dictionary file is not.

    Raises:
        NameError: if an expression-file column name is repeated, or if a
            factor name of the condition file is already a known name.
    """
    dico = {}
    forbidenCharacters = {"*", ":", ",", "|"}

    # Expression file: only the header is renamed, data rows are copied as-is.
    # `with` closes both handles even when NameError is raised below.
    iCondition = 1
    with open(expressionFile_name) as inputfile, open(outputExpressionFile, 'w') as outputfile:
        firstLine = next(inputfile).rstrip().split("\t")
        renamed = [firstLine[0]]
        for field_component in firstLine[1:]:
            # condition names should not be redundant with other conditions
            if field_component in dico:
                raise NameError('condition name allready exists!')
            dico[field_component] = "Condition" + str(iCondition)
            renamed.append(dico[field_component])
            iCondition += 1
        outputfile.write("\t".join(renamed) + "\n")
        for line in inputfile:
            outputfile.write(line)

    # Condition file: every line is parsed and rewritten.
    iFactor = 1
    iValue = 1
    with open(conditionFile_name) as inputfile, open(outputConditionFile, 'w') as outputfile:
        for lineNumber, line in enumerate(inputfile):
            currentLine = line.rstrip().split("\t")
            if lineNumber == 0:
                # Header: first cell kept verbatim, the others are factor names.
                renamed = [currentLine[0]]
                for field_component in currentLine[1:]:
                    # factor names should not be redundant with other factors
                    # or conditions
                    if field_component in dico:
                        raise NameError('factor name allready exists!')
                    dico[field_component] = "Factor" + str(iFactor)
                    renamed.append(dico[field_component])
                    iFactor += 1
            else:
                renamed = []
                for i, field_component in enumerate(currentLine):
                    if field_component not in dico:
                        if i == 0:
                            # condition absent from the expression header
                            dico[field_component] = "Condition" + str(iCondition)
                            iCondition += 1
                        else:
                            dico[field_component] = "Value" + str(iValue)
                            iValue += 1
                    renamed.append(dico[field_component])
            outputfile.write("\t".join(renamed) + "\n")

    # Factor and value names must not contain a forbidden character;
    # condition names are exempt from this check.
    for key, value in dico.items():
        if value.startswith("Condition"):
            continue
        for specialCharacter in forbidenCharacters:
            if specialCharacter in key:
                return 1

    # Finally write the original -> generic name dictionary.
    with open(ouputDictionnary, 'w') as outputfile:
        for key, value in dico.items():
            outputfile.write(key + "\t" + value + "\n")
    return 0
246 | |
247 | |
def replaceNamesBlockInFiles(expressionFile_name,conditionFile_name,blockingFile_name,outputExpressionFile,outputConditionFile,outputBlockingFile,ouputDictionnary):
    """Rewrite expression, condition and blocking files with generic names.

    Header cells of the expression file (all but the first) become
    "Condition1", "Condition2", ...  The condition file and then the blocking
    file are processed the same way: header cells (all but the first) become
    "FactorN"; in the other rows the first cell is mapped to its "ConditionN"
    name (a new one is created if it was not seen before) and the remaining
    cells to "ValueN" names. One name dictionary and one set of counters are
    shared by the three files.

    Args:
        expressionFile_name: path of the tab-separated expression file to read.
        conditionFile_name: path of the tab-separated condition file to read.
        blockingFile_name: path of the tab-separated blocking file to read.
        outputExpressionFile: path where the renamed expression file is written.
        outputConditionFile: path where the renamed condition file is written.
        outputBlockingFile: path where the renamed blocking file is written.
        ouputDictionnary: path where "original<TAB>generic" lines are written.

    Returns:
        0 on success. 1 if an original name whose generic name does not start
        with "Condition" contains one of ``* : , |``; in that case the three
        output files have already been written but the dictionary file is not.

    Raises:
        NameError: if an expression-file column name is repeated, or if a
            factor name of the condition or blocking file is already a known
            name.
    """
    dico = {}
    forbidenCharacters = {"*", ":", ",", "|"}

    # Expression file: only the header is renamed, data rows are copied as-is.
    # `with` closes both handles even when NameError is raised below.
    iCondition = 1
    with open(expressionFile_name) as inputfile, open(outputExpressionFile, 'w') as outputfile:
        firstLine = next(inputfile).rstrip().split("\t")
        renamed = [firstLine[0]]
        for field_component in firstLine[1:]:
            # condition names should not be redundant with other conditions
            if field_component in dico:
                raise NameError('condition name allready exists!')
            dico[field_component] = "Condition" + str(iCondition)
            renamed.append(dico[field_component])
            iCondition += 1
        outputfile.write("\t".join(renamed) + "\n")
        for line in inputfile:
            outputfile.write(line)

    # Condition file first, then blocking file; factor/value numbering
    # continues from one file to the next.
    iFactor = 1
    iValue = 1
    filePairs = (
        (conditionFile_name, outputConditionFile),
        (blockingFile_name, outputBlockingFile),
    )
    for inputName, outputName in filePairs:
        with open(inputName) as inputfile, open(outputName, 'w') as outputfile:
            for lineNumber, line in enumerate(inputfile):
                currentLine = line.rstrip().split("\t")
                if lineNumber == 0:
                    # Header: first cell kept verbatim, the others are factors.
                    renamed = [currentLine[0]]
                    for field_component in currentLine[1:]:
                        # factor names should not be redundant with other
                        # factors or conditions
                        if field_component in dico:
                            raise NameError('factor name allready exists!')
                        dico[field_component] = "Factor" + str(iFactor)
                        renamed.append(dico[field_component])
                        iFactor += 1
                else:
                    renamed = []
                    for i, field_component in enumerate(currentLine):
                        if field_component not in dico:
                            if i == 0:
                                # condition absent from the expression header
                                dico[field_component] = "Condition" + str(iCondition)
                                iCondition += 1
                            else:
                                dico[field_component] = "Value" + str(iValue)
                                iValue += 1
                        renamed.append(dico[field_component])
                outputfile.write("\t".join(renamed) + "\n")

    # Factor and value names must not contain a forbidden character;
    # condition names are exempt from this check.
    for key, value in dico.items():
        if value.startswith("Condition"):
            continue
        for specialCharacter in forbidenCharacters:
            if specialCharacter in key:
                return 1

    # Finally write the original -> generic name dictionary.
    with open(ouputDictionnary, 'w') as outputfile:
        for key, value in dico.items():
            outputfile.write(key + "\t" + value + "\n")
    return 0