comparison src/LIMMA_options.py @ 0:14045c80a222 draft

"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit cb276a594444c8f32e9819fefde3a21f121d35df"
author vandelj
date Fri, 26 Jun 2020 09:38:23 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:14045c80a222
1 import re
2
def get_column_names(file_path, toNotConsider=None, toNotConsiderBis=None):
    """Return the header column names of a tab-separated file as option tuples.

    The first header cell is always skipped, as are the cells equal to
    ``toNotConsider`` or ``toNotConsiderBis``.

    Args:
        file_path: path of the tab-separated file; only its first line is read.
        toNotConsider: optional column name to leave out of the result.
        toNotConsiderBis: optional second column name to leave out.

    Returns:
        A list of ``(name, name, False)`` tuples, one per retained column, in
        header order (presumably Galaxy select options: label, value,
        selected -- confirm against the tool XML). An empty file yields an
        empty list.
    """
    # The context manager guarantees the handle is released even on error.
    with open(file_path) as inputfile:
        # rstrip() (not strip()) keeps a leading empty header cell, so the
        # first real column is not mistaken for the skipped first column.
        header = next(inputfile, "").rstrip().split("\t")
    excluded = (toNotConsider, toNotConsiderBis)
    return [(name, name, False) for name in header[1:] if name not in excluded]
12
def get_row_names(file_path, factorName):
    """Return the distinct values found in one column of a tab-separated file.

    Args:
        file_path: path of the tab-separated file whose first line is a header.
        factorName: header name of the column to read.

    Returns:
        A list of ``(value, value, False)`` tuples in order of first
        appearance. The list is empty when the file is empty or when no
        header cell equals ``factorName``.
    """
    options = []
    with open(file_path) as inputfile:
        # rstrip() keeps a leading empty header cell so that header positions
        # stay aligned with the data rows.
        header = next(inputfile, "").rstrip().split("\t")
        column = -1
        for index, name in enumerate(header):
            if name == factorName:
                # No break: when the name is duplicated the last match wins.
                column = index
        if column == -1:
            return options
        seen = set()
        for line in inputfile:
            fields = line.rstrip().split("\t")
            # Skip blank/one-cell lines and rows too short to hold the column.
            if len(fields) > 1 and column < len(fields):
                value = fields[column]
                if value not in seen:
                    seen.add(value)
                    options.append((value, value, False))
    return options
29
def get_row_names_interaction(file_path, factorNameA, factorNameB):
    """Return every "A*B" pairing of the distinct values of two columns.

    Args:
        file_path: path of the tab-separated file whose first line is a header.
        factorNameA: header name of the first column.
        factorNameB: header name of the second column.

    Returns:
        A list of ``(label, label, False)`` tuples where ``label`` is
        ``valueA + "*" + valueB``, iterating the values of A in the outer loop
        and the values of B in the inner loop, each in order of first
        appearance. The list is empty when the file is empty, when either
        column is missing, or when the first value collected for either
        column is the literal string ``"None"``.
    """
    possibleValuesA = []
    possibleValuesB = []
    with open(file_path) as inputfile:
        # rstrip() keeps a leading empty header cell so that header positions
        # stay aligned with the data rows.
        header = next(inputfile, "").rstrip().split("\t")
        iColumnA = -1
        iColumnB = -1
        for index, name in enumerate(header):
            # No break: when a name is duplicated the last match wins.
            if name == factorNameA:
                iColumnA = index
            if name == factorNameB:
                iColumnB = index
        if iColumnA != -1 and iColumnB != -1:
            for line in inputfile:
                fields = line.rstrip().split("\t")
                if len(fields) <= 1:
                    continue
                # Rows too short to hold a column are ignored for that column.
                if iColumnA < len(fields) and fields[iColumnA] not in possibleValuesA:
                    possibleValuesA.append(fields[iColumnA])
                if iColumnB < len(fields) and fields[iColumnB] not in possibleValuesB:
                    possibleValuesB.append(fields[iColumnB])

    if not possibleValuesA or not possibleValuesB:
        return []
    if possibleValuesA[0] == "None" or possibleValuesB[0] == "None":
        return []
    options = []
    for valueA in possibleValuesA:
        for valueB in possibleValuesB:
            label = valueA + "*" + valueB
            options.append((label, label, False))
    return options
57
def get_comparisonsA(factorA, valuesA):
    """Build every ordered pairwise comparison between the selected values.

    Args:
        factorA: unused; kept so existing callers keep working.
        valuesA: the selected values, either a list/tuple or the textual repr
            of a list (``"[u'a', u'b']"`` or ``"['a', 'b']"``). Anything else
            is converted with ``str()`` and treated as a single value.

    Returns:
        A list of ``(label, label, False)`` tuples. For each unordered pair
        ``(x, y)`` taken in selection order, ``"x - y"`` is emitted followed
        by ``"y - x"``. Fewer than two values yield an empty list.
    """
    if isinstance(valuesA, (list, tuple)):
        possibleValues = [str(value) for value in valuesA]
    else:
        # Textual list repr: accept both the Python 2 (u'..') and the
        # Python 3 ('..') spellings of the string elements.
        formatValuesA = re.sub(r"(^\[u?')|('\]$)", "", str(valuesA))
        possibleValues = re.split(r"', u?'", formatValuesA)

    options = []
    for counter, first in enumerate(possibleValues):
        for second in possibleValues[counter + 1:]:
            forward = first + " - " + second
            backward = second + " - " + first
            options.append((forward, forward, False))
            options.append((backward, backward, False))
    return options
68
def get_comparisonsAB(factorA, valuesA, factorB, valuesB, interaction):
    """Build comparison labels for two factors, with or without interaction.

    Args:
        factorA: unused; kept so existing callers keep working.
        valuesA: selected values of the first factor, either a list/tuple or
            the textual repr of a list (``"[u'a', u'b']"`` or ``"['a', 'b']"``).
        factorB: unused; kept so existing callers keep working.
        valuesB: selected values of the second factor, same formats as
            ``valuesA``.
        interaction: when ``str(interaction) == "False"`` the two factors are
            compared independently; any other value selects interaction mode.

    Returns:
        A list of ``(label, label, False)`` tuples.

        * Without interaction: for A, then for B, every unordered pair
          ``(x, y)`` gives ``"x - y"`` followed by ``"y - x"`` (a factor with
          fewer than two values contributes nothing).
        * With interaction: every ``"(a * b) - (a' * b')"`` where the two
          cells differ. Nothing is produced when the first value of either
          factor is the literal string ``"None"``.
    """
    def _parse_selection(selection):
        # Lists are used directly; strings are treated as a list repr whose
        # elements may be spelled the Python 2 (u'..') or Python 3 ('..') way.
        if isinstance(selection, (list, tuple)):
            return [str(item) for item in selection]
        text = re.sub(r"(^\[u?')|('\]$)", "", str(selection))
        return re.split(r"', u?'", text)

    def _pairwise(values):
        # Both directions of each unordered pair, in selection order.
        pairs = []
        for counter, first in enumerate(values):
            for second in values[counter + 1:]:
                forward = first + " - " + second
                backward = second + " - " + first
                pairs.append((forward, forward, False))
                pairs.append((backward, backward, False))
        return pairs

    possibleValuesA = _parse_selection(valuesA)
    possibleValuesB = _parse_selection(valuesB)

    if str(interaction) == "False":
        return _pairwise(possibleValuesA) + _pairwise(possibleValuesB)

    options = []
    if not possibleValuesA or not possibleValuesB:
        return options
    if possibleValuesA[0] == "None" or possibleValuesB[0] == "None":
        return options
    for counterA, leftA in enumerate(possibleValuesA):
        for innerCounterA, rightA in enumerate(possibleValuesA):
            for counterB, leftB in enumerate(possibleValuesB):
                for innerCounterB, rightB in enumerate(possibleValuesB):
                    # A cell is never compared with itself.
                    if counterA == innerCounterA and counterB == innerCounterB:
                        continue
                    label = "(" + leftA + " * " + leftB + ") - (" + rightA + " * " + rightB + ")"
                    options.append((label, label, False))
    return options
95
def get_row_names_allInteractions(file_path, factorSelected):
    """Return every "*"-joined combination of the values of the selected factors.

    Args:
        file_path: path of the tab-separated file whose first line is a header.
        factorSelected: the selected factor names, either a list/tuple or the
            textual repr of a list (``"[u'A', u'B']"`` or ``"['A', 'B']"``).

    Returns:
        A list of ``(label, label, False)`` tuples. Combinations are built
        factor by factor: for each later factor, its values form the outer
        loop and the combinations built so far form the inner loop (two
        factors give ``a1*b1, a2*b1, a1*b2, a2*b2``). The list is empty when
        the file is empty or when any selected factor is absent from the
        header.
    """
    if isinstance(factorSelected, (list, tuple)):
        factorsList = [str(factor) for factor in factorSelected]
    else:
        # Textual list repr: accept both u'..' (Python 2) and '..' (Python 3).
        formatFactors = re.sub(r"(^\[u?')|('\]$)", "", str(factorSelected))
        factorsList = re.split(r"', u?'", formatFactors)

    iColumn = [None] * len(factorsList)
    valuesList = [[] for _ in factorsList]
    with open(file_path) as inputfile:
        # rstrip() keeps a leading empty header cell so that header positions
        # stay aligned with the data rows.
        header = next(inputfile, "").rstrip().split("\t")
        for iField, fieldComponent in enumerate(header):
            for iFactor, factorComponent in enumerate(factorsList):
                if fieldComponent == factorComponent:
                    iColumn[iFactor] = iField
        # A factor without a column cannot take part in any combination.
        if any(column is None for column in iColumn):
            return []
        for line in inputfile:
            fields = line.rstrip().split("\t")
            if len(fields) <= 1:
                continue
            for iFactor, column in enumerate(iColumn):
                # Rows too short to hold the column are ignored for it.
                if column < len(fields) and fields[column] not in valuesList[iFactor]:
                    valuesList[iFactor].append(fields[column])

    allCombinations = []
    for iFactor, factorValues in enumerate(valuesList):
        if iFactor == 0:
            allCombinations = list(factorValues)
        else:
            previous = allCombinations
            allCombinations = [
                combination + "*" + value
                for value in factorValues
                for combination in previous
            ]
    return [(combination, combination, False) for combination in allCombinations]
134
def get_allrow_names(file_path, factorSelected):
    """Return "factor:value" options for every value of the selected factors.

    Args:
        file_path: path of the tab-separated file whose first line is a header.
        factorSelected: the selected factor names, either a list/tuple or the
            textual repr of a list (``"[u'A', u'B']"`` or ``"['A', 'B']"``).

    Returns:
        A list of ``(label, label, False)`` tuples where ``label`` is
        ``factor + ":" + value``. Factors appear in selection order and, within
        a factor, values appear in order of first appearance in the file.
        Selected factors absent from the header contribute nothing; an empty
        file yields an empty list.
    """
    if isinstance(factorSelected, (list, tuple)):
        factorsList = [str(factor) for factor in factorSelected]
    else:
        # Textual list repr: accept both u'..' (Python 2) and '..' (Python 3).
        formatFactors = re.sub(r"(^\[u?')|('\]$)", "", str(factorSelected))
        factorsList = re.split(r"', u?'", formatFactors)

    iColumn = [None] * len(factorsList)
    valuesList = [[] for _ in factorsList]
    with open(file_path) as inputfile:
        # rstrip() keeps a leading empty header cell so that header positions
        # stay aligned with the data rows.
        header = next(inputfile, "").rstrip().split("\t")
        for iField, fieldComponent in enumerate(header):
            for iFactor, factorComponent in enumerate(factorsList):
                if fieldComponent == factorComponent:
                    iColumn[iFactor] = iField
        for line in inputfile:
            fields = line.rstrip().split("\t")
            if len(fields) <= 1:
                continue
            for iFactor, column in enumerate(iColumn):
                # Skip factors with no column and rows too short to hold it.
                if column is None or column >= len(fields):
                    continue
                if fields[column] not in valuesList[iFactor]:
                    valuesList[iFactor].append(fields[column])

    options = []
    for factorComponent, factorValues in zip(factorsList, valuesList):
        for valueComponent in factorValues:
            label = factorComponent + ":" + valueComponent
            options.append((label, label, False))
    return options
167
def replaceNamesInFiles(expressionFile_name,conditionFile_name,outputExpressionFile,outputConditionFile,ouputDictionnary):
    """Rewrite an expression file and a condition file with generic names.

    Condition names (expression header cells after the first, and the first
    cell of every condition data row) become ``Condition<N>``; factor names
    (condition header cells after the first) become ``Factor<N>``; all other
    condition cells become ``Value<N>``. A name already seen is always mapped
    to the same generic name. The first header cell of each file is copied
    unchanged, and expression data rows are copied verbatim.

    Args:
        expressionFile_name: path of the tab-separated expression file.
        conditionFile_name: path of the tab-separated condition file.
        outputExpressionFile: path where the renamed expression file is written.
        outputConditionFile: path where the renamed condition file is written.
        ouputDictionnary: path where the ``original<TAB>generic`` mapping is
            written, one entry per line.

    Returns:
        ``0`` on success. ``1`` when an original factor or value name contains
        one of ``* : , |``; both renamed files have already been written at
        that point, but the dictionary file is not.

    Raises:
        NameError: when a condition name is duplicated in the expression
            header, or a factor name duplicates a name already registered.
        StopIteration: when the expression file is empty.
    """
    dico = {}
    forbidenCharacters = {"*", ":", ",", "|"}
    iCondition = 1

    ##start with expression file, only the first line is rewritten
    # Context managers close both handles even when NameError is raised.
    with open(expressionFile_name) as inputfile, open(outputExpressionFile, 'w') as outputfile:
        firstLine = next(inputfile).rstrip().split("\t")
        newFields = []
        for i, field_component in enumerate(firstLine):
            if i == 0:
                newFields.append(field_component)
                continue
            #conditions names should not be redundant with other conditions
            if field_component in dico:
                raise NameError('condition name allready exists!')
            dico[field_component] = "Condition" + str(iCondition)
            newFields.append(dico[field_component])
            iCondition += 1
        outputfile.write("\t".join(newFields) + "\n")
        for line in inputfile:
            outputfile.write(line)

    #then parse condition file, all lines are rewritten in this case
    iFactor = 1
    iValue = 1
    with open(conditionFile_name) as inputfile, open(outputConditionFile, 'w') as outputfile:
        for lineNumber, line in enumerate(inputfile):
            currentLine = line.rstrip().split("\t")
            newFields = []
            for i, field_component in enumerate(currentLine):
                if lineNumber == 0:
                    #special treatment for the header line
                    if i == 0:
                        newFields.append(field_component)
                        continue
                    #factor names should not be redundant with other factors or conditions
                    if field_component in dico:
                        raise NameError('factor name allready exists!')
                    dico[field_component] = "Factor" + str(iFactor)
                    iFactor += 1
                elif i == 0:
                    #reuse the condition name when already known, create a new one otherwise
                    if field_component not in dico:
                        dico[field_component] = "Condition" + str(iCondition)
                        iCondition += 1
                elif field_component not in dico:
                    dico[field_component] = "Value" + str(iValue)
                    iValue += 1
                newFields.append(dico[field_component])
            outputfile.write("\t".join(newFields) + "\n")

    ##check if any factor/value entry in dictionnary contains a forbiden character
    for key, value in dico.items():
        if value.startswith("Condition"):
            continue
        if any(specialCharacter in key for specialCharacter in forbidenCharacters):
            return 1

    ##then write dictionnary in an additional file
    with open(ouputDictionnary, 'w') as outputfile:
        for key, value in dico.items():
            outputfile.write(key + "\t" + value + "\n")
    return 0
246
247
def replaceNamesBlockInFiles(expressionFile_name,conditionFile_name,blockingFile_name,outputExpressionFile,outputConditionFile,outputBlockingFile,ouputDictionnary):
    """Rewrite expression, condition and blocking files with generic names.

    Condition names (expression header cells after the first, and the first
    cell of every condition/blocking data row) become ``Condition<N>``; factor
    names (condition/blocking header cells after the first) become
    ``Factor<N>``; all other condition/blocking cells become ``Value<N>``.
    One mapping and one set of counters are shared by all files, so a name
    already seen is always mapped to the same generic name. The first header
    cell of each file is copied unchanged, and expression data rows are copied
    verbatim.

    Args:
        expressionFile_name: path of the tab-separated expression file.
        conditionFile_name: path of the tab-separated condition file.
        blockingFile_name: path of the tab-separated blocking file.
        outputExpressionFile: path where the renamed expression file is written.
        outputConditionFile: path where the renamed condition file is written.
        outputBlockingFile: path where the renamed blocking file is written.
        ouputDictionnary: path where the ``original<TAB>generic`` mapping is
            written, one entry per line.

    Returns:
        ``0`` on success. ``1`` when an original factor or value name contains
        one of ``* : , |``; the renamed files have already been written at
        that point, but the dictionary file is not.

    Raises:
        NameError: when a condition name is duplicated in the expression
            header, or a factor name duplicates a name already registered
            (including a factor of the condition file reused in the blocking
            file).
        StopIteration: when the expression file is empty.
    """
    dico = {}
    forbidenCharacters = {"*", ":", ",", "|"}
    iCondition = 1

    ##start with expression file, only the first line is rewritten
    # Context managers close both handles even when NameError is raised.
    with open(expressionFile_name) as inputfile, open(outputExpressionFile, 'w') as outputfile:
        firstLine = next(inputfile).rstrip().split("\t")
        newFields = []
        for i, field_component in enumerate(firstLine):
            if i == 0:
                newFields.append(field_component)
                continue
            #conditions names should not be redundant with other conditions
            if field_component in dico:
                raise NameError('condition name allready exists!')
            dico[field_component] = "Condition" + str(iCondition)
            newFields.append(dico[field_component])
            iCondition += 1
        outputfile.write("\t".join(newFields) + "\n")
        for line in inputfile:
            outputfile.write(line)

    #then parse condition file and blocking file, all lines are rewritten
    iFactor = 1
    iValue = 1
    filePairs = (
        (conditionFile_name, outputConditionFile),
        (blockingFile_name, outputBlockingFile),
    )
    for inputName, outputName in filePairs:
        with open(inputName) as inputfile, open(outputName, 'w') as outputfile:
            for lineNumber, line in enumerate(inputfile):
                currentLine = line.rstrip().split("\t")
                newFields = []
                for i, field_component in enumerate(currentLine):
                    if lineNumber == 0:
                        #special treatment for the header line
                        if i == 0:
                            newFields.append(field_component)
                            continue
                        #factor names should not be redundant with other factors or conditions
                        if field_component in dico:
                            raise NameError('factor name allready exists!')
                        dico[field_component] = "Factor" + str(iFactor)
                        iFactor += 1
                    elif i == 0:
                        #reuse the condition name when already known, create a new one otherwise
                        if field_component not in dico:
                            dico[field_component] = "Condition" + str(iCondition)
                            iCondition += 1
                    elif field_component not in dico:
                        dico[field_component] = "Value" + str(iValue)
                        iValue += 1
                    newFields.append(dico[field_component])
                outputfile.write("\t".join(newFields) + "\n")

    ##check if any factor/value entry in dictionnary contains a forbiden character
    for key, value in dico.items():
        if value.startswith("Condition"):
            continue
        if any(specialCharacter in key for specialCharacter in forbidenCharacters):
            return 1

    ##then write dictionnary in an additional file
    with open(ouputDictionnary, 'w') as outputfile:
        for key, value in dico.items():
            outputfile.write(key + "\t" + value + "\n")
    return 0