Mercurial > repos > vandelj > giant_aptsummarize

--- a/galaxy/wrappers/ArrayNormalization.xml	Fri Jun 26 09:35:11 2020 -0400
+++ b/galaxy/wrappers/ArrayNormalization.xml	Wed Sep 09 10:43:27 2020 +0000
@@ -1,7 +1,7 @@
 <tool name="GIANT-Normalization with APT Summarize" id="giant_aptsummarize" version="0.1.1">
   <description>Apply Affymetrix Power Tool summarize function to .CEL collection</description>
   <requirements>
-    <requirement type="package" version="2.10.1">apt-probeset-summarize</requirement>
+    <requirement type="package" version="2.10.2">apt-probeset-summarize</requirement>
   </requirements>
   <stdio>
     <regex match="Execution halted"
@@ -11,11 +11,8 @@
     <exit_code range="10" level="fatal" description="Error in post-treatments, see log file for more information." />
     <exit_code range="1:9" level="fatal" description="Error during apt command execution, see log file for more information." />
   </stdio>
-
-
-
-   <command>	<![CDATA[
-  ##ONLY FOR LOCAL TEST PURPOSE, COMMENT OTHERWISE
+  <command>	<![CDATA[
+  ##only for local test, comment otherwise
   ##set $pathToApt='/mnt/galaxy/home/galaxy/Software/ThermoFischer/apt-1.20.6-x86_64-intel-linux/bin'

     mkdir ./apt_output/;
@@ -104,8 +101,6 @@
 	]]>
   </command>

-
-
   <configfiles>
     <configfile name="scriptFormat">
 <![CDATA[
@@ -176,8 +171,6 @@
    </configfile>
   </configfiles>

-
-
   <inputs>
     <param type="text" name="title" value="APT_toPersonalize" label="Title for output"/>

@@ -253,13 +246,11 @@
   </conditional>
   </inputs>

-
-
   <outputs>
     <data format="tabular" name="outputData" label="${title}_NormalizedData"/>
     <data format="txt" name="log" label="${title}_Log" />
   </outputs>
-
+


  <tests>
@@ -268,16 +259,15 @@
     <param name="commandLine" value="rma-gc-scale" />
     <param name="normLevel" value="core" />
     <param name="arraySelection" value="other" />
-    <param name="CDFfile" value="./Mouse430_2.cdf" />
-    <param name="annotationFile" value="./formatedAnnotation.csv" />
+    <param name="CDFfile" value="./APT-summarize/input/Mouse430_2.cdf" />
+    <param name="annotationFile" value="./APT-summarize/input/formatedAnnotation.csv" />
     <param name="addAnnotation" value="true" />
     <param name="keepAnnotated" value="false" />
     <param name="mergingMethod" value="mean" />
-    <output name="outputData" file="./APT-summarize/expressionOutput.csv" />
-    <output name="log" file="./APT-summarize/output.log" lines_diff="2" />
+    <output name="outputData" file="./APT-summarize/output/outputExpression.csv" />
+    <output name="log" file="./APT-summarize/output/outputLog.txt" compare="sim_size" delta_frac="0.15" />
   </test>
-</tests>
-
+ </tests>
--- a/src/General_functions.py	Fri Jun 26 09:35:11 2020 -0400
+++ b/src/General_functions.py	Wed Sep 09 10:43:27 2020 +0000
@@ -1,5 +1,6 @@
 import re
 import numpy as np
+import galaxy.model

 def get_column_names( file_path, toNotConsider=-1, each=1):
 	options=[]
@@ -76,7 +77,7 @@

 def get_condition_file_names( file_list, toNotConsider=-1, each=1):
 	options=[]
-	if not isinstance(file_list,list):#if input file is a tabular file, act as get_column_names
+	if not (isinstance(file_list,list) or isinstance(file_list,galaxy.model.HistoryDatasetCollectionAssociation) or isinstance(file_list,galaxy.model.DatasetCollection)) :#if input file is a tabular file, act as get_column_names
 		inputfile = open(file_list.file_name)
 		firstLine = next(inputfile).strip().split("\t")
 		cpt=0
@@ -88,13 +89,17 @@
 				if cpt==each:
 					cpt=0
 		inputfile.close()
-	else:#if input file is a .cel file list or a collection
-		if not hasattr(file_list[0],'collection'):#if it is not a collection, get name easily
-			for i, field_component in enumerate( file_list ):
-				options.append( ( field_component.name, field_component.name, False ) )
-		else:#if the file is a collection, have to get deeper in the corresponding HistoryDatasetCollectionAssociation object
-			for i, field_component in enumerate( file_list[0].collection.elements ):
-				options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
+	else:#if input file is a .cel file list, a DatasetCollection or a HistoryDatasetCollectionAssociation
+			if isinstance(file_list,list):#if it is a list, retrieve names easily
+				for i, field_component in enumerate( file_list ):
+					options.append( ( field_component.name, field_component.name, False ) )
+			else:#otherwise it is a collection object: either a DatasetCollection or a HistoryDatasetCollectionAssociation
+				if isinstance(file_list,galaxy.model.DatasetCollection):#if it is a DatasetCollection, its elements are directly accessible
+					for i, field_component in enumerate( file_list.elements ):
+						options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
+				else:#if the file is a HistoryDatasetCollectionAssociation, have to get a little bit deeper in the corresponding HistoryDatasetCollectionAssociation object
+					for i, field_component in enumerate( file_list.collection.elements ):
+						options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
 	return options

 def generateFactorFile( file_list, factor_list, outputFileName, logFile):
@@ -102,10 +107,7 @@
 	outputfile = open(outputFileName, 'w')
 	outputLog = open(logFile, 'w')
 	sampleList=[]
-	if not isinstance(file_list,list):
-		conditionNames=get_condition_file_names(file_list,0) #unique expression file, remove the first column (index=0)
-	else :
-		conditionNames=get_condition_file_names(file_list) #.CEL files
+	conditionNames=get_condition_file_names(file_list,0) #if it's a unique expression file, remove the first column (index=0)
 	for iSample, sample_component in enumerate (conditionNames):
 		sampleList.append(str(sample_component[1]))
 	outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n")