giant_hierarchical_clustering: src/General_functions.py

comparison src/General_functions.py @ 2:ccca6ad98f78 draft

Uploaded

author	vandelj
date	Mon, 14 Sep 2020 13:17:45 +0000
parents	14045c80a222
children

comparison

Legend: equal | deleted | inserted | replaced

-:0b09345fa632
+:ccca6ad98f78
 import re
 import numpy as np
+import galaxy.model
 def get_column_names( file_path, toNotConsider=-1, each=1):
 	options=[]
 	inputfile = open(file_path)
 	firstLine = next(inputfile).strip().split("\t")
 	inputfile.close()
 	return options
 def get_condition_file_names( file_list, toNotConsider=-1, each=1):
 	options=[]
-	if not isinstance(file_list,list):#if input file is a tabular file, act as get_column_names
+	if not (isinstance(file_list,list) or isinstance(file_list,galaxy.model.HistoryDatasetCollectionAssociation) or isinstance(file_list,galaxy.model.DatasetCollection)) :#if input file is a tabular file, act as get_column_names
 		inputfile = open(file_list.file_name)
 		firstLine = next(inputfile).strip().split("\t")
 		cpt=0
 		for i, field_component in enumerate( firstLine ):
 			if i!=toNotConsider:#to squeeze the first column
 					options.append( ( field_component, field_component, False ) )
 				cpt+=1
 				if cpt==each:
 					cpt=0
 		inputfile.close()
-	else:#if input file is a .cel file list or a collection
+	else:#if input file is a .cel file list, a DatasetCollection or a HistoryDatasetCollectionAssociation
-		if not hasattr(file_list[0],'collection'):#if it is not a collection, get name easily
+			if isinstance(file_list,list):#if it is a list, retrieve names easily
-			for i, field_component in enumerate( file_list ):
+				for i, field_component in enumerate( file_list ):
-				options.append( ( field_component.name, field_component.name, False ) )
+					options.append( ( field_component.name, field_component.name, False ) )
-		else:#if the file is a collection, have to get deeper in the corresponding HistoryDatasetCollectionAssociation object
+			else:#if the file is a DatasetCollection, have to get deeper in the corresponding DatasetCollection object
-			for i, field_component in enumerate( file_list[0].collection.elements ):
+				if isinstance(file_list,galaxy.model.DatasetCollection):#if it is a list, retrieve names easily
-				options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
+					for i, field_component in enumerate( file_list.elements ):
+						options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
+				else:#if the file is a HistoryDatasetCollectionAssociation, have to get a little bit deeper in the corresponding HistoryDatasetCollectionAssociation object
+					for i, field_component in enumerate( file_list.collection.elements ):
+						options.append( ( field_component.element_identifier, field_component.element_identifier, False ) )
 	return options
 def generateFactorFile( file_list, factor_list, outputFileName, logFile):
 	forbidenCharacters={"*",":",",","|"}
 	outputfile = open(outputFileName, 'w')
 	outputLog = open(logFile, 'w')
 	sampleList=[]
-	if not isinstance(file_list,list):
+	conditionNames=get_condition_file_names(file_list,0) #if it's a unique expression file, remove the first column (index=0)
-		conditionNames=get_condition_file_names(file_list,0) #unique expression file, remove the first column (index=0)
-	else :
-		conditionNames=get_condition_file_names(file_list) #.CEL files
 	for iSample, sample_component in enumerate (conditionNames):
 		sampleList.append(str(sample_component[1]))
 	outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n")
 	globalDict=dict()
 	factorNameList=[]

Mercurial > repos > vandelj > giant_hierarchical_clustering

comparison src/General_functions.py @ 2:ccca6ad98f78 draft