Mercurial > repos > vandelj > giant_limma_analysis
comparison src/General_functions.py @ 1:9f2ddab68c9e draft
"planemo upload for repository https://github.com/juliechevalier/GIANT/tree/master commit e2b27d6ff2eab66454f984dbf1a519192f41db97"
| author | vandelj |
|---|---|
| date | Wed, 09 Sep 2020 10:28:02 +0000 |
| parents | f274c8d45613 |
| children |
comparison
equal
deleted
inserted
replaced
| 0:f274c8d45613 | 1:9f2ddab68c9e |
|---|---|
| 1 import re | 1 import re |
| 2 import numpy as np | 2 import numpy as np |
| 3 import galaxy.model | |
| 3 | 4 |
| 4 def get_column_names( file_path, toNotConsider=-1, each=1): | 5 def get_column_names( file_path, toNotConsider=-1, each=1): |
| 5 options=[] | 6 options=[] |
| 6 inputfile = open(file_path) | 7 inputfile = open(file_path) |
| 7 firstLine = next(inputfile).strip().split("\t") | 8 firstLine = next(inputfile).strip().split("\t") |
| 74 inputfile.close() | 75 inputfile.close() |
| 75 return options | 76 return options |
| 76 | 77 |
| 77 def get_condition_file_names( file_list, toNotConsider=-1, each=1): | 78 def get_condition_file_names( file_list, toNotConsider=-1, each=1): |
| 78 options=[] | 79 options=[] |
| 79 if not isinstance(file_list,list):#if input file is a tabular file, act as get_column_names | 80 if not (isinstance(file_list,list) or isinstance(file_list,galaxy.model.HistoryDatasetCollectionAssociation) or isinstance(file_list,galaxy.model.DatasetCollection)) :#if input file is a tabular file, act as get_column_names |
| 80 inputfile = open(file_list.file_name) | 81 inputfile = open(file_list.file_name) |
| 81 firstLine = next(inputfile).strip().split("\t") | 82 firstLine = next(inputfile).strip().split("\t") |
| 82 cpt=0 | 83 cpt=0 |
| 83 for i, field_component in enumerate( firstLine ): | 84 for i, field_component in enumerate( firstLine ): |
| 84 if i!=toNotConsider:#to squeeze the first column | 85 if i!=toNotConsider:#to squeeze the first column |
| 86 options.append( ( field_component, field_component, False ) ) | 87 options.append( ( field_component, field_component, False ) ) |
| 87 cpt+=1 | 88 cpt+=1 |
| 88 if cpt==each: | 89 if cpt==each: |
| 89 cpt=0 | 90 cpt=0 |
| 90 inputfile.close() | 91 inputfile.close() |
| 91 else:#if input file is a .cel file list or a collection | 92 else:#if input file is a .cel file list, a DatasetCollection or a HistoryDatasetCollectionAssociation |
| 92 if not hasattr(file_list[0],'collection'):#if it is not a collection, get name easily | 93 if isinstance(file_list,list):#if it is a list, retrieve names easily |
| 93 for i, field_component in enumerate( file_list ): | 94 for i, field_component in enumerate( file_list ): |
| 94 options.append( ( field_component.name, field_component.name, False ) ) | 95 options.append( ( field_component.name, field_component.name, False ) ) |
| 95 else:#if the file is a collection, have to get deeper in the corresponding HistoryDatasetCollectionAssociation object | 96 else:#if the file is a DatasetCollection, have to get deeper in the corresponding DatasetCollection object |
| 96 for i, field_component in enumerate( file_list[0].collection.elements ): | 97 if isinstance(file_list,galaxy.model.DatasetCollection):#if it is a list, retrieve names easily |
| 97 options.append( ( field_component.element_identifier, field_component.element_identifier, False ) ) | 98 for i, field_component in enumerate( file_list.elements ): |
| 99 options.append( ( field_component.element_identifier, field_component.element_identifier, False ) ) | |
| 100 else:#if the file is a HistoryDatasetCollectionAssociation, have to get a little bit deeper in the corresponding HistoryDatasetCollectionAssociation object | |
| 101 for i, field_component in enumerate( file_list.collection.elements ): | |
| 102 options.append( ( field_component.element_identifier, field_component.element_identifier, False ) ) | |
| 98 return options | 103 return options |
| 99 | 104 |
| 100 def generateFactorFile( file_list, factor_list, outputFileName, logFile): | 105 def generateFactorFile( file_list, factor_list, outputFileName, logFile): |
| 101 forbidenCharacters={"*",":",",","|"} | 106 forbidenCharacters={"*",":",",","|"} |
| 102 outputfile = open(outputFileName, 'w') | 107 outputfile = open(outputFileName, 'w') |
| 103 outputLog = open(logFile, 'w') | 108 outputLog = open(logFile, 'w') |
| 104 sampleList=[] | 109 sampleList=[] |
| 105 if not isinstance(file_list,list): | 110 conditionNames=get_condition_file_names(file_list,0) #if it's a unique expression file, remove the first column (index=0) |
| 106 conditionNames=get_condition_file_names(file_list,0) #unique expression file, remove the first column (index=0) | |
| 107 else : | |
| 108 conditionNames=get_condition_file_names(file_list) #.CEL files | |
| 109 for iSample, sample_component in enumerate (conditionNames): | 111 for iSample, sample_component in enumerate (conditionNames): |
| 110 sampleList.append(str(sample_component[1])) | 112 sampleList.append(str(sample_component[1])) |
| 111 outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n") | 113 outputLog.write("[INFO] "+str(len(sampleList))+" sample are detected as input\n") |
| 112 globalDict=dict() | 114 globalDict=dict() |
| 113 factorNameList=[] | 115 factorNameList=[] |
