
"""
Author: Timothy Tickle
Description: Constants.
"""

#####################################################################################
#Copyright (C) <2012>
#
#Permission is hereby granted, free of charge, to any person obtaining a copy of
#this software and associated documentation files (the "Software"), to deal in the
#Software without restriction, including without limitation the rights to use, copy,
#modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
#and to permit persons to whom the Software is furnished to do so, subject to
#the following conditions:
#
#The above copyright notice and this permission notice shall be included in all copies
#or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
#INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
#PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
#OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
#SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#####################################################################################

#Module metadata. The author is also the sole credited contributor and the
#maintainer, so the name is written once and reused.
__author__ = "Timothy Tickle"
__credits__ = [__author__]
__maintainer__ = __author__
__email__ = "ttickle@sph.harvard.edu"
__copyright__ = "Copyright 2012"
__license__ = "MIT"
__status__ = "Development"
|
|
34
|
|
35 from breadcrumbs.src.Metric import Metric
|
|
36
|
|
class ConstantsMicropita():
    """
    Class to hold project constants.

    Every member is a plain class attribute evaluated once when the class
    body runs. Attribute names are unchanged (including historical spellings
    such as c_fProbabilitistic and c_liOccurenceFilter) so existing
    references keep working; only typos and one copy-pasted description in
    the user-facing help strings were corrected.
    """

    #Character Constants
    COLON = ":"
    COMMA = ","
    FASTA_ID_LINE_START = ">"
    QUOTE = "\""
    TAB = '\t'
    WHITE_SPACE = " "
    PIPE = "|"
    c_outputFileDelim = '\t'

    c_sEmptyPredictFileValue = 'NA'

    #Used to stop divide by zero errors
    c_smallNumber = 0.00000000001

    #SVM related
    c_COST_RANGE_KEY = "range"
    c_lCostRange = [-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10]
    c_SCALED_FILE_EXT = ".scaled"
    c_intScaleLowerBound = 0
    #LIBSVM file extensions
    c_SCALING_PARAMETERS = ".range"
    c_CV_FILE_EXT = ".cv.out"
    c_CV_IMAGE_EXT = ".cv.png"
    c_MODEL_FILE_EXT = ".model"
    c_PREDICT_FILE_EXT = ".predict"
    c_fProbabilitistic = True
    c_SCALED_FOR_PREDICTION_FILE_EXT = ".scaledForpredict"

    #SVM output Dictionary keywords for files
    c_strKeywordInputFile = "INPUT"
    c_strKeywordScaledFile = "SCALED"
    c_strKeywordRangeFile = "RANGE"
    c_strKeywordCVOutFile = "CV_OUT"
    c_strKeywordModelFile = "MODEL"
    c_strKeywordScaledPredFile = "SCALED_FOR_PREDICTION"
    c_strKeywordPredFile = "PREDICTION"
    c_strKeywordCostValue = "C"
    c_strKeywordAccuracy = "ACCURACY"

    #Default values for missing data in the Abundance Table
    c_strEmptyAbundanceData = "0"
    c_strEmptyDataMetadata = "NA"
    #Spellings treated as NA. Built through a set so the default metadata value
    #is not listed twice; the order of the resulting list is therefore not defined.
    lNAs = list(set(["NA","na","Na","nA",c_strEmptyDataMetadata]))

    #Occurrence filter [min abundance, min samples occurring in]
    #To turn off make == [0,0]
    c_liOccurenceFilter = [0,0]

    #Break ties in targeted feature with diversity
    c_fBreakRankTiesByDiversity = False

    ####Commandline arguments
    #a Custom diversity metrics found in cogent
    c_strCustomAlphaDiversityHelp = "A key word for any PyCogent supplied alpha diversity metric (Richness, evenness, or diversity). Please supply an unnormalized (counts) abundance table for these metrics. Metrics include "+" ".join(Metric.setAlphaDiversities)+"."

    #b Custom diversity metrics found in cogent
    c_strCustomBetaDiversityHelp = "A key word for any PyCogent supplied beta diversity metric. Metrics include "+" ".join(list(Metric.setBetaDiversities)+[Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted])+"."

    #c,checked Checked abundance file
    c_strCheckedAbundanceFileArgument = "--checked"
    c_strCheckedAbundanceFileHelp = "Before analysis abundance files are checked and a new file results which analysis is performed on. The name of the checked file can be specified or the default will be used (appending a -Checked to the end of the file name)."

    #d,id Name of the sample id row
    c_strIDNameArgument = "--id"
    c_strIDNameHelp = "The row in the abundance file that is the sample name/id row. Should be the sample name/Id in first column of the row."

    #e,label Supervised Label
    c_strSupervisedLabelArgument = "--label"
    c_strSupervisedLabelHelp = "The name of the metadata on which to perform supervised methods"

    #f, invertDiversity
    c_strInvertDiversityHelp = "".join(["When using this flag, the diversity will be inverted (multiplicative inverse) before ranking in the highest diversity method. ",
        "Recommended to use with dominance, menhinick, reciprocal_simpson, berger_parker_d, mcintosh_e, simpson_e, strong and any metric where 0 indicates most diverse."])

    #g,logging Path of the logging file
    c_strLoggingFileArgument = "--logfile"
    c_strLoggingFileHelp = "File path to save the logging file."

    #h help

    #i,tree
    c_strCustomEnvironmentFileHelp = "File describing the sample environments; for use with Unifrac distance metrics."

    #j,delim File delimiter
    c_strFileDelimiterArgument = "--delim"
    c_strFileDelimiterHelp = "The delimiter for the abundance table (default = TAB)"

    #k,featdelim Feature delimiter
    c_strFeatureNameDelimiterArgument = "--featdelim"
    c_strFeatureNameDelimiterHelp = "The delimiter for a feature name if it contains a consensus sequence."

    #l,lastmeta The name of the last metadata
    c_strLastMetadataNameArgument = "--lastmeta"
    #This help text previously repeated the --id description (sample name/id row);
    #it now describes the last metadata row that this option names.
    c_strLastMetadataNameHelp = "The row in the abundance file that is the last metadata row. Should be the metadata name/Id in first column of the metadata row."

    #m,method
    c_strSelectionTechniquesHelp = "Select techniques listed one after another."

    #n,num The Number of unsupervised sample selection
    c_strCountArgument = "-n"
    c_strCountHelp = "The number of samples to select with unsupervised methodology. (An integer greater than 0.)."

    #o,tree
    c_strCustomPhylogeneticTreeHelp = "Tree for phylogenetic when selecting custom beta-diversities in the representative sampling criteria."

    #p,suppredfile File path for the predict file for the supervised methods
    c_strSupervisedPredictedFile = "--suppredfile"
    c_strSupervisedPredictedFileHelp = "The file path for the predict file."

    #q,alphameta
    c_strCustomAlphaDiversityMetadataHelp = "Metric in the pcl file which has custom alpha diversity measurements to use with the highest diversity sampling criteria. Should be a number between 0.0 and 1.0 with 1.0 meaning most diverse."

    #r,targetmethod Taxa selection method
    c_strTargetedFeatureMethodArgument = "--feature_method"
    c_strTargetedFeatureMethodHelp = "The ranking method used to select targeted features."

    #s,stratify Unsupervised stratify metadata
    c_strUnsupervisedStratifyMetadataArgument = "--stratify"
    c_strUnsupervisedStratifyMetadataHelp = "The metadata to stratify unsupervised analysis."

    #t,target Targeted feature file
    c_strTargetedSelectionFileArgument = "--targets"
    c_strTargetedSelectionFileHelp = "A file containing taxa/OTUs/clades to be used in targeted feature sampling criteria."

    #u,supinputfile File path for the input file for the supervised methods
    c_strSupervisedInputFile = "--supinputfile"
    c_strSupervisedInputFileHelp = "The file path for the input file for supervised methods."

    #v,logging String for logging level
    c_strLoggingArgument = "--logging"
    c_strLoggingHelp = "".join(["Logging level which will be logged to a .log file with the",
        " same name as the strOutFile (but with a .log extension). Valid values are DEBUG, INFO, WARNING, ERROR, or CRITICAL."])
    c_lsLoggingChoices = ["DEBUG","INFO","WARNING","ERROR","CRITICAL"]

    #w, Last Feature Metadata indicator
    c_strFeatureMetadataArgument = "--lastFeatureMetadata"
    c_strFeatureMetadataHelp = "The last metadata describing a (bug) feature (not sample). Not all studies have feature metadata, if so this can be ignored and not used. See doc for PCL-Description.txt"

    #x,betamatrix
    c_strCustomBetaDiversityMatrixHelp = "Precalculated beta-diversity matrix to be used in the representative sampling criteria. Should be a number between 0.0 and 1.0 with 1.0 meaning most dissimilar."

    #Order is important, the first is the default
    c_strTargetedRanked = "rank"
    c_strTargetedAbundance = "abundance"
    lsTargetedFeatureMethodValues = [c_strTargetedRanked, c_strTargetedAbundance]

    #Selection methods
    c_strDiversity = "diverse"
    c_strExtreme = "extreme"
    c_strDiscriminant = "discriminant"
    c_strDistinct = "distinct"
    c_strRandom = "random"
    c_strRepresentative = "representative"
    c_strFeature = "features"
    c_custom = "custom"
    c_lsAllUnsupervisedMethods = [c_strRepresentative,c_strDiversity,c_strExtreme,c_strFeature,c_strRandom]
    c_lsAllSupervisedMethods = [c_strDiscriminant,c_strDistinct]
    #Unsupervised methods first, then supervised methods
    c_lsAllMethods = c_lsAllUnsupervisedMethods + c_lsAllSupervisedMethods

    #Technique Names
    c_strDiversity2 = c_strDiversity+"_C"

    ####################################
    #Arguments without commandline flags
    c_strAbundanceFileHelp = "Input file as either a PCL or Biome file."
    c_strGenericOutputDataFileHelp = "The generated output data file."