comparison src/ConstantsMicropita.py @ 0:2f4f6f08c8c4 draft

Uploaded
author george-weingart
date Tue, 13 May 2014 21:58:57 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2f4f6f08c8c4
1 """
2 Author: Timothy Tickle
3 Description: Constants.
4 """
5
6 #####################################################################################
7 #Copyright (C) <2012>
8 #
9 #Permission is hereby granted, free of charge, to any person obtaining a copy of
10 #this software and associated documentation files (the "Software"), to deal in the
11 #Software without restriction, including without limitation the rights to use, copy,
12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
13 #and to permit persons to whom the Software is furnished to do so, subject to
14 #the following conditions:
15 #
16 #The above copyright notice and this permission notice shall be included in all copies
17 #or substantial portions of the Software.
18 #
19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #####################################################################################
26
# Module metadata. The credits list and maintainer are derived from the
# author name so the same person is not spelled out three times.
__author__ = 'Timothy Tickle'
__credits__ = [__author__]
__maintainer__ = __author__
__email__ = 'ttickle@sph.harvard.edu'

# Licensing and release state.
__copyright__ = 'Copyright 2012'
__license__ = 'MIT'
__status__ = 'Development'
34
35 from breadcrumbs.src.Metric import Metric
36
class ConstantsMicropita():
    """
    Class to hold project constants.

    Everything here is a class-level attribute; the class is used as a
    namespace and is not meant to carry instance state. The constants are
    grouped as: character constants, SVM/LIBSVM related values, defaults for
    missing data, command line argument flags with their help text, and the
    names of the selection methods.
    """

    #Character Constants
    COLON = ":"
    COMMA = ","
    FASTA_ID_LINE_START = ">"
    QUOTE = "\""
    TAB = '\t'
    WHITE_SPACE = " "
    PIPE = "|"
    c_outputFileDelim = '\t'

    c_sEmptyPredictFileValue = 'NA'

    #Used to stop divide by zero errors
    c_smallNumber = 0.00000000001

    #SVM related
    c_COST_RANGE_KEY = "range"
    c_lCostRange = [-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10]
    c_SCALED_FILE_EXT = ".scaled"
    c_intScaleLowerBound = 0
    #LIBSVM file extensions
    c_SCALING_PARAMETERS = ".range"
    c_CV_FILE_EXT = ".cv.out"
    c_CV_IMAGE_EXT = ".cv.png"
    c_MODEL_FILE_EXT = ".model"
    c_PREDICT_FILE_EXT = ".predict"
    c_fProbabilitistic = True
    c_SCALED_FOR_PREDICTION_FILE_EXT = ".scaledForpredict"

    #SVM output Dictionary keywords for files
    c_strKeywordInputFile = "INPUT"
    c_strKeywordScaledFile = "SCALED"
    c_strKeywordRangeFile = "RANGE"
    c_strKeywordCVOutFile = "CV_OUT"
    c_strKeywordModelFile = "MODEL"
    c_strKeywordScaledPredFile = "SCALED_FOR_PREDICTION"
    c_strKeywordPredFile = "PREDICTION"
    c_strKeywordCostValue = "C"
    c_strKeywordAccuracy = "ACCURACY"

    #Default values for missing data in the Abundance Table
    c_strEmptyAbundanceData = "0"
    c_strEmptyDataMetadata = "NA"
    #The set removes the duplicate "NA"; the order of the resulting list is
    #therefore not specified, so use it for membership tests only.
    lNAs = list(set(["NA","na","Na","nA",c_strEmptyDataMetadata]))

    #Occurrence filter [min abundance, min samples occurring in]
    #To turn off make == [0,0]
    c_liOccurenceFilter = [0,0]

    #Break ties in targeted feature with diversity
    c_fBreakRankTiesByDiversity = False

    ####Commandline arguments
    #a Custom diversity metrics found in cogent
    c_strCustomAlphaDiversityHelp = "A key word for any PyCogent supplied alpha diversity metric (Richness, evenness, or diversity). Please supply an unnormalized (counts) abundance table for these metrics. Metrics include "+" ".join(Metric.setAlphaDiversities)+"."

    #b Custom diversity metrics found in cogent
    c_strCustomBetaDiversityHelp = "A key word for any PyCogent supplied beta diversity metric. Metrics include "+" ".join(list(Metric.setBetaDiversities)+[Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted])+"."

    #c,checked Checked abundance file
    c_strCheckedAbundanceFileArgument = "--checked"
    c_strCheckedAbundanceFileHelp = "Before analysis abundance files are checked and a new file results which analysis is performed on. The name of the checked file can be specified or the default will be used (appending a -Checked to the end of the file name)."

    #d,id Name of the sample id row
    c_strIDNameArgument = "--id"
    c_strIDNameHelp = "The row in the abundance file that is the sample name/id row. Should be the sample name/Id in first column of the row."

    #e,label Supervised Label
    c_strSupervisedLabelArgument = "--label"
    c_strSupervisedLabelHelp = "The name of the metadata on which to perform supervised methods"

    #f, invertDiversity
    c_strInvertDiversityHelp = "".join(["When using this flag, the diversity will be inverted (multiplicative inverse) before ranking in the highest diversity method. ",
        "Recommended to use with dominance, menhinick, reciprocal_simpson, berger_parker_d, mcintosh_e, simpson_e, strong and any metric where 0 indicates most diverse."])

    #g,logging Path of the logging file
    c_strLoggingFileArgument = "--logfile"
    c_strLoggingFileHelp = "File path to save the logging file."

    #h help

    #i Sample environment file (used with Unifrac distance metrics)
    c_strCustomEnvironmentFileHelp = "File describing the sample environments; for use with Unifrac distance metrics."

    #j,delim File delimiter
    c_strFileDelimiterArgument = "--delim"
    c_strFileDelimiterHelp = "The delimiter for the abundance table (default = TAB)"

    #k,featdelim Feature delimiter
    c_strFeatureNameDelimiterArgument = "--featdelim"
    c_strFeatureNameDelimiterHelp = "The delimiter for a feature name if it contains a consensus sequence."

    #l,lastmeta The name of the last metadata
    #The help text previously repeated the --id wording ("sample name/id row"),
    #which described the wrong row for this argument.
    c_strLastMetadataNameArgument = "--lastmeta"
    c_strLastMetadataNameHelp = "The row in the abundance file that is the last metadata row. Should be the metadata name/Id in first column of the metadata row."

    #m,method
    c_strSelectionTechniquesHelp = "Select techniques listed one after another."

    #n,num The Number of unsupervised sample selection
    c_strCountArgument = "-n"
    c_strCountHelp = "The number of samples to select with unsupervised methodology. (An integer greater than 0.)."

    #o,tree
    c_strCustomPhylogeneticTreeHelp = "Tree for phylogenetic when selecting custom beta-diversities in the representative sampling criteria."

    #p,suppredfile File path for the predict file for the supervised methods
    c_strSupervisedPredictedFile = "--suppredfile"
    c_strSupervisedPredictedFileHelp = "The file path for the predict file."

    #q,alphameta
    c_strCustomAlphaDiversityMetadataHelp = "Metric in the pcl file which has custom alpha diversity measurements to use with the highest diversity sampling criteria. Should be a number between 0.0 and 1.0 with 1.0 meaning most diverse."

    #r,targetmethod Taxa selection method
    c_strTargetedFeatureMethodArgument = "--feature_method"
    c_strTargetedFeatureMethodHelp = "The ranking method used to select targeted features."

    #s,stratify Unsupervised stratify metadata
    c_strUnsupervisedStratifyMetadataArgument = "--stratify"
    c_strUnsupervisedStratifyMetadataHelp = "The metadata to stratify unsupervised analysis."

    #t,target Targeted feature file
    c_strTargetedSelectionFileArgument = "--targets"
    c_strTargetedSelectionFileHelp = "A file containing taxa/OTUs/clades to be used in targeted feature sampling criteria."

    #u,supinputfile File path for the input file for the supervised methods
    c_strSupervisedInputFile = "--supinputfile"
    c_strSupervisedInputFileHelp = "The file path for the input file for supervised methods."

    #v,logging String for logging level
    c_strLoggingArgument = "--logging"
    c_strLoggingHelp = "".join(["Logging level which will be logged to a .log file with the",
        " same name as the strOutFile (but with a .log extension). Valid values are DEBUG, INFO, WARNING, ERROR, or CRITICAL."])
    c_lsLoggingChoices = ["DEBUG","INFO","WARNING","ERROR","CRITICAL"]

    #w, Last Feature Metadata indicator
    c_strFeatureMetadataArgument = "--lastFeatureMetadata"
    c_strFeatureMetadataHelp = "The last metadata describing a (bug) feature (not sample). Not all studies have feature metadata, if so this can be ignored and not used. See doc for PCL-Description.txt"

    #x,betamatrix
    c_strCustomBetaDiversityMatrixHelp = "Precalculated beta-diversity matrix to be used in the representative sampling criteria. Should be a number between 0.0 and 1.0 with 1.0 meaning most dissimilar."

    #Order is important, the first is the default
    c_strTargetedRanked = "rank"
    c_strTargetedAbundance = "abundance"
    lsTargetedFeatureMethodValues = [c_strTargetedRanked, c_strTargetedAbundance]

    #Selection methods
    c_strDiversity = "diverse"
    c_strExtreme = "extreme"
    c_strDiscriminant = "discriminant"
    c_strDistinct = "distinct"
    c_strRandom = "random"
    c_strRepresentative = "representative"
    c_strFeature = "features"
    c_custom = "custom"
    c_lsAllUnsupervisedMethods = [c_strRepresentative,c_strDiversity,c_strExtreme,c_strFeature,c_strRandom]
    c_lsAllSupervisedMethods = [c_strDiscriminant,c_strDistinct]
    c_lsAllMethods = c_lsAllUnsupervisedMethods + c_lsAllSupervisedMethods

    #Technique Names
    c_strDiversity2 = c_strDiversity+"_C"

    ####################################
    #Arguments without commandline flags
    c_strAbundanceFileHelp = "Input file as either a PCL or Biome file."
    c_strGenericOutputDataFileHelp = "The generated output data file."