Mercurial > repos > george-weingart > micropita
comparison src/ConstantsMicropita.py @ 0:2f4f6f08c8c4 draft
Uploaded
author | george-weingart |
---|---|
date | Tue, 13 May 2014 21:58:57 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2f4f6f08c8c4 |
---|---|
1 """ | |
2 Author: Timothy Tickle | |
3 Description: Constants. | |
4 """ | |
5 | |
6 ##################################################################################### | |
7 #Copyright (C) <2012> | |
8 # | |
9 #Permission is hereby granted, free of charge, to any person obtaining a copy of | |
10 #this software and associated documentation files (the "Software"), to deal in the | |
11 #Software without restriction, including without limitation the rights to use, copy, | |
12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, | |
13 #and to permit persons to whom the Software is furnished to do so, subject to | |
14 #the following conditions: | |
15 # | |
16 #The above copyright notice and this permission notice shall be included in all copies | |
17 #or substantial portions of the Software. | |
18 # | |
19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, | |
20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A | |
21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |
23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |
24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
25 ##################################################################################### | |
26 | |
# Module metadata (authorship, license and release status).
__author__ = "Timothy Tickle"
__copyright__ = "Copyright 2012"
__credits__ = ["Timothy Tickle"]
__license__ = "MIT"
__maintainer__ = "Timothy Tickle"
__email__ = "ttickle@sph.harvard.edu"
__status__ = "Development"
34 | |
35 from breadcrumbs.src.Metric import Metric | |
36 | |
class ConstantsMicropita():
    """
    Class to hold project constants.

    All values are plain class attributes and are read directly from the
    class (for example ``ConstantsMicropita.TAB``). The two custom diversity
    help strings are built when the class is created from the metric names
    exposed by ``Metric``.
    """

    # Character constants
    COLON = ":"
    COMMA = ","
    FASTA_ID_LINE_START = ">"
    QUOTE = "\""
    TAB = '\t'
    WHITE_SPACE = " "
    PIPE = "|"
    c_outputFileDelim = '\t'

    c_sEmptyPredictFileValue = 'NA'

    # Used to stop divide by zero errors
    c_smallNumber = 0.00000000001

    # SVM related
    c_COST_RANGE_KEY = "range"
    c_lCostRange = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    c_SCALED_FILE_EXT = ".scaled"
    c_intScaleLowerBound = 0
    # LIBSVM file extensions
    c_SCALING_PARAMETERS = ".range"
    c_CV_FILE_EXT = ".cv.out"
    c_CV_IMAGE_EXT = ".cv.png"
    c_MODEL_FILE_EXT = ".model"
    c_PREDICT_FILE_EXT = ".predict"
    c_fProbabilitistic = True
    c_SCALED_FOR_PREDICTION_FILE_EXT = ".scaledForpredict"

    # SVM output dictionary keywords for files
    c_strKeywordInputFile = "INPUT"
    c_strKeywordScaledFile = "SCALED"
    c_strKeywordRangeFile = "RANGE"
    c_strKeywordCVOutFile = "CV_OUT"
    c_strKeywordModelFile = "MODEL"
    c_strKeywordScaledPredFile = "SCALED_FOR_PREDICTION"
    c_strKeywordPredFile = "PREDICTION"
    c_strKeywordCostValue = "C"
    c_strKeywordAccuracy = "ACCURACY"

    # Default values for missing data in the abundance table
    c_strEmptyAbundanceData = "0"
    c_strEmptyDataMetadata = "NA"
    # Spellings treated as NA; the set removes the duplicate contributed by
    # c_strEmptyDataMetadata, so the order of the resulting list is not fixed.
    lNAs = list(set(["NA", "na", "Na", "nA", c_strEmptyDataMetadata]))

    # Occurrence filter [min abundance, min samples occurring in]
    # To turn off make == [0,0]
    c_liOccurenceFilter = [0, 0]

    # Break ties in targeted feature with diversity
    c_fBreakRankTiesByDiversity = False

    #### Commandline arguments
    # a Custom diversity metrics found in cogent
    c_strCustomAlphaDiversityHelp = "A key word for any PyCogent supplied alpha diversity metric (Richness, evenness, or diversity). Please supply an unnormalized (counts) abundance table for these metrics. Metrics include " + " ".join(Metric.setAlphaDiversities) + "."

    # b Custom diversity metrics found in cogent
    c_strCustomBetaDiversityHelp = "A key word for any PyCogent supplied beta diversity metric. Metrics include " + " ".join(list(Metric.setBetaDiversities) + [Metric.c_strUnifracUnweighted, Metric.c_strUnifracWeighted]) + "."

    # c,checked Checked abundance file
    c_strCheckedAbundanceFileArgument = "--checked"
    c_strCheckedAbundanceFileHelp = "Before analysis abundance files are checked and a new file results which analysis is performed on. The name of the checked file can be specified or the default will be used (appending a -Checked to the end of the file name)."

    # d,id Name of the sample id row
    c_strIDNameArgument = "--id"
    c_strIDNameHelp = "The row in the abundance file that is the sample name/id row. Should be the sample name/Id in first column of the row."

    # e,label Supervised label
    c_strSupervisedLabelArgument = "--label"
    c_strSupervisedLabelHelp = "The name of the metadata on which to perform supervised methods"

    # f, invertDiversity
    c_strInvertDiversityHelp = (
        "When using this flag, the diversity will be inverted (multiplicative inverse) before ranking in the highest diversity method. "
        "Recommended to use with dominance, menhinick, reciprocal_simpson, berger_parker_d, mcintosh_e, simpson_e, strong and any metric where 0 indicates most diverse."
    )

    # g,logging Path of the logging file
    c_strLoggingFileArgument = "--logfile"
    c_strLoggingFileHelp = "File path to save the logging file."

    # h help

    # i,tree
    c_strCustomEnvironmentFileHelp = "File describing the sample environments; for use with Unifrac distance metrics."

    # j,delim File delimiter
    c_strFileDelimiterArgument = "--delim"
    c_strFileDelimiterHelp = "The delimiter for the abundance table (default = TAB)"

    # k,featdelim Feature delimiter
    c_strFeatureNameDelimiterArgument = "--featdelim"
    c_strFeatureNameDelimiterHelp = "The delimiter for a feature name if it contains a consensus sequence."

    # l,lastmeta The name of the last metadata
    c_strLastMetadataNameArgument = "--lastmeta"
    # The first sentence used to repeat the --id help text (sample name/id row).
    c_strLastMetadataNameHelp = "The row in the abundance file that is the last metadata row. Should be the metadata name/Id in first column of the metadata row."

    # m,method
    c_strSelectionTechniquesHelp = "Select techniques listed one after another."

    # n,num The number of unsupervised sample selection
    c_strCountArgument = "-n"
    c_strCountHelp = "The number of samples to select with unsupervised methodology. (An integer greater than 0.)"

    # o,tree
    c_strCustomPhylogeneticTreeHelp = "Tree for phylogenetic when selecting custom beta-diversities in the representative sampling criteria."

    # p,suppredfile File path for the predict file for the supervised methods
    c_strSupervisedPredictedFile = "--suppredfile"
    c_strSupervisedPredictedFileHelp = "The file path for the predict file."

    # q,alphameta
    c_strCustomAlphaDiversityMetadataHelp = "Metric in the pcl file which has custom alpha diversity measurements to use with the highest diversity sampling criteria. Should be a number between 0.0 and 1.0 with 1.0 meaning most diverse."

    # r,targetmethod Taxa selection method
    c_strTargetedFeatureMethodArgument = "--feature_method"
    c_strTargetedFeatureMethodHelp = "The ranking method used to select targeted features."

    # s,stratify Unsupervised stratify metadata
    c_strUnsupervisedStratifyMetadataArgument = "--stratify"
    c_strUnsupervisedStratifyMetadataHelp = "The metadata to stratify unsupervised analysis."

    # t,target Targeted feature file
    c_strTargetedSelectionFileArgument = "--targets"
    c_strTargetedSelectionFileHelp = "A file containing taxa/OTUs/clades to be used in targeted feature sampling criteria."

    # u,supinputfile File path for the input file for the supervised methods
    c_strSupervisedInputFile = "--supinputfile"
    c_strSupervisedInputFileHelp = "The file path for the input file for supervised methods."

    # v,logging String for logging level
    c_strLoggingArgument = "--logging"
    c_strLoggingHelp = (
        "Logging level which will be logged to a .log file with the"
        " same name as the strOutFile (but with a .log extension). Valid values are DEBUG, INFO, WARNING, ERROR, or CRITICAL."
    )
    c_lsLoggingChoices = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]

    # w, Last feature metadata indicator
    c_strFeatureMetadataArgument = "--lastFeatureMetadata"
    c_strFeatureMetadataHelp = "The last metadata describing a (bug) feature (not sample). Not all studies have feature metadata, if so this can be ignored and not used. See doc for PCL-Description.txt"

    # x,betamatrix
    c_strCustomBetaDiversityMatrixHelp = "Precalculated beta-diversity matrix to be used in the representative sampling criteria. Should be a number between 0.0 and 1.0 with 1.0 meaning most dissimilar."

    # Order is important, the first is the default
    c_strTargetedRanked = "rank"
    c_strTargetedAbundance = "abundance"
    lsTargetedFeatureMethodValues = [c_strTargetedRanked, c_strTargetedAbundance]

    # Selection methods
    c_strDiversity = "diverse"
    c_strExtreme = "extreme"
    c_strDiscriminant = "discriminant"
    c_strDistinct = "distinct"
    c_strRandom = "random"
    c_strRepresentative = "representative"
    c_strFeature = "features"
    c_custom = "custom"
    c_lsAllUnsupervisedMethods = [c_strRepresentative, c_strDiversity, c_strExtreme, c_strFeature, c_strRandom]
    c_lsAllSupervisedMethods = [c_strDiscriminant, c_strDistinct]
    c_lsAllMethods = c_lsAllUnsupervisedMethods + c_lsAllSupervisedMethods

    # Technique names
    c_strDiversity2 = c_strDiversity + "_C"

    ####################################
    # Arguments without commandline flags
    c_strAbundanceFileHelp = "Input file as either a PCL or BIOM file."
    c_strGenericOutputDataFileHelp = "The generated output data file."