annotate src/breadcrumbs/scripts/scriptManipulateTable.py @ 3:b4cf8c75305b draft default tip

Pointing to the right root directory
author george-weingart
date Tue, 30 Aug 2016 13:10:34 -0400
parents 2f4f6f08c8c4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
1 #!/usr/bin/env python
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
2 """
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
3 Author: Timothy Tickle
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
4 Description: Performs common manipulations on tables
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
5 """
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
6
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
7 __author__ = "Timothy Tickle"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
8 __copyright__ = "Copyright 2012"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
9 __credits__ = ["Timothy Tickle"]
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
10 __license__ = ""
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
11 __version__ = ""
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
12 __maintainer__ = "Timothy Tickle"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
13 __email__ = "ttickle@sph.harvard.edu"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
14 __status__ = "Development"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
15
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
16 import argparse
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
17 import csv
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
18 import sys
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
19 import re
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
20 import os
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
21 import numpy as np
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
22 from src.AbundanceTable import AbundanceTable
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
23 #from src.PCA import PCA
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
24 from src.ValidateData import ValidateData
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
25
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Command-line interface definition.
argp = argparse.ArgumentParser(
    prog="scriptManipulateTable.py",
    description="""Performs common manipulations on tables.\nExample: python scriptManipulateTable.py -i TID -l STSite Test.pcl""",
)

# --- Options describing the layout of the input table ---
argp.add_argument(
    "-i", "--id",
    dest="sIDName",
    default="ID",
    help="Abundance Table ID",
)
argp.add_argument(
    "-l", "--meta",
    dest="sLastMetadataName",
    help="Last metadata name",
)
argp.add_argument(
    "-d", "--fileDelim",
    dest="cFileDelimiter",
    default="\t",
    help="File delimiter, default tab",
)
argp.add_argument(
    "-f", "--featureDelim",
    dest="cFeatureDelimiter",
    default="|",
    help="Feature (eg. bug or function) delimiter, default '|'",
)

# --- Normalization / summation switches ---
argp.add_argument(
    "-n", "--doNorm",
    dest="fNormalize",
    action="store_true",
    help="Flag to turn on normalization",
)
argp.add_argument(
    "-s", "--doSum",
    dest="fSum",
    action="store_true",
    help="Flag to turn on summation",
)

# --- Unsupervised filtering (each value is a 'a,b' pair parsed later in the script) ---
argp.add_argument(
    "-A", "--doFilterAbundance",
    dest="strFilterAbundance",
    help="Turns on filtering by abundance (remove features that do not have a minimum abundance in a minimum number of samples); Should be a real number and an integer in the form 'minAbundance,minSamples', (should be performed on a normalized file).",
)
argp.add_argument(
    "-P", "--doFilterPercentile",
    dest="strFilterPercentile",
    help="Turns on filtering by percentile Should be two numbers between 0 and 1 in the form 'percentile,percentage'. (should be performed on a normalized file).",
)
argp.add_argument(
    "-O", "--doFilterOccurrence",
    dest="strFilterOccurence",
    help="Turns on filtering by occurrence. Should be two integers in the form 'minSequence,minSample' (should NOT be performed on a normalized file).",
)
# Disabled option: -D / --doFilterDeviation (dest="dCuttOff", type=float),
# filtering by standard deviation.

# --- Options changing feature (bug) membership ---
argp.add_argument(
    "-t", "--makeTerminal",
    dest="fMakeTerminal",
    action="store_true",
    help="Works reduces the file to teminal features in the original file.",
)
argp.add_argument(
    "-u", "--reduceOTUs",
    dest="fRemoveOTUs",
    action="store_true",
    help="Remove otu entries from file.",
)
argp.add_argument(
    "-c", "--reduceToClade",
    dest="iClade",
    type=int,
    help="Specify a level of clade to reduce to [].",
)
argp.add_argument(
    "-b", "--reduceToFeatures",
    dest="strFeatures",
    help="Reduce measurements to certain features (bugs or functions). This can be a comma delimited string (of atleast 2 bugs) or a file.",
)

# --- Metadata-driven manipulation ---
argp.add_argument(
    "-y", "--stratifyBy",
    dest="strStratifyBy",
    help="Metadata to stratify tables by.",
)
argp.add_argument(
    "-r", "--removeMetadata",
    dest="strRemoveMetadata",
    help="Remove samples of this metadata and value (format comma delimited string with metadata id first and the values to remove after 'id,lvalue1,value2').",
)

# --- Lineage manipulation ---
argp.add_argument(
    "-x", "--doPrefixClades",
    dest="fPrefixClades",
    action="store_true",
    help="Flag to turn on adding prefixes to clades to better identify them, for example s__ will be placed infront of each species.",
)

# Disabled options (combining tables): -m / --combineIntersect (dest="fCombineIntersect")
# and -e / --combineUnion (dest="fCombineUnion").
# Disabled option (dimensionality reduction): -p / --doPCA (dest="fDoPCA").

# --- Output and positional input ---
argp.add_argument(
    "-o", "--output",
    dest="strOutFile",
    help="Indicate output pcl file.",
)
argp.add_argument("strFileAbund", help="Input data file")
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
72
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Parse the command line.
args = argp.parse_args()

# Create an output file name if none was given: "<input path without extension>-mod.pcl".
if not args.strOutFile:
    strInputRoot = os.path.splitext(args.strFileAbund)[0]
    args.strOutFile = strInputRoot + "-mod.pcl"
# (root, extension) of the output path; used at the end of the script to build
# numbered file names when more than one table is written.
lsPieces = os.path.splitext(args.strOutFile)

# List of abundance tables the manipulations below operate on.
lsTables = []

# Read in the abundance table described by the command-line options.
abndTable = AbundanceTable.funcMakeFromFile(
    xInputFile=args.strFileAbund,
    cDelimiter=args.cFileDelimiter,
    sMetadataID=args.sIDName,
    sLastMetadata=args.sLastMetadataName,
    lOccurenceFilter=None,
    cFeatureNameDelimiter=args.cFeatureDelimiter,
    xOutputFile=args.strOutFile,
)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
91
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
#TODO Check filtering, can not have some filtering together

# Make feature list.
# NOTE(review): parsing of --reduceToFeatures is disabled below, so lsFeatures
# is always empty when it reaches the "reduce to features" step later in the
# script. Confirm the intended behavior before re-enabling.
lsFeatures = []
if args.strFeatures:
    # Parenthesized single-argument print runs identically on Python 2 and 3.
    print("Get features not completed")
#    if "," in args.strFeatures:
#        lsFeatures = args.strFeatures.split(",")
#        print "ManipulateTable::Reading in feature list "+str(len(lsFeatures))+"."
#    else:
#        csvr = csv.reader(open(args.strFeatures, "rU"))
#        print "ManipulateTable::Reading in feature file "+args.strFeatures+"."
#        for lsLine in csvr:
#            lsFeatures.extend(lsLine)

# The table read from the input file is the first (and initially only) table to manipulate.
lsTables.append(abndTable)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
108
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Sum clades if requested (-s/--doSum).
# Each table is summed in place via funcSumClades(); the feature count is
# reported before and, on success, after.
if args.fSum:
    for abndTable in lsTables:
        print("ManipulateTable::"+abndTable.funcGetName()+" had "+str(len(abndTable.funcGetFeatureNames()))+" features before summing.")
        fResult = abndTable.funcSumClades()
        if fResult:
            print("ManipulateTable::"+abndTable.funcGetName()+" was summed.")
            print("ManipulateTable::"+abndTable.funcGetName()+" has "+str(len(abndTable.funcGetFeatureNames()))+" features after summing.")
        else:
            print("ManipulateTable::ERROR. "+abndTable.funcGetName()+" was NOT summed.")
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
120
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Filter on counts (-O/--doFilterOccurrence).
# Tables that report themselves as normalized are skipped with an error message.
if args.strFilterOccurence:
    # Expect 'minSequence,minSample'. A wrong number of fields or a non-integer
    # value is reported as a usage error instead of an uncaught ValueError.
    try:
        strMinimumSequence, strMinimumSample = args.strFilterOccurence.split(",")
        iMinimumSequence = int(strMinimumSequence)
        iMinimumSample = int(strMinimumSample)
    except ValueError:
        argp.error("--doFilterOccurrence expects two integers in the form 'minSequence,minSample'.")
    for abndTable in lsTables:
        if abndTable.funcIsNormalized():
            print("ManipulateTable::ERROR. "+abndTable.funcGetName()+" is normalized and can not be filtered by occurence. This filter needs counts.")
        else:
            fResult = abndTable.funcFilterAbundanceBySequenceOccurence(iMinSequence=iMinimumSequence, iMinSamples=iMinimumSample)
            if fResult:
                print("ManipulateTable::"+abndTable.funcGetName()+" was filtered by occurence and now has "+str(len(abndTable.funcGetFeatureNames()))+" features.")
            else:
                print("ManipulateTable::ERROR. "+abndTable.funcGetName()+" was NOT filtered by occurence.")
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
133
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Change bug membership: reduce each table to its terminal features (-t/--makeTerminal).
# Only tables that were reduced successfully are carried forward in lsTables.
if args.fMakeTerminal:
    lsTerminalTables = []
    for abndTable in lsTables:
        # Remember the name up front: on failure the reduction returns a falsy
        # value, which has no funcGetName() to call for the error message.
        strTableName = abndTable.funcGetName()
        print("ManipulateTable::"+strTableName+" had "+str(len(abndTable.funcGetFeatureNames()))+" features before making terminal.")
        abndTerminal = abndTable.funcGetFeatureAbundanceTable(abndTable.funcGetTerminalNodes())
        if abndTerminal:
            print("ManipulateTable::"+abndTerminal.funcGetName()+" has "+str(len(abndTerminal.funcGetFeatureNames()))+" terminal features.")
            lsTerminalTables.append(abndTerminal)
        else:
            print("ManipulateTable::ERROR. "+strTableName+" was not made terminal.")
    lsTables = lsTerminalTables
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
146
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Remove OTU entries from each table (-u/--reduceOTUs).
# Only tables for which the removal succeeded are carried forward in lsTables.
if args.fRemoveOTUs:
    lsNotOTUs = []
    for abndTable in lsTables:
        # Remember the name up front: on failure funcGetWithoutOTUs() returns a
        # falsy value, which has no funcGetName() to call for the error message.
        strTableName = abndTable.funcGetName()
        print("ManipulateTable::"+strTableName+" had "+str(len(abndTable.funcGetFeatureNames()))+" features before removing OTUs.")
        abndWithoutOTUs = abndTable.funcGetWithoutOTUs()
        if abndWithoutOTUs:
            print("ManipulateTable::"+abndWithoutOTUs.funcGetName()+" had OTUs removed and now has "+str(len(abndWithoutOTUs.funcGetFeatureNames()))+" features.")
            lsNotOTUs.append(abndWithoutOTUs)
        else:
            print("ManipulateTable::ERROR. "+strTableName+" OTUs were not removed.")
    lsTables = lsNotOTUs
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
158
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Reduce features to the requested clade level (-c/--reduceToClade), in place.
# The truthiness test means a level of 0 is treated the same as "not given".
if args.iClade:
    for abndTable in lsTables:
        fResult = abndTable.funcReduceFeaturesToCladeLevel(args.iClade)
        if fResult:
            print("ManipulateTable::"+abndTable.funcGetName()+" was reduced to clade level "+str(args.iClade)+".")
        else:
            print("ManipulateTable::ERROR. "+abndTable.funcGetName()+" was NOT reduced in clade levels.")
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
166
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Reduce tables to the given features (-b/--reduceToFeatures).
# NOTE(review): the value returned by funcGetFeatureAbundanceTable() is only
# tested for truthiness here and is not stored back into lsTables, whereas the
# --makeTerminal step keeps the table returned by the same method. Combined
# with the feature list parsing being disabled earlier in the script, this
# option looks unfinished -- confirm the intended behavior before relying on it.
if args.strFeatures:
    for abndTable in lsTables:
        fResult = abndTable.funcGetFeatureAbundanceTable(lsFeatures)
        if fResult:
            print("ManipulateTable::"+abndTable.funcGetName()+" has been reduced to given features and now has "+str(len(abndTable.funcGetFeatureNames()))+" features.")
        else:
            print("ManipulateTable::ERROR. "+abndTable.funcGetName()+" could not be reduced to the given list.")
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
174
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Remove samples by metadata value (-r/--removeMetadata).
# The option value is a comma-delimited string: the metadata id first,
# followed by the values whose samples should be removed.
if args.strRemoveMetadata:
    lsMetadata = args.strRemoveMetadata.split(",")
    for abndTable in lsTables:
        fResult = abndTable.funcRemoveSamplesByMetadata(sMetadata=lsMetadata[0], lValuesToRemove=lsMetadata[1:])
        if fResult:
            print("ManipulateTable::"+abndTable.funcGetName()+" has had samples removed and now has "+str(len(abndTable.funcGetSampleNames()))+" samples.")
        else:
            print("ManipulateTable::ERROR. Could not remove samples from "+abndTable.funcGetName()+".")
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
183
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Normalize if requested (-n/--doNorm). This runs before the percentile and
# abundance filters, which only act on tables that report being normalized.
if args.fNormalize:
    for abndTable in lsTables:
        fResult = abndTable.funcNormalize()
        if fResult:
            print("ManipulateTable::"+abndTable.funcGetName()+" was normalized.")
        else:
            print("ManipulateTable::"+abndTable.funcGetName()+" was NOT normalized.")
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
192
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Filter on percentile (-P/--doFilterPercentile).
# Only tables that report being normalized are filtered; others get a message.
if args.strFilterPercentile:
    # Expect 'percentile,percentage'. A wrong number of fields or a non-numeric
    # value is reported as a usage error instead of an uncaught ValueError.
    try:
        strPercentile, strPercentage = args.strFilterPercentile.split(",")
        dPercentile = float(strPercentile)
        dPercentage = float(strPercentage)
    except ValueError:
        argp.error("--doFilterPercentile expects two numbers in the form 'percentile,percentage'.")
    for abndTable in lsTables:
        if abndTable.funcIsNormalized():
            fResult = abndTable.funcFilterAbundanceByPercentile(dPercentileCutOff=dPercentile, dPercentageAbovePercentile=dPercentage)
            if fResult:
                print("ManipulateTable::"+abndTable.funcGetName()+" has been reduced by percentile and now has "+str(len(abndTable.funcGetFeatureNames()))+" features.")
            else:
                print("ManipulateTable::ERROR. "+abndTable.funcGetName()+" could not be reduced by percentile.")
        else:
            print("ManipulateTable::"+abndTable.funcGetName()+" was NOT normalized and so the percentile filter is invalid, please indicate to normalize the table.")
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
205
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Filter on abundance (-A/--doFilterAbundance). This should go after the filter
# on percentile because the filter on percentile needs the full distribution of
# features in a sample.
# Only tables that report being normalized are filtered; others get a message.
if args.strFilterAbundance:
    # Expect 'minAbundance,minSamples'. A wrong number of fields or a
    # non-numeric value is reported as a usage error instead of an uncaught
    # ValueError.
    try:
        strAbundance, strMinSamples = args.strFilterAbundance.split(",")
        dAbundance = float(strAbundance)
        iMinSamples = int(strMinSamples)
    except ValueError:
        argp.error("--doFilterAbundance expects a real number and an integer in the form 'minAbundance,minSamples'.")
    for abndTable in lsTables:
        if abndTable.funcIsNormalized():
            fResult = abndTable.funcFilterAbundanceByMinValue(dMinAbundance=dAbundance, iMinSamples=iMinSamples)
            if fResult:
                print("ManipulateTable::"+abndTable.funcGetName()+" has been reduced by minimum relative abundance value and now has "+str(len(abndTable.funcGetFeatureNames()))+" features.")
            else:
                # The original message said "by percentile" (copied from the
                # percentile filter); report the filter that actually failed.
                print("ManipulateTable::ERROR. "+abndTable.funcGetName()+" could not be reduced by minimum relative abundance value.")
        else:
            print("ManipulateTable::"+abndTable.funcGetName()+" was NOT normalized and so the abundance filter is invalid, please indicate to normalize the table.")
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
221
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
222 #if args.dCuttOff:
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
223 # print "Standard deviation filtering not completed"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
224 # for abndTable in lsTables:
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
225 # abndTable.funcFilterFeatureBySD(dMinSDCuttOff=args.dCuttOff)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
226 # if fResult:
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
227 # print "ManipulateTable::"+abndTable.funcGetName()+" has been reduced by standard deviation and now has "+str(len(abndTable.funcGetFeatureNames()))+" features."
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
228 # else:
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
229 # print "ManipulateTable::ERROR. "+abndTable.funcGetName()+" could not be reduced by standard devation."
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
230
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Need to normalize again after abundance data filtering, given that removing
# features breaks the normalization. Normalization therefore happens twice:
# once before the filters (they need normalized data to filter on) and once here.
# No message is printed when this second normalization reports failure.
if args.fNormalize:
    for abndTable in lsTables:
        fResult = abndTable.funcNormalize()
        if fResult:
            print("ManipulateTable::"+abndTable.funcGetName()+" was normalized after filtering on abundance data.")
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
239
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Manipulate lineage: add clade prefixes to feature names (-x/--doPrefixClades).
if args.fPrefixClades:
    for abndTable in lsTables:
        fResult = abndTable.funcAddCladePrefixToFeatures()
        if fResult:
            print("ManipulateTable::Clade Prefix was added to "+abndTable.funcGetName())
        else:
            print("ManipulateTable::ERROR. Clade Prefix was NOT added to "+abndTable.funcGetName())
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
248
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
249 # Under development
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
250 # Reduce dimensionality
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
251 #if args.fDoPCA:
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
252 # pcaCur = PCA()
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
253 # for abndTable in lsTables:
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
254 #
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
255 # # Add data features
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
256 # # Make data components and add to abundance table
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
257 # pcaCur.loadData(abndTable,True)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
258 # pcaCur.run(fASTransform=True)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
259 # ldVariance = pcaCur.getVariance()
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
260 # lldComponents = pcaCur.getComponents()
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
261 # # Make Names
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
262 # lsNamesData = ["Data_PC"+str((tpleVariance[0]+1))+"_"+re.sub("[\.|-]","_",str(tpleVariance[1])) for tpleVariance in enumerate(ldVariance)]
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
263 # abndTable.funcAddDataFeature(lsNamesData,lldComponents)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
264 #
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
265 # # Add metadata features
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
266 # # Convert metadata to an input for PCA
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
267 # pcaCur.loadData(pcaCur.convertMetadataForPCA(abndTable),False)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
268 # fSuccessful = pcaCur.run(fASTransform=False)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
269 # if(fSuccessful):
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
270 # ldVariance = pcaCur.getVariance()
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
271 # lldComponents = pcaCur.getComponents()
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
272 # # Make Names
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
273 # lsNamesMetadata = ["Metadata_PC"+str((tpleVariance[0]+1))+"_"+re.sub("[\.|-]","_",str(tpleVariance[1])) for tpleVariance in enumerate(ldVariance)]
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
274 # # Make metadata components and add to abundance
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
275 # llsMetadata = [list(npdRow) for npdRow in lldComponents]
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
276 # abndTable.funcAddMetadataFeature(lsNamesMetadata, llsMetadata)
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
277 # else:
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
278 # print "ManipulateTable::No metadata to PCA, no PCA components added to file based on metadata"
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
279
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Manipulate based on metadata: stratify each table (-y/--stratifyBy).
# lsTables is replaced by the concatenation of the tables returned for every
# input table.
if args.strStratifyBy:
    labndStratifiedTables = []
    for abndTable in lsTables:
        labndResult = abndTable.funcStratifyByMetadata(strMetadata=args.strStratifyBy)
        print("ManipulateTable::"+abndTable.funcGetName()+" was stratified by "+args.strStratifyBy+" in to "+str(len(labndResult))+" tables.")
        labndStratifiedTables.extend(labndResult)
    lsTables = labndStratifiedTables
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
288
2f4f6f08c8c4 Uploaded
george-weingart
parents:
diff changeset
# Write the results. A single table goes to the output path itself; otherwise
# each table goes to "<root><index><extension>", with indices counting from 1.
if len(lsTables) != 1:
    strOutRoot, strOutExtension = lsPieces
    for iIndex, abndManTable in enumerate(lsTables, 1):
        abndManTable.funcWriteToFile(strOutRoot + str(iIndex) + strOutExtension)
else:
    lsTables[0].funcWriteToFile(args.strOutFile)