comparison src/breadcrumbs/src/AbundanceTable.py @ 0:0de566f21448 draft default tip

v2
author sagun98
date Thu, 03 Jun 2021 18:13:32 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0de566f21448
1 """
2 Author: Timothy Tickle
3 Description: Class to abstract an abundance table and methods to run on such a table.
4 """
5
6 #####################################################################################
7 #Copyright (C) <2012>
8 #
9 #Permission is hereby granted, free of charge, to any person obtaining a copy of
10 #this software and associated documentation files (the "Software"), to deal in the
11 #Software without restriction, including without limitation the rights to use, copy,
12 #modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
13 #and to permit persons to whom the Software is furnished to do so, subject to
14 #the following conditions:
15 #
16 #The above copyright notice and this permission notice shall be included in all copies
17 #or substantial portions of the Software.
18 #
19 #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
20 #INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
21 #PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22 #HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 #OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 #SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #####################################################################################
26
27 __author__ = "Timothy Tickle"
28 __copyright__ = "Copyright 2012"
29 __credits__ = ["Timothy Tickle"]
30 __license__ = "MIT"
31 __maintainer__ = "Timothy Tickle"
32 __email__ = "ttickle@sph.harvard.edu"
33 __status__ = "Development"
34
35 import csv
36 import sys
37 import blist
38 from CClade import CClade
39 from ConstantsBreadCrumbs import ConstantsBreadCrumbs
40 import copy
41 from datetime import date
42 import numpy as np
43 import os
44 import re
45 import scipy.stats
46 import string
47 from ValidateData import ValidateData
48 from biom.parse import *
49 from biom.table import *
50
# Module-level tuning constants. Their exact semantics are defined by the
# methods that consume them (not all visible in this chunk) -- confirm against
# the normalization/summation routines before changing.
c_dTarget = 1.0
c_fRound = False
c_iSumAllCladeLevels = -1
c_fOutputLeavesOnly = False

import warnings
# NOTE(review): globally silences FutureWarnings (e.g. numpy's warning about
# elementwise comparison of structured arrays with None, which this module
# performs) -- confirm this suppression is still desired.
warnings.simplefilter(action = "ignore", category = FutureWarning)
58
59
class RowMetadata:
	"""
	Holds the row (feature) metadata and associated functions.
	"""

	def __init__(self, dictRowMetadata, iLongestMetadataEntry=None, lsRowMetadataIDs=None):
		""" Constructor requires a dictionary of row metadata.

		:param dictRowMetadata: The row metadata values with the ids as the keys, must be stable (keep order)
		:type: {string feature id: {'metadata': {'taxonomy': [list of metadata values]}}}
		:param iLongestMetadataEntry: Length of the longest metadata entry (optional).
		:type: Integer
		:param lsRowMetadataIDs: Predefined, ordered list of row metadata ids (optional).
		:type: List of strings
		"""

		self.dictRowMetadata = dictRowMetadata
		self.iLongestMetadataEntry = iLongestMetadataEntry
		self.lsRowMetadataIDs = lsRowMetadataIDs

		# Maps each metadata id to the longest value-list length seen for it
		# across all features.
		self.dictMetadataIDs = {}
		if self.dictRowMetadata:
			for dictMetadata in self.dictRowMetadata.values():
				dictMetadata = dictMetadata.get(ConstantsBreadCrumbs.c_metadata_lowercase, None)

				if dictMetadata:
					for key, lxValues in dictMetadata.items():
						# Membership test (not truthiness) so a stored length of
						# 0 is not mistaken for a missing key.
						if key in self.dictMetadataIDs:
							self.dictMetadataIDs[key] = max(self.dictMetadataIDs[key], len(lxValues))
						else:
							self.dictMetadataIDs[key] = len(lxValues)

	def funcMakeIDs(self):
		""" There should be a one to one mapping of row metadata ids and the values associated here with a feature ID.
		If not make ids from the key by appending numbers.

		:return: The given id list when one was supplied at construction,
			otherwise [list of generated ids, list of original keys].
		"""

		# If an ids list already exists return it (this allows ID order to be
		# given and preserved).
		if self.lsRowMetadataIDs:
			return self.lsRowMetadataIDs

		lsIDs = []
		lsKeys = []

		for key, iCount in self.dictMetadataIDs.items():
			lsKeys.append( key )
			if iCount > 1:
				# Multi-valued metadata get one id per value: key_0, key_1, ...
				# range (not xrange) behaves identically here in Python 2 and
				# keeps the code importable under Python 3.
				lsIDs.extend( [ "_".join( [ key, str( iIndex ) ] ) for iIndex in range( iCount ) ] )
			else:
				lsIDs.append( key )
		return [ lsIDs, lsKeys ]

	def funGetFeatureMetadata(self, sFeature, sMetadata):
		"""
		Returns a list of values in the order of row metadata ids for a microbial feature given an id.

		:param sFeature: Feature id to get metadata
		:type: string
		:param sMetadata: Metadata to get
		:type: string
		:return: list of metadata associated with the metadata (empty list when
			the feature or metadata key is unknown)
		"""
		lsMetadata = []
		if self.dictRowMetadata:
			dictFeature = self.dictRowMetadata.get( sFeature, None )
			if dictFeature:
				dictFeatureMetadata = dictFeature.get(ConstantsBreadCrumbs.c_metadata_lowercase, None)
				if dictFeatureMetadata:
					# Default to [] (not None) so the documented list contract
					# holds even when the metadata key is absent.
					lsMetadata = dictFeatureMetadata.get(sMetadata, [])
		return lsMetadata
128
129 class AbundanceTable:
130 """
131 Represents an abundance table and contains common function to perform on the object.
132
133 This class is made from an abundance data file. What is expected is a text file delimited by
134 a character (which is given to the object). The first column is expected to be the id column
135 for each of the rows. Metadata is expected before measurement data. Columns are samples and
136 rows are features (bugs).
137
138 This object is currently not hashable.
139 """
140
141 def __init__(self, npaAbundance, dictMetadata, strName, strLastMetadata, rwmtRowMetadata = None, dictFileMetadata = None, lOccurenceFilter = None, cFileDelimiter = ConstantsBreadCrumbs.c_cTab, cFeatureNameDelimiter = "|"):
142 """
143 Constructor for an abundance table.
144
145 :param npaAbundance: Structured Array of abundance data (Row=Features, Columns=Samples)
146 :type: Numpy Structured Array abundance data (Row=Features, Columns=Samples)
147 :param dictMetadata: Dictionary of metadata {"String ID":["strValue","strValue","strValue","strValue","strValue"]}
148 :type: Dictionary Dictionary
149 :param npaRowMetdata Structured Array of row (feature) metadata (optional)
150 :type: Numpy Structured Array abundance data (Row=Features, Columns=Feature metadata)
151 :param strName: The name of the metadata that serves as the ID for the columns (For example a sample ID)
152 :type: string
153 :param strLastMetadata: The string last metadata name
154 :type: string
155 :param lOccurenceFilter: List of integers used in an occurence filter. [Min abundance, Min sample]
156 :type: List of integers
157 :param cFileDelimiter: Character used as the delimiter of the file that is read in to create the abundance table.
158 Will also be used to write the abudance table file to a file to keep file consistency.
159 :type: Character delimiter for reading the data in (default = TAB)
160 :param cFeatureNameDelimiter: Character used as the delimiter of the feature names (column 1). This is useful if the name are complex, for instance consensus lineages in metagenomics.
161 :type: Character delimiter for feature names (default = |)
162 """
163
164 ### File Metadata
165
166 #Date
167 self.dateCreationDate = dictFileMetadata.get(ConstantsBreadCrumbs.c_strDateKey,None) if dictFileMetadata else None
168
169 #Indicates if the table has been filtered and how
170 self._strCurrentFilterState = ""
171
172 #The delimiter from the source file
173 self._cDelimiter = cFileDelimiter
174
175 #The feature name delimiter
176 self._cFeatureDelimiter = cFeatureNameDelimiter
177
178 #File type
179 self.strFileFormatType = dictFileMetadata.get(ConstantsBreadCrumbs.c_strFormatKey,None) if dictFileMetadata else None
180
181 #File generation source
182 self.strFileGenerationSource = dictFileMetadata.get(ConstantsBreadCrumbs.c_strSourceKey,None) if dictFileMetadata else None
183
184 #File type
185 self.strFileType = dictFileMetadata.get(ConstantsBreadCrumbs.c_strTypekey,None) if dictFileMetadata else None
186
187 #File url
188 self.strFileURL = dictFileMetadata.get(ConstantsBreadCrumbs.c_strURLKey,None) if dictFileMetadata else None
189
190 #The id of the file
191 self.strId = dictFileMetadata.get(ConstantsBreadCrumbs.c_strIDKey,None) if dictFileMetadata else None
192
193 #The lastmetadata name (which should be preserved when writing the file)
194 # Can be a None if biom file is read in.
195 self._strLastMetadataName = strLastMetadata
196
197 #The original number of features in the table
198 self._iOriginalFeatureCount = -1
199
200 #The name of the object relating to the file it was read from or would have been read from if it exists
201 #Keeps tract of changes to the file through the name
202 #Will be used to write out the object to a file as needed
203 self._strOriginalName = strName
204
205 #The original number of samples in the table
206 self._iOriginalSampleCount = -1
207
208 #Data sparsity type
209 self.fSparseMatrix = dictFileMetadata.get(ConstantsBreadCrumbs.c_strSparsityKey,False) if dictFileMetadata else False
210
211 ### Data metadata
212 #The column (sample) metdata
213 self._dictTableMetadata = dictMetadata
214
215 #The row (feature) metadata (Row Metadata object)
216 self.rwmtRowMetadata = rwmtRowMetadata
217
218 ### Data
219
220 #The abundance data
221 self._npaFeatureAbundance = npaAbundance
222
223
224 ### Logistical
225
226 #Clade prefixes for biological samples
227 self._lsCladePrefixes = ["k__","p__","c__","o__","f__","g__","s__"]
228
229 #This is not a hashable object
230 self.__hash__ = None
231
232
233 ### Prep the object
234
235 self._fIsNormalized = self._fIsSummed = None
236 #If contents is not a false then set contents to appropriate objects
237 # Checking to see if the data is normalized, summed and if we need to run a filter on it.
238 if ( self._npaFeatureAbundance != None ) and self._dictTableMetadata:
239 self._iOriginalFeatureCount = self._npaFeatureAbundance.shape[0]
240 self._iOriginalSampleCount = len(self.funcGetSampleNames())
241
242 self._fIsNormalized = ( max( [max( list(a)[1:] or [0] ) for a in self._npaFeatureAbundance] or [0] ) <= 1 )
243
244 lsLeaves = AbundanceTable.funcGetTerminalNodesFromList( [a[0] for a in self._npaFeatureAbundance], self._cFeatureDelimiter )
245 self._fIsSummed = ( len( lsLeaves ) != len( self._npaFeatureAbundance ) )
246
247 #Occurence filtering
248 #Removes features that do not have a given level iLowestAbundance in a given amount of samples iLowestSampleOccurence
249 if ( not self._fIsNormalized ) and lOccurenceFilter:
250 iLowestAbundance, iLowestSampleOccurrence = lOccurenceFilter
251 self.funcFilterAbundanceBySequenceOccurence( iLowestAbundance, iLowestSampleOccurrence )
252 # else:
253 # sys.stderr.write( "Abundance or metadata was None, should be atleast an empty object\n" )
254
255 @staticmethod
256 def funcMakeFromFile(xInputFile, cDelimiter = ConstantsBreadCrumbs.c_cTab, sMetadataID = None, sLastMetadataRow = None, sLastMetadata = None,
257 lOccurenceFilter = None, cFeatureNameDelimiter="|", xOutputFile = None):
258 """
259 Creates an abundance table from a table file.
260
261 :param xInputFile: Path to input file.
262 :type: String String path.
263 :param cDelimiter: Delimiter for parsing the input file.
264 :type: Character Character
265 :param sMetadataID: String ID that is a metadata row ID (found on the first column) and used as an ID for samples
266 :type: String String ID
267 :param sLastRowMetadata: The id of the last (most right column) row metadata
268 :type: String String ID
269 :param sLastMetadata: The ID of the metadata that is the last metadata before measurement or feature rows.
270 :type: String String ID
271 :param lOccurenceFilter: List of integers used in an occurence filter. [Min abundance, Min sample]
272 :type: List of integers
273 :param cFeatureNameDelimiter: Used to parse feature (bug) names if they are complex.
274 For example if they are consensus lineages and contain parent clade information.
275 :type: Character Delimiting letter
276 :param xOutputFile: File to output the abundance table which was read in.
277 :type: FileStream or String file path
278 :return AbundanceTable: Will return an AbundanceTable object on no error. Returns False on error.
279 """
280
281 #Get output file and remove if existing
282 outputFile = open( xOutputFile, "w" ) if isinstance(xOutputFile, str) else xOutputFile
283
284 #################################################################################
285 # Check if file is a biom file - if so invoke the biom routine #
286 #################################################################################
287 strFileName = xInputFile if isinstance(xInputFile, str) else xInputFile.name
288 # Determine the file read function by file extension
289 if strFileName.endswith(ConstantsBreadCrumbs.c_strBiomFile):
290 BiomCommonArea = AbundanceTable._funcBiomToStructuredArray(xInputFile)
291 if BiomCommonArea:
292 lContents = [BiomCommonArea[ConstantsBreadCrumbs.c_BiomTaxData],
293 BiomCommonArea[ConstantsBreadCrumbs.c_Metadata],
294 BiomCommonArea[ ConstantsBreadCrumbs.c_dRowsMetadata],
295 BiomCommonArea[ConstantsBreadCrumbs.c_BiomFileInfo]
296 ]
297
298 # Update last metadata and id if given
299 if not sLastMetadata:
300 strLastMetadata = BiomCommonArea[ConstantsBreadCrumbs.c_sLastMetadata]
301 else:
302 # return false on failure
303 lContents = False
304 else:
305 #Read in from text file to create the abundance and metadata structures
306 lContents = AbundanceTable._funcTextToStructuredArray(xInputFile=xInputFile, cDelimiter=cDelimiter,
307 sMetadataID = sMetadataID, sLastMetadataRow = sLastMetadataRow, sLastMetadata = sLastMetadata, ostmOutputFile = outputFile)
308
309 #If contents is not a false then set contents to appropriate objects
310 return AbundanceTable(npaAbundance=lContents[0], dictMetadata=lContents[1], strName=str(xInputFile), strLastMetadata=sLastMetadata, rwmtRowMetadata = lContents[2],
311 dictFileMetadata = lContents[3], lOccurenceFilter = lOccurenceFilter, cFileDelimiter=cDelimiter, cFeatureNameDelimiter=cFeatureNameDelimiter) if lContents else False
312
313 #Testing Status: Light happy path testing
314 @staticmethod
315 def funcCheckRawDataFile(strReadDataFileName, iFirstDataIndex = -1, sLastMetadataName = None, lOccurenceFilter = None, strOutputFileName = "", cDelimiter = ConstantsBreadCrumbs.c_cTab):
316 """
317 Check the input abundance table.
318 Currently reduces the features that have no occurence.
319 Also inserts a NA for blank metadata and a 0 for blank abundance data.
320 Gives the option to filter features through an occurence filter (a feature must have a level of abundance in a minimal number of samples to be included).
321 Either iFirstDataIndex or sLastMetadataName must be given
322
323 :param strReadDataFileName: File path of file to read and check.
324 :type: String File path.
325 :param iFirstDataIndex: First (row) index of data not metadata in the abundance file.
326 :type: Integer Index starting at 0.
327 :param sLastMetadataName: The ID of the last metadata in the file. Rows of measurements should follow this metadata.
328 :type: String
329 :param lOccurenceFilter: The lowest number of occurences in the lowest number of samples needed for a feature to be kept
330 :type: List[2] List length 2 [lowest abundance (not normalized), lowest number of samples to occur in] (eg. [2.0,2.0])
331 :param strOutputFileName: File path of out put file.
332 :type: String File path.
333 :param cDelimiter: Character delimiter for reading and writing files.
334 :type: Character Delimiter.
335 :return Output Path: Output path for written checked file.
336 """
337
338 #Validate parameters
339 if (iFirstDataIndex == -1) and (sLastMetadataName == None):
340 print "AbundanceTable:checkRawDataFile::Error, either iFirstDataIndex or sLastMetadataNamemust be given."
341 return False
342
343 #Get output file and remove if existing
344 outputFile = strOutputFileName
345 if not strOutputFileName:
346 outputFile = os.path.splitext(strReadDataFileName)[0]+ConstantsBreadCrumbs.OUTPUT_SUFFIX
347
348 #Read input file lines
349 #Drop blank lines
350 readData = ""
351 with open(strReadDataFileName,'rU') as f:
352 readData = f.read()
353 readData = filter(None,readData.split(ConstantsBreadCrumbs.c_strEndline))
354
355 #Read the length of each line and make sure there is no jagged data
356 #Also hold row count for the metadata
357 iLongestLength = len(readData[0].split(cDelimiter))
358 iMetadataRow = -1
359 if not sLastMetadataName:
360 sLastMetadataName = "None"
361 for iIndex, strLine in enumerate(readData):
362 sLineElements = strLine.split(cDelimiter)
363 if sLineElements[0] == sLastMetadataName:
364 iMetadataRow = iIndex
365 iLongestLength = max(iLongestLength, len(sLineElements))
366
367 #If not already set, set iFirstDataIndex
368 if iFirstDataIndex < 0:
369 iFirstDataIndex = iMetadataRow + 1
370
371 #Used to substitute . to -
372 reSubPeriod = re.compile('\.')
373
374 #File writer
375 with open(outputFile,'w') as f:
376
377 #Write metadata
378 #Empty data is changed to a default
379 #Jagged ends are filled with a default
380 for strDataLine in readData[:iFirstDataIndex]:
381 lsLineElements = strDataLine.split(cDelimiter)
382 for iindex, sElement in enumerate(lsLineElements):
383 if not sElement.strip():
384 lsLineElements[iindex] = ConstantsBreadCrumbs.c_strEmptyDataMetadata
385 if len(lsLineElements) < iLongestLength:
386 lsLineElements = lsLineElements + ([ConstantsBreadCrumbs.c_strEmptyDataMetadata]*(iLongestLength-len(lsLineElements)))
387 f.write(cDelimiter.join(lsLineElements)+ConstantsBreadCrumbs.c_strEndline)
388
389 #For each data line in the table
390 for line in readData[iFirstDataIndex:]:
391 writeToFile = False
392 cleanLine = list()
393 #Break line into delimited elements
394 lineElements = line.split(cDelimiter)
395
396 #Clean feature name
397 sCleanFeatureName = reSubPeriod.sub("-",lineElements[0])
398
399 #For each element but the first (taxa name)
400 #Element check to see if not == zero
401 #If so add to output
402 for element in lineElements[1:]:
403 if(element.strip() in string.whitespace):
404 cleanLine.append(ConstantsBreadCrumbs.c_strEmptyAbundanceData)
405 #Set abundance of 0 but do not indicate the line should be saved
406 elif(element == "0"):
407 cleanLine.append(element)
408 #If an abundance is found set the line to be saved.
409 else:
410 cleanLine.append(element)
411 writeToFile = True
412
413 #Occurence filtering
414 #Removes features that do not have a given level iLowestAbundance in a given amount of samples iLowestSampleOccurence
415 if lOccurenceFilter:
416 iLowestAbundance, iLowestSampleOccurence = lOccurenceFilter
417 if iLowestSampleOccurence > sum([1 if float(sEntry) >= iLowestAbundance else 0 for sEntry in cleanLine]):
418 writeToFile = False
419
420 #Write to file
421 if writeToFile:
422 f.write(sCleanFeatureName+cDelimiter+cDelimiter.join(cleanLine)+ConstantsBreadCrumbs.c_strEndline)
423 return outputFile
424
425 def __repr__(self):
426 """
427 Represent or print object.
428 """
429 return "AbundanceTable"
430
431 def __str__(self):
432 """
433 Create a string representation of the Abundance Table.
434 """
435
436 return "".join(["Sample count:", str(len(self._npaFeatureAbundance.dtype.names[1:])),
437 os.linesep+"Feature count:", str(len(self._npaFeatureAbundance[self._npaFeatureAbundance.dtype.names[0]])),
438 os.linesep+"Id Metadata:", self._npaFeatureAbundance.dtype.names[0],
439 os.linesep+"Metadata ids:", str(self._dictTableMetadata.keys()),
440 os.linesep+"Metadata count:", str(len(self._dictTableMetadata.keys())),
441 os.linesep+"Originating source:",self._strOriginalName,
442 os.linesep+"Original feature count:", str(self._iOriginalFeatureCount),
443 os.linesep+"Original sample count:", str(self._iOriginalSampleCount),
444 os.linesep+"Is normalized:", str(self._fIsNormalized),
445 os.linesep+"Is summed:", str(self._fIsSummed),
446 os.linesep+"Current filtering state:", str(self._strCurrentFilterState),
447 os.linesep+"Feature delimiter:", self._cFeatureDelimiter,
448 os.linesep+"File delimiter:",self._cDelimiter])
449
	def __eq__(self, objOther):
		"""
		Check if an object is equivalent in data to this object.

		Equality requires: objOther is not None, is an AbundanceTable, and
		matches this table's delimiters, sample metadata, file metadata (date,
		format, type, URL), abundance rows (compared order-insensitively by
		feature id), and sample name order. Name, generation source and string
		representation are deliberately NOT compared (see commented blocks).

		:param objOther: Object to compare against.
		:return: True when equivalent in data, False otherwise.
		"""
		# Check for none
		if objOther is None:
			return False

		#Check for object type
		if isinstance(objOther,AbundanceTable) != True:
			return False

		#Check feature delimiter
		if self.funcGetFeatureDelimiter() != objOther.funcGetFeatureDelimiter():
			return False

		#Check file delimiter
		if self.funcGetFileDelimiter() != objOther.funcGetFileDelimiter():
			return False

		#**************************************************
		#* Commented out                                  *
		#**************************************************
		#Check name - Commented out by GW on 2013/09/14 because
		#If we import pcl file into biom file and then export to pcl, the file names might be different but the tables might be the same
		#Check name
		#if self.funcGetName() != objOther.funcGetName():
			#return False

		#Check sample metadata
		#Go through the metadata
		result1 = self.funcGetMetadataCopy()
		result2 = objOther.funcGetMetadataCopy()
		if sorted(result1.keys()) != sorted(result2.keys()):
			return False
		# NOTE(review): the key sets already match at this point, so the
		# `not in` branch below is effectively unreachable defensive code.
		for strKey in result1.keys():
			if strKey not in result2:
				return False
			if result1[strKey] != result2[strKey]:
				return False

		#TODO check the row (feature) metadata

		#TODO check the file metadata
		#Check the ID
		# NOTE(review): this repeats the file-delimiter comparison performed
		# above; the surrounding comment suggests self.strId was intended --
		# confirm before changing behavior.
		if self.funcGetFileDelimiter() != objOther.funcGetFileDelimiter():
			return False

		#Check the date
		if self.dateCreationDate != objOther.dateCreationDate:
			return False

		#Check the format
		if self.strFileFormatType != objOther.strFileFormatType:
			return False

		#**************************************************
		#* Commented out                                  *
		#**************************************************
		#Check source - Commented out by GW on 2013/09/14 because
		#If we import pcl file into biom file and then export to pcl, the file names might be different but the tables might be the same
		#Check the source
		#if self.strFileGenerationSource != objOther.strFileGenerationSource:
			#return False

		#Check the type
		if self.strFileType != objOther.strFileType:
			return False

		#Check the URL
		if self.strFileURL != objOther.strFileURL:
			return False

		#Check data
		#TODO go through the data
		#TODO also check the data type
		result1 = self.funcGetAbundanceCopy()
		result2 = objOther.funcGetAbundanceCopy()
		if len(result1) != len(result2):
			return False

		# Compare rows order-insensitively by sorting on the feature id
		# (first element of each row tuple).
		sorted_result1 = sorted(result1, key=lambda tup: tup[0])
		sorted_result2 = sorted(result2, key=lambda tup: tup[0])

		if sorted_result1 != sorted_result2 :
			return False

		#**************************************************
		#* Commented out                                  *
		#**************************************************
		#Check AbundanceTable.__str__(self) - Commented out by GW on 2013/09/14 because
		#If we import pcl file into biom file and then export to pcl, the file names might be different but the tables might be the same

		#Check string representation
		#if AbundanceTable.__str__(self) != AbundanceTable.__str__(objOther):
			#return False

		#Check if sample ids are the same and in the same order
		if self.funcGetSampleNames() != objOther.funcGetSampleNames():
			return False

		return True
565
566
567 def __ne__(self, objOther):
568 return not self == objOther
569
570
	#Testing Status: Light happy path testing
	#TODO: Tim change static to class methods
	@staticmethod
	def _funcTextToStructuredArray(xInputFile = None, cDelimiter = ConstantsBreadCrumbs.c_cTab, sMetadataID = None, sLastMetadataRow = None, sLastMetadata = None, ostmOutputFile = None):
		"""
		Private method.
		Used to read in a file that is samples (column) and taxa (rows) into a structured array.

		:param xInputFile: File stream or path to input file.
		:type: String File stream or string path.
		:param cDelimiter: Delimiter for parsing the input file.
		:type: Character Character.
		:param sMetadataID: String ID that is a metadata row ID (found on the first column) and used as an ID for samples.
			If not given it is assumed to be position 0
		:type: String String ID
		:param sLastMetadataRow: String ID that is the last row metadata id (id of the most right column with row/feature metadata)
		:type: String String ID
		:param sLastMetadata: The ID of the metadata that is the last metadata before measurement or feature rows.
		:type: String String ID
		:param ostmOutputFile: Output File to write to if needed. None does not write the file.
		:type: FileStream or String
		:return [taxData,metadata,rowmetadata,file metadata]: Numpy Structured Array of abundance data and dictionary of metadata.
			Metadata is a dictionary as such {"ID", [value,value,values...]}
			Values are in the order they are read in (and the order of the sample names).
			ID is the first column in each metadata row.
			rowmetadata is an optional RowMetadata object (can be None if not made).
			The rowmetadata and taxData row Ids should match.
			The last dict is a collection of BIOM file parameters used when converting from a BIOM file.
			[Numpy structured Array, Dictionary, RowMetadata, dict]
			Returns False on parse error.
		"""

		# Open file from a stream or file path
		# NOTE(review): 'rU' (universal newlines) is Python-2 era -- confirm
		# before running under newer Python versions.
		istmInput = open( xInputFile, 'rU' ) if isinstance(xInputFile, str) else xInputFile
		# Flag that when incremented will switch from metadata parsing to data parsing
		iFirstDataRow = -1
		# Sample id row
		namesRow = None
		# Row metadata names
		lsRowMetadataIDs = None
		# Index of the last row metadata
		iIndexLastMetadataRow = None
		# Holds metadata {ID:[list of values]}
		metadata = dict()
		# Holds the data measurements [(tuple of values)]
		dataMatrix = []
		# Holds row metadata { sID : [ list of values ] }
		dictRowMetadata = {}
		# Positional index
		iIndex = -1
		# File handle
		csvw = None

		# Read in files
		if ostmOutputFile:
			csvw = csv.writer( open(ostmOutputFile,'w') if isinstance(ostmOutputFile, str) else ostmOutputFile, csv.excel_tab, delimiter = cDelimiter )
		# For each line in the file, and assume the tax id is the first element and the data follows
		for lsLineElements in csv.reader( istmInput, dialect = csv.excel_tab, delimiter = cDelimiter ):
			iIndex += 1
			taxId, sampleReads = lsLineElements[0], lsLineElements[1:]

			# Read through data measurements
			# Process them as a list of tuples (needed for structured array)
			if iFirstDataRow > 0:
				try:
					# Parse the sample reads, removing row metadata and storing row metadata if it exists
					if lsRowMetadataIDs:
						# Build expected dict for row metadata dictionary {string feature id: {'metadata': {metadatakey: [list of metadata values]}}}
						dictFeature = dict([ [sID, [sKey]] for sID, sKey in zip( lsRowMetadataIDs, sampleReads[ 0 : iIndexLastMetadataRow ]) ])
						if len( dictFeature ):
							dictRowMetadata[ taxId ] = { ConstantsBreadCrumbs.c_metadata_lowercase: dictFeature }
						# Blank measurement cells default to 0.
						dataMatrix.append(tuple([taxId] + [( float(s) if s.strip( ) else 0 ) for s in sampleReads[ iIndexLastMetadataRow: ]]))
					else:
						dataMatrix.append(tuple([taxId] + [( float(s) if s.strip( ) else 0 ) for s in sampleReads]))
				except ValueError:
					# A non-numeric value in a measurement row aborts the parse.
					sys.stderr.write( "AbundanceTable:textToStructuredArray::Error, non-numerical value on data row. File:" + str(xInputFile) +
						" Row:" + str(lsLineElements) + "\n" )
					return False
			# Go through study measurements
			else:
				# Read in metadata values, if the entry is blank then give it the default empty metadata value.
				for i, s in enumerate( sampleReads ):
					if not s.strip( ):
						sampleReads[i] = ConstantsBreadCrumbs.c_strEmptyDataMetadata

				# If no id metadata (sample ids) is given then the first row is assumed to be the id row, otherwise look for the id for the metadata.
				# Add the metadata to the containing dict
				if ( ( not sMetadataID ) and ( iIndex == 0 ) ) or ( taxId == sMetadataID ):
					namesRow = lsLineElements
					# Remove the row metadata ids, these names are for the column ID and the samples ids
					if sLastMetadataRow:
						iIndexLastMetadataRow = lsLineElements.index(sLastMetadataRow)
						lsRowMetadataIDs = namesRow[ 1 : iIndexLastMetadataRow + 1 ]
						namesRow = [ namesRow[ 0 ] ] + namesRow[ iIndexLastMetadataRow + 1: ]

						# If the sample metadata dictionary already has entries then remove the row metadata info from it.
						if len( metadata ) and len( lsRowMetadataIDs ):
							for sKey, lsValues in metadata.items():
								metadata[ sKey ] = lsValues[ iIndexLastMetadataRow: ]

				# Set the metadata without row metadata entries
				metadata[taxId] = sampleReads[ iIndexLastMetadataRow: ] if (lsRowMetadataIDs and len( lsRowMetadataIDs )) else sampleReads

				# If the last metadata was just processed switch to data processing
				# If the last metadata name is not given it is assumed that there is only one metadata
				if ( not sLastMetadata ) or ( taxId == sLastMetadata ):
					iFirstDataRow = iIndex + 1

			# If writing out the data write back out the line read in.
			# This happens at the end so that the above cleaning is captured and written.
			if csvw:
				csvw.writerow( [taxId] + sampleReads )

		if sLastMetadata and ( not dataMatrix ):
			sys.stderr.write( "AbundanceTable:textToStructuredArray::Error, did not find the row for the last metadata ID. File:" + str(xInputFile) +
				" Identifier:" + sLastMetadata + "\n" )
			return False

		# Make sure the names are found
		if namesRow == None:
			sys.stderr.write( "AbundanceTable:textToStructuredArray::Error, did not find the row for the unique sample/column. File:" + str(xInputFile) +
				" Identifier:" + sMetadataID + "\n" )
			return False

		# Now we know the longest taxId we can define the first column holding the tax id
		# Gross requirement of Numpy structured arrays, a = ASCII followed by max # of characters (as a string)
		# The id width is doubled, presumably as slack for later renames -- confirm.
		longestTaxId = max( len(a[0]) for a in dataMatrix )
		dataTypeVector = [(namesRow[0],'a' + str(longestTaxId*2))] + [(s, "f4") for s in namesRow[1:]]
		# Create structured array
		taxData = np.array(dataMatrix,dtype=np.dtype(dataTypeVector))

		# Returns a none currently because the PCL file specification this originally worked on did not have feature metadata
		# Can be updated in the future.
		# [Data (structured array), column metadata (dict), row metadata (RowMetadata), file metadata (dict)]
		return [taxData, metadata, RowMetadata(dictRowMetadata = dictRowMetadata, lsRowMetadataIDs = lsRowMetadataIDs), {
			ConstantsBreadCrumbs.c_strIDKey:ConstantsBreadCrumbs.c_strDefaultPCLID,
			ConstantsBreadCrumbs.c_strDateKey:str(date.today()),
			ConstantsBreadCrumbs.c_strFormatKey:ConstantsBreadCrumbs.c_strDefaultPCLFileFormateType,
			ConstantsBreadCrumbs.c_strSourceKey:ConstantsBreadCrumbs.c_strDefaultPCLGenerationSource,
			ConstantsBreadCrumbs.c_strTypekey:ConstantsBreadCrumbs.c_strDefaultPCLFileTpe,
			ConstantsBreadCrumbs.c_strURLKey:ConstantsBreadCrumbs.c_strDefaultPCLURL,
			ConstantsBreadCrumbs.c_strSparsityKey:ConstantsBreadCrumbs. c_fDefaultPCLSparsity}]
714
715 # def funcAdd(self,abndTwo,strFileName=None):
716 # """
717 # Allows one to add an abundance table to an abundance table. They both must be the same state of normalization or summation
718 # or they will be summed or normalized if one of the two are.
719 #
720 # :param abndTwo: AbundanceTable object 2
721 # :type: AbundanceTable
722 # :return AbudanceTable:
723 # """
724 #
725 # #Check summation and normalization
726 # if(self.funcIsSummed() or abndTwo.funcIsSummed()):
727 # self.funcSum()
728 # abndTwo.funcSum()
729 # if(self.funcIsNormalized() or abndTwo.funcIsNormalized()):
730 # self.funcNormalize()
731 # abndTwo.funcNormalize()
732 #
733 # #Normalize Feature names
734 # #Get if the abundance tables have clades
735 # fAbndInputHasClades = self.funcHasFeatureHierarchy()
736 # fAbndCompareHasClades = abndTwo.funcHasFeatureHierarchy()
737 #
738 # if(fAbndInputHasClades or fAbndCompareHasClades):
739 # #If feature delimiters do not match, switch
740 # if not self.funcGetFeatureDelimiter() == abndTwo.funcGetFeatureDelimiter():
741 # abndTwo.funcSetFeatureDelimiter(self.funcGetFeatureDelimiter())
742 #
743 # #Add prefixes if needed.
744 # self.funcAddCladePrefixToFeatures()
745 # abndTwo.funcAddCladePrefixToFeatures()
746 #
747 # #Get feature Names
748 # lsFeatures1 = self.funcGetFeatureNames()
749 # lsFeatures2 = abndTwo.funcGetFeatureNames()
750 #
751 # #Make one feature name list
#		lsFeaturesCombined = list(set(lsFeatures1+lsFeatures2))
753 #
754 # #Add samples by features (Use 0.0 for empty data features, use NA for empty metadata features)
755 #
756 #
757 # #Combine metadata
758 # dictMetadata1 = self.funcGetMetadataCopy()
759 # dictMetadata2 = abndTwo.funcGetMetadataCopy()
760 #
761 # #Get first table metadata and add NA for metadata it is missing for the length of the current metadata
762 # lsMetadataOnlyInTwo = list(set(dictMetadata2.keys())-set(dictMetadata1.keys()))
763 # dictCombinedMetadata = dictMetadata1
764 # lsEmptyMetadata = ["NA" for i in xrange(self.funcGetSampleCount())]
765 # for sKey in lsMetadataOnlyInTwo:
766 # dictCombinedMetadata[sKey]=lsEmptyMetadata
767 # #Add in the other metadata dictionary
768 # lsCombinedKeys = dictCombinedMetadata.keys()
769 # lsEmptyMetadata = ["NA" for i in xrange(abndTwo.funcGetSampleCount())]
#		for sKey in lsCombinedKeys:
771 # if sKey in dictMetadata2:
772 # dictCombinedMetadata[sKey] = dictCombinedMetadata[sKey]+dictMetadata2[sKey]
773 # else:
774 # dictCombinedMetadata[sKey] = dictCombinedMetadata[sKey]+lsEmptyMetadata
775 #
776 # #Make Abundance table
777 # return AbundanceTable(npaAbundance=npaAbundance,
778 # dictMetadata = dictCombinedMetadata,
779 # strName = strFileName if strFileName else os.path.splitext(self)[0]+"_combined_"+os.path.splitext(abndTwo)[0],
780 # strLastMetadata = self.funcGetLastMetadataName(),
781 # cFileDelimiter = self.funcGetFileDelimiter(), cFeatureNameDelimiter=self.funcGetFeatureDelimiter())
782
783 #TODO This does not adjust for sample ordering, needs to
784 def funcAddDataFeature(self, lsNames, npdData):
785 """
786 Adds a data or group of data to the underlying table.
787 Names should be in the order of the data
788 Each row is considered a feature (not sample).
789
790 :param lsNames: Names of the features being added to the data of the table
791 :type: List List of string names
792 :param npdData: Rows of features to add to the table
793 :type: Numpy array accessed by row.
794 """
795 if ( self._npaFeatureAbundance == None ):
796 return False
797
798 # Check number of input data rows
799 iDataRows = npdData.shape[0]
800 if (len(lsNames) != iDataRows):
801 print "Error:The names and the rows of data features to add must be of equal length"
802
803 # Grow the array by the neccessary amount and add the new rows
804 iTableRowCount = self.funcGetFeatureCount()
805 iRowElementCount = self.funcGetSampleCount()
806 self._npaFeatureAbundance.resize(iTableRowCount+iDataRows)
807 for iIndexData in xrange(iDataRows):
808 self._npaFeatureAbundance[iTableRowCount+iIndexData] = tuple([lsNames[iIndexData]]+list(npdData[iIndexData]))
809
810 return True
811
812 #TODO This does not adjust for sample ordering, needs to
813 def funcAddMetadataFeature(self,lsNames,llsMetadata):
814 """
815 Adds metadata feature to the underlying table.
816 Names should be in the order of the lists of metadata
817 Each internal list is considered a metadata and paired to a name
818 """
819 if ( self._dictTableMetadata == None ):
820 return False
821
822 # Check number of input data rows
823 iMetadataCount = len(llsMetadata)
824 if (len(lsNames) != iMetadataCount):
825 print "Error:The names and the rows of metadata features to add must be of equal length"
826
827 # Add the metadata
828 for tpleMetadata in zip(lsNames,llsMetadata):
829 self._dictTableMetadata[tpleMetadata[0]]=tpleMetadata[1]
830 return True
831
832 #2 test Cases
833 def funcSetFeatureDelimiter(self, cDelimiter):
834 """
835 Changes the feature delimiter to the one provided.
836 Updates the feature names.
837
838 :param cDelimiter: Character feature delimiter
839 :type: Character
840 :return Boolean: Indicator of success or not (false)
841 """
842 if ( self._npaFeatureAbundance == None ):
843 return False
844 cDelimiterCurrent = self.funcGetFeatureDelimiter()
845 if ( not cDelimiter or not cDelimiterCurrent):
846 return False
847
848 #Make new feature names
849 lsNewFeatureNames = [sFeatureName.replace(cDelimiterCurrent,cDelimiter) for sFeatureName in self.funcGetFeatureNames()]
850
851 #Update new feature names to abundance table
852 if (not self.funcGetIDMetadataName() == None):
853 self._npaFeatureAbundance[self.funcGetIDMetadataName()] = np.array(lsNewFeatureNames)
854
855 #Update delimiter
856 self._cFeatureDelimiter = cDelimiter
857 return True
858
859 #Happy path tested
860 def funcGetSampleNames(self):
861 """
862 Returns the sample names (IDs) contained in the abundance table.
863
864 :return Sample Name: A List of sample names indicated by the metadata associated with the sMetadataId given in table creation.
865 A list of string names or empty list on error as well as no underlying table.
866 """
867
868 return self._npaFeatureAbundance.dtype.names[1:] if ( self._npaFeatureAbundance != None ) else []
869
870 #Happy Path Tested
871 def funcGetIDMetadataName(self):
872 """
873 Returns the metadata id.
874
875 :return ID: The metadata id (the sample Id).
876 Returns none on error.
877 """
878
879 return self._npaFeatureAbundance.dtype.names[0] if ( self._npaFeatureAbundance != None ) else None
880
881 #Happy path tested
882 def funcGetAbundanceCopy(self):
883 """
884 Returns a deep copy of the abundance table.
885
886 :return Numpy Structured Array: The measurement data in the Abundance table. Can use sample names to access each column of measurements.
887 Returns none on error.
888 """
889
890 return self._npaFeatureAbundance.copy() if ( self._npaFeatureAbundance != None ) else None
891
892 #Happy path tested
893 def funcGetAverageAbundancePerSample(self, lsTargetedFeatures):
894 """
895 Averages feature abundance within a sample.
896
897 :param lsTargetedFeatures: String names of features to average
898 :type: List of string names of features which are measured
899 :return List: List of lists or boolean (False on error). One internal list per sample indicating the sample and the feature's average abudance
900 [[sample,average abundance of selected taxa]] or False on error
901 """
902
903 #Sample rank averages [[sample,average abundance of selected taxa]]
904 sampleAbundanceAverages = []
905
906 sampleNames = self.funcGetSampleNames()
907 allTaxaNames = self.funcGetFeatureNames()
908 #Get an abundance table compressed to features of interest
909 abndReducedTable = self.funcGetFeatureAbundanceTable(lsTargetedFeatures)
910 if abndReducedTable == None:
911 return False
912
913 #If the taxa to be selected are not in the list, Return nothing and log
914 lsMissing = []
915 for sFeature in lsTargetedFeatures:
916 if not sFeature in allTaxaNames:
917 lsMissing.append(sFeature)
918 else:
919 #Check to make sure the taxa of interest is not average abundance of 0
920 if not abndReducedTable.funcGetFeatureSumAcrossSamples(sFeature):
921 lsMissing.append(sFeature)
922 if len(lsMissing) > 0:
923 sys.stderr.write( "Could not find features for averaging: " + str(lsMissing) )
924 return False
925
926 #For each sample name get average abundance
927 for sName in sampleNames:
928 npaFeaturesSample = abndReducedTable.funcGetSample(sName)
929 sampleAbundanceAverages.append([sName,sum(npaFeaturesSample)/float(len(npaFeaturesSample))])
930
931 #Sort based on average
932 return sorted(sampleAbundanceAverages, key = lambda sampleData: sampleData[1], reverse = True)
933
934 #Happy path tested 1
935 def funcGetAverageSample(self):
936 """
937 Returns the average sample of the abundance table.
938 This average sample is made of the average of each feature.
939 :return list: A list of averages in the order of the feature names.
940 """
941
942 ldAverageSample = []
943 #If there are no samples then return empty list.
944 if len(self.funcGetSampleNames()) < 1:
945 return ldAverageSample
946
947 #If there are samples return the average of each feature in the order of the feature names.
948 for sFeature in self._npaFeatureAbundance:
949 npFeaturesAbundance = list(sFeature)[1:]
950 ldAverageSample.append(sum(npFeaturesAbundance)/float(len(npFeaturesAbundance)))
951
952 return ldAverageSample
953
954 #Tested 2 cases
955 def funcHasFeatureHierarchy(self):
956 """
957 Returns an indicator of having a hierarchy in the features indicated by the existance of the
958 feature delimiter.
959
960 :return Boolean: True (Has a hierarchy) or False (Does not have a hierarchy)
961 """
962
963 if ( self._npaFeatureAbundance == None ):
964 return None
965 cDelimiter = self.funcGetFeatureDelimiter()
966 if ( not cDelimiter ):
967 return False
968
969 #For each feature name, check to see if the delimiter is in the name
970 for sFeature in self.funcGetFeatureNames():
971 if cDelimiter in sFeature:
972 return True
973 return False
974
975 def funcGetCladePrefixes(self):
976 """
977 Returns the list of prefixes to use on biological sample hierarchy
978
979 :return List: List of strings
980 """
981 return self._lsCladePrefixes
982
983 #3 test cases
984 def funcAddCladePrefixToFeatures(self):
985 """
986 As a standardized clade prefix to indicate biological clade given hierarchy.
987 Will not add a prefix to already prefixes feature names.
988 Will add prefix to feature names that do not have them or clades in a feature name that
989 do not have them while leaving ones that do as is.
990
991 :return Boolean: True (Has a hierarchy) or False (Does not have a hierarchy)
992 """
993
994 if ( self._npaFeatureAbundance == None ):
995 return None
996 cDelimiter = self.funcGetFeatureDelimiter()
997 lsPrefixes = self.funcGetCladePrefixes()
998 iPrefixLength = len(lsPrefixes)
999 if ( not cDelimiter ):
1000 return False
1001
1002 #Append prefixes to feature names
1003 lsUpdatedFeatureNames = []
1004 lsFeatureNames = self.funcGetFeatureNames()
1005
1006 for sFeatureName in lsFeatureNames:
1007 lsClades = sFeatureName.split(cDelimiter)
1008 #If there are not enough then error
1009 if(len(lsClades) > iPrefixLength):
1010 print "Error:: Too many clades given to be biologically meaningful"
1011 return False
1012 lsUpdatedFeatureNames.append(cDelimiter.join([lsPrefixes[iClade]+lsClades[iClade] if not(lsClades[iClade][0:len(lsPrefixes[iClade])]==lsPrefixes[iClade]) else lsClades[iClade] for iClade in xrange(len(lsClades))]))
1013
1014 #Update new feature names to abundance table
1015 if not self.funcGetIDMetadataName() == None:
1016 self._npaFeatureAbundance[self.funcGetIDMetadataName()] = np.array(lsUpdatedFeatureNames)
1017
1018 return True
1019
1020 #Happy Path Tested
1021 def funcGetFeatureAbundanceTable(self, lsFeatures):
1022 """
1023 Returns a copy of the current abundance table with the abundance of just the given features.
1024
1025 :param lsFeatures: String Feature IDs that are kept in the compressed abundance table.
1026 :type: List of strings Feature IDs (found as the first entry of a filter in the input file.
1027 :return AbundanceTable: A compressed version of the abundance table.
1028 On an error None is returned.
1029 """
1030
1031 if ( self._npaFeatureAbundance == None ) or ( lsFeatures == None ):
1032 return None
1033
1034 #Get a list of boolean indicators that the row is from the features list
1035 lfFeatureData = [sRowID in lsFeatures for sRowID in self.funcGetFeatureNames()]
1036 #compressed version as an Abundance table
1037 lsNamePieces = os.path.splitext(self._strOriginalName)
1038 abndFeature = AbundanceTable(npaAbundance=np.compress(lfFeatureData, self._npaFeatureAbundance, axis = 0),
1039 dictMetadata = self.funcGetMetadataCopy(),
1040 strName = lsNamePieces[0] + "-" + str(len(lsFeatures)) +"-Features"+lsNamePieces[1],
1041 strLastMetadata=self.funcGetLastMetadataName(),
1042 cFileDelimiter = self.funcGetFileDelimiter(), cFeatureNameDelimiter= self.funcGetFeatureDelimiter())
1043 #Table is no longer normalized
1044 abndFeature._fIsNormalized = False
1045 return abndFeature
1046
1047 #Happy path tested
1048 def funcGetFeatureDelimiter(self):
1049 """
1050 The delimiter of the feature names (For example to use on concensus lineages).
1051
1052 :return Character: Delimiter for the feature name pieces if it is complex.
1053 """
1054
1055 return self._cFeatureDelimiter
1056
1057 #Happy path tested
1058 def funcGetFeatureCount(self):
1059 """
1060 Returns the current feature count.
1061
1062 :return Count: Returns the int count of features in the abundance table.
1063 Returns None on error.
1064 """
1065
1066 return self._npaFeatureAbundance.shape[0] if not self._npaFeatureAbundance is None else 0
1067
1068 #Happy path tested
1069 def funcGetFeatureSumAcrossSamples(self,sFeatureName):
1070 """
1071 Returns float sum of feature values across the samples.
1072
1073 :param sFeatureName: The feature ID to get the sum from.
1074 :type: String.
1075 :return Double: Sum of one feature across samples.
1076 """
1077 return sum(self.funcGetFeature(sFeatureName))
1078
1079 def funcGetFeature(self,sFeatureName):
1080 """
1081 Returns feature values across the samples.
1082
1083 :param sFeatureName: The feature ID to get the sum from.
1084 :type: String.
1085 :return Double: Feature across samples.
1086 """
1087
1088 for sFeature in self._npaFeatureAbundance:
1089 if sFeature[0] == sFeatureName:
1090 return list(sFeature)[1:]
1091 return None
1092
1093 #Happy path tested
1094 def funcGetFeatureNames(self):
1095 """
1096 Return the feature names as a list.
1097
1098 :return Feature Names: List of feature names (or IDs) as strings.
1099 As an error returns empty list.
1100 """
1101
1102 if (not self._npaFeatureAbundance == None):
1103 return self._npaFeatureAbundance[self.funcGetIDMetadataName()]
1104 return []
1105
1106 #Happy path tested
1107 def funcGetFileDelimiter(self):
1108 """
1109 The delimiter of the file the data was read from and which is also the delimiter which would be used to write the data to a file.
1110
1111 :return Character: Delimiter for the parsing and writing the file.
1112 """
1113
1114 return self._cDelimiter
1115
1116 def funcGetLastMetadataName(self):
1117 """
1118 Get the last metadata name that seperates abundance and metadata measurements.
1119
1120 :return string: Metadata name
1121 """
1122 return self._strLastMetadataName
1123
1124 #Happy path tested
1125 def funcGetSample(self,sSampleName):
1126 """
1127 Return a copy of the feature measurements of a sample.
1128
1129 :param sSampleName: Name of sample to return.
1130 :type: String
1131 :return Sample: Measurements Feature measurements of a sample.
1132 Empty numpy array returned on error.
1133 """
1134
1135 if (not self._npaFeatureAbundance == None):
1136 return self._npaFeatureAbundance[sSampleName].copy()
1137 return np.array([])
1138
1139 #Happy path tested
1140 def funcGetMetadata(self, strMetadataName):
1141 """
1142 Returns a list of metadata that is associated with the given metadata name (id).
1143
1144 :param strMetadataName: String metadata ID to be returned
1145 :type: String ID
1146 :return Metadata: List of metadata
1147 """
1148
1149 return copy.deepcopy( self._dictTableMetadata.get(strMetadataName) ) \
1150 if self._dictTableMetadata else None
1151
1152 #Happy path tested
1153 def funcGetMetadataCopy(self):
1154 """
1155 Returns a deep copy of the metadata.
1156
1157 :return Metadata copy: {"ID":[value,value...]}
1158 """
1159
1160 return copy.deepcopy(self._dictTableMetadata)
1161
1162 #Happy path tested
1163 def funcGetName(self):
1164 """
1165 Returns the name of the object which is the file name that generated it.
1166 If the object was generated from an Abundance Table (for instance through stratification)
1167 the name is still in the form of a file that could be written to which is informative
1168 of the changes that have occurred on the data set.
1169 :return string: Name
1170 """
1171 return self._strOriginalName
1172
1173 #Happy path tested. could do more
1174 def funcGetTerminalNodes(self):
1175 """
1176 Returns the terminal nodes given the current feature names in the abundance table. The
1177 features must contain a consensus lineage or all will be returned.
1178 :return List: List of strings of the terminal nodes given the abundance table.
1179 """
1180 return AbundanceTable.funcGetTerminalNodesFromList(lsNames=self.funcGetFeatureNames(),cNameDelimiter=self.funcGetFeatureDelimiter())
1181
1182 #Tested 2 test cases
1183 @staticmethod
1184 def funcGetTerminalNodesFromList(lsNames,cNameDelimiter):
1185 """
1186 Returns the terminal nodes given the current feature names in the abundance table. The
1187 features must contain a consensus lineage or all will be returned.
1188
1189 :param lsNames: The list of string names to parse and filter.
1190 :type: List of strings
1191 :param cNameDelimiter: The delimiter for the name of the features.
1192 :type: Character Delimiter
1193 :return list: A list of terminal elements in the list (given only the list).
1194 """
1195
1196 #Build hash
1197 dictCounts = dict()
1198 for strTaxaName in lsNames:
1199 #Split into the elements of the clades
1200 lsClades = filter(None,strTaxaName.split(cNameDelimiter))
1201 #Count clade levels
1202 iCladeLength = len(lsClades)
1203
1204 #Evaluate first element
1205 sClade = lsClades[0]
1206 dictCounts[sClade] = sClade not in dictCounts
1207
1208 #Evaluate the rest of the elements
1209 if iCladeLength < 2:
1210 continue
1211 for iIndex in xrange(1,iCladeLength):
1212 prevClade = sClade
1213 sClade = cNameDelimiter.join([sClade,lsClades[iIndex]])
1214 if sClade in dictCounts:
1215 dictCounts[sClade] = dictCounts[prevClade] = False
1216 else:
1217 dictCounts[sClade] = True
1218 dictCounts[prevClade] = False
1219
1220 #Return only the elements that were of count 1
1221 return filter( lambda s: dictCounts[s] == True, dictCounts )
1222
1223 #Happy path tested
1224 def funcIsNormalized(self):
1225 """
1226 Returns if the data has been normalized.
1227
1228 :return Boolean: Indicates if the data is normalized.
1229 True indicates it the data is normalized.
1230 """
1231
1232 return self._fIsNormalized
1233
1234 #Happy path tested
1235 def funcIsPrimaryIdMetadata(self,sMetadataName):
1236 """
1237 Checks the metadata data associated with the sMetadatName and returns if the metadata is unique.
1238 This is important to some of the functions in the Abundance Table specifically when translating from one metadata to another.
1239
1240 :param sMetadataName: ID of metadata to check for uniqueness.
1241 :type: String Metadata ID.
1242 :return Boolean: Returns indicator of uniqueness.
1243 True indicates unique.
1244 """
1245
1246 lMetadata = self.funcGetMetadata(sMetadataName)
1247 if not lMetadata:
1248 return False
1249 return (len(lMetadata) == len(set(lMetadata)))
1250
1251 #Happy path tested
1252 def funcIsSummed(self):
1253 """
1254 Return is the data is summed.
1255
1256 :return Boolean: Indicator of being summed. True indicates summed.
1257 """
1258
1259 return self._fIsSummed
1260
1261 #Happy path tested
1262 def funcFilterAbundanceByPercentile(self, dPercentileCutOff = 95.0, dPercentageAbovePercentile=1.0):
1263 """
1264 Filter on features.
1265 A feature is removed if it's abundance is not found in the top X percentile a certain percentage of the samples.
1266
1267 :param dPercentileCutOff: The percentile used for filtering.
1268 :type: double A double between 0.0 and 100.0
1269 :param dPercentageAbovePercentile: The percentage above the given percentile (dPercentileCutOff) that must exist to keep the feature.
1270 :type: double Between 0.0 and 100.0
1271 :return Boolean: Indicator of filtering occuring without error. True indicates filtering occuring.
1272 """
1273
1274 #No need to do anything
1275 if(dPercentileCutOff==0.0) or (dPercentageAbovePercentile==0.0):
1276 return True
1277
1278 #Sample names
1279 lsSampleNames = self.funcGetSampleNames()
1280
1281 #Scale percentage out of 100
1282 dPercentageAbovePercentile = dPercentageAbovePercentile/100.0
1283
1284 #Sample count
1285 iSampleCount = len(lsSampleNames)
1286
1287 #Get a threshold score of the value at the specified percentile for each sample
1288 #In the order of the sample names
1289 ldScoreAtPercentile = [scipy.stats.scoreatpercentile(self._npaFeatureAbundance[lsSampleNames[iIndex]],dPercentileCutOff) for iIndex in xrange(iSampleCount)]
1290
1291 #Record how many entries for each feature have a value equal to or greater than the dPercentileCutOff
1292 #If the percentile of entries passing the criteria are above the dPercentageAbovePercentile put index in list to keep
1293 liKeepIndices = []
1294 iSampleCount = float(iSampleCount)
1295 for iRowIndex, npaRow in enumerate(self._npaFeatureAbundance):
1296 iCountPass = sum([1 if dValue >= ldScoreAtPercentile[iValueIndex] else 0 for iValueIndex, dValue in enumerate(list(npaRow)[1:])])
1297 if (iCountPass / iSampleCount) >= dPercentageAbovePercentile:
1298 liKeepIndices.append(iRowIndex)
1299
1300 #Compress array
1301 self._npaFeatureAbundance = self._npaFeatureAbundance[liKeepIndices,:]
1302
1303 #Update filter state
1304 self._strCurrentFilterState += ":dPercentileCutOff=" + str(dPercentileCutOff) + ",dPercentageAbovePercentile=" + str(dPercentageAbovePercentile)
1305
1306 #Table is no longer normalized
1307 self._fIsNormalized = False
1308
1309 return True
1310
1311 def funcFilterAbundanceByMinValue(self, dMinAbundance = 0.0001, iMinSamples = 3):
1312 """
1313 Filter abundance by requiring features to have a minimum relative abundance in a minimum number of samples.
1314 Will evaluate greater than or equal to the dMinAbundance and iMinSamples.
1315
1316 :param dMinAbundance: Minimum relative abundance.
1317 :type: Real Number Less than 1.
1318 :param iMinSamples: Minimum samples to have the relative abundnace or greater in.
1319 :type: Integer Number greater than 1.
1320 :return Boolean: Indicator of the filter running without error. False indicates error.
1321 """
1322
1323 #No need to do anything
1324 if(dMinAbundance==0) or (iMinSamples==0):
1325 return True
1326
1327 #This normalization requires the data to be relative abundance
1328 if not self._fIsNormalized:
1329 #sys.stderr.write( "Could not filter by sequence occurence because the data is already normalized.\n" )
1330 return False
1331
1332 #Holds which indexes are kept
1333 liKeepFeatures = []
1334 for iRowIndex, dataRow in enumerate( self._npaFeatureAbundance ):
1335 #See which rows meet the criteria and keep the index if needed.
1336 if len( filter( lambda d: d >= dMinAbundance, list(dataRow)[1:] ) ) >= iMinSamples:
1337 liKeepFeatures.append(iRowIndex)
1338
1339 #Compress array
1340 self._npaFeatureAbundance = self._npaFeatureAbundance[liKeepFeatures,:]
1341 #Update filter state
1342 self._strCurrentFilterState += ":dMinAbundance=" + str(dMinAbundance) + ",iMinSamples=" + str(iMinSamples)
1343
1344 return True
1345
1346 #Happy path tested
1347 def funcFilterAbundanceBySequenceOccurence(self, iMinSequence = 2, iMinSamples = 2):
1348 """
1349 Filter occurence by requiring features to have a minimum sequence occurence in a minimum number of samples.
1350 Will evaluate greater than or equal to the iMinSequence and iMinSamples.
1351
1352 :param iMinSequence: Minimum sequence to occur.
1353 :type: Integer Number Greater than 1.
1354 :param iMinSamples: Minimum samples to occur in.
1355 :type: Integer Number greater than 1.
1356 :return Boolean: Indicator of the filter running without error. False indicates error.
1357 """
1358
1359 #No need to do anything
1360 if(iMinSequence==0) or (iMinSamples==0):
1361 return True
1362
1363 #This normalization requires the data to be reads
1364 if self._fIsNormalized:
1365 #sys.stderr.write( "Could not filter by sequence occurence because the data is already normalized.\n" )
1366 return False
1367
1368 #Holds which indexes are kept
1369 liKeepFeatures = []
1370 for iRowIndex, dataRow in enumerate( self._npaFeatureAbundance ):
1371 #See which rows meet the criteria and keep the index if needed.
1372 if len( filter( lambda d: d >= iMinSequence, list(dataRow)[1:] ) ) >= iMinSamples:
1373 liKeepFeatures.append(iRowIndex)
1374
1375 #Compress array
1376 self._npaFeatureAbundance = self._npaFeatureAbundance[liKeepFeatures,:]
1377 #Update filter state
1378 self._strCurrentFilterState += ":iMinSequence=" + str(iMinSequence) + ",iMinSamples=" + str(iMinSamples)
1379
1380 return True
1381
1382 #1 Happy path test
1383 def funcFilterFeatureBySD(self, dMinSDCuttOff = 0.0):
1384 """
1385 A feature is removed if it's abundance is not found to have standard deviation more than the given dMinSDCutoff.
1386
1387 :param dMinSDCuttOff: Standard deviation threshold.
1388 :type: Double A double greater than 0.0.
1389 :return Boolean: Indicator of success. False indicates error.
1390 """
1391
1392 #No need to do anything
1393 if(dMinSDCuttOff==0.0):
1394 return True
1395
1396 #Holds which indexes are kept
1397 liKeepFeatures = []
1398
1399 #Evaluate each sample
1400 for iRowIndex, dataRow in enumerate(self._npaFeatureAbundance):
1401 if(np.std(list(dataRow)[1:])>=dMinSDCuttOff):
1402 liKeepFeatures.append(iRowIndex)
1403
1404 #Compress array
1405 self._npaFeatureAbundance = self._npaFeatureAbundance[liKeepFeatures,:]
1406
1407 #Update filter state
1408 self._strCurrentFilterState += ":dMinSDCuttOff=" + str(dMinSDCuttOff)
1409
1410 #Table is no longer normalized
1411 self._fIsNormalized = False
1412
1413 return True
1414
1415 #Happy path tested 2 tests
1416 def funcGetWithoutOTUs(self):
1417 """
1418 Remove features that are terminal otus. Terminal otus are identified as being an integer.
1419 """
1420
1421 #Get the feature names
1422 lsFeatures = self.funcGetFeatureNames()
1423
1424 #Reduce, filter the feature names
1425 lsFeatures = [sFeature for sFeature in lsFeatures if not (ValidateData.funcIsValidStringInt(sFeature.split(self.funcGetFeatureDelimiter())[-1]))]
1426
1427 return self.funcGetFeatureAbundanceTable(lsFeatures)
1428
1429 #Happy path tested
1430 def funcNormalize(self):
1431 """
1432 Convenience method which will call which ever normalization is approriate on the data.
1433 :return Boolean: Indicator of success (true).
1434 """
1435
1436 if self._fIsSummed:
1437 return self.funcNormalizeColumnsWithSummedClades()
1438 else:
1439 return self.funcNormalizeColumnsBySum()
1440
1441 #Testing Status: Light happy path testing
1442 def funcNormalizeColumnsBySum(self):
1443 """
1444 Normalize the data in a manner that is approrpiate for NOT summed data.
1445 Normalize the columns (samples) of the abundance table.
1446 Normalizes as a fraction of the total (number/(sum of all numbers in the column)).
1447 Will not act on summed tables.
1448
1449 :return Boolean: Indicator of success. False indicates error.
1450 """
1451
1452 if self._fIsNormalized:
1453 # sys.stderr.write( "This table is already normalized, did not perform new normalization request.\n" )
1454 return False
1455
1456 if self._fIsSummed:
1457 sys.stderr.write( "This table has clades summed, this normalization is not appropriate. Did not perform.\n" )
1458 return False
1459
1460 #Normalize
1461 for columnName in self.funcGetSampleNames():
1462 column = self._npaFeatureAbundance[columnName]
1463 columnTotal = sum(column)
1464 if(columnTotal > 0.0):
1465 column = column/columnTotal
1466 self._npaFeatureAbundance[columnName] = column
1467
1468 #Indicate normalization has occured
1469 self._fIsNormalized = True
1470
1471 return True
1472
1473 #Happy path tested
	def funcNormalizeColumnsWithSummedClades(self):
		"""
		Normalizes a summed Abundance Table.
		If this is called on a dataset which is not summed and not normalized,
		the data will be summed first and then normalized.
		If already normalized, the current normalization is kept.

		Each feature row is divided, element-wise, by the abundances of its root
		feature: the row sharing the feature's first lineage element that has the
		shallowest (shortest) lineage.

		:return Boolean: Indicator of success. False indicates error.
		"""

		# Refuse double-normalization.
		if self._fIsNormalized:
#			sys.stderr.write( "This table is already normalized, did not perform new summed normalization request.\n" )
			return False

		# Sum the clades first if that has not been done yet.
		if not self._fIsSummed:
			sys.stderr.write( "This table does not have clades summed, this normalization is not appropriate until the clades are summed. The clades are being summed now before normalization.\n" )
			self.funcSumClades()

		#Load a hash table with root data {sKey: npaAbundances}
		# Maps each root clade name to [lineage depth, abundance vector] of the
		# shallowest row seen for that root.
		hashRoots = {}
		for npaRow in self._npaFeatureAbundance:

			# Abundance values (everything after the feature id column).
			curldAbundance = np.array(list(npaRow)[1:])
			# Depth of this feature's lineage, and its root clade entry so far.
			curFeatureNameLength = len(npaRow[0].split(self._cFeatureDelimiter))
			curlRootData = hashRoots.get(npaRow[0].split(self._cFeatureDelimiter)[0])

			# Keep the shallowest row per root (first seen, or shorter lineage).
			if not curlRootData:
				hashRoots[npaRow[0].split(self._cFeatureDelimiter)[0]] = [curFeatureNameLength, curldAbundance]
			elif curlRootData[0] > curFeatureNameLength:
				hashRoots[npaRow[0].split(self._cFeatureDelimiter)[0]] = [curFeatureNameLength, curldAbundance]

		#Normalize each feature by thier root feature
		# Zero entries in the root produce 0 (avoids division by zero).
		dataMatrix = list()
		for npaRow in self._npaFeatureAbundance:

			curHashRoot = list(hashRoots[npaRow[0].split(self._cFeatureDelimiter)[0]][1])
			dataMatrix.append(tuple([npaRow[0]]+[npaRow[i+1]/curHashRoot[i] if curHashRoot[i] > 0 else 0 for i in xrange(len(curHashRoot))]))

		# Rebuild the structured array with the same dtype.
		self._npaFeatureAbundance = np.array(dataMatrix,self._npaFeatureAbundance.dtype)

		#Indicate normalization has occured
		self._fIsNormalized = True

		return True
1518
1519 def _funcRankAbundanceHelper( self, aaTodo, iRank, lRankAbundance ):
1520 """
1521 Helper method for ranking abudance which are tied.
1522
1523 :params aaTodo: List of tied ranks to change to a rank.
1524 :type: List of Enumerates of samples.
1525 :params iRank: Current Rank
1526 :type: Integer
1527 :params lRankAbundance: Sample of abundance
1528 :type: List of integers
1529 """
1530
1531 # Subtract one from iRank (each time) to account for next loop iteration
1532 # Then average it with itself minus (the length of aaTodo + 1)
1533 dRank = ( iRank + iRank - len( aaTodo ) - 1 ) / 2.0
1534 for a in aaTodo:
1535 lRankAbundance[a[0]] = dRank
1536
1537 #1 Happy path test
1538 def funcRankAbundance(self):
1539 """
1540 Rank abundances of features with in a sample.
1541
1542 :return AbundanceTable: Abundance table data ranked (Features with in samples).
1543 None is returned on error.
1544 """
1545
1546 if self._npaFeatureAbundance == None:
1547 return None
1548
1549 lsSampleNames = self.funcGetSampleNames()
1550 npRankAbundance = self.funcGetAbundanceCopy()
1551 liRanks = []
1552 #For each sample name get the ranks
1553 for sName in lsSampleNames:
1554 #Enumerate for order and sort abundances
1555 lfSample = list(enumerate(npRankAbundance[sName]))
1556 lfSample = sorted(lfSample, key = lambda a: a[1], reverse = True)
1557
1558 # Accumulate indices until a new value is encountered to detect + handle ties
1559 aaTodo = []
1560 for i, a in enumerate( lfSample ):
1561 if ( not aaTodo ) or ( a[1] == aaTodo[-1][1] ):
1562 aaTodo.append( a )
1563 else:
1564 # Make multiple tied ranks = average of first and last
1565 self._funcRankAbundanceHelper( aaTodo, i, npRankAbundance[sName] )
1566 aaTodo = [a]
1567 self._funcRankAbundanceHelper( aaTodo, i + 1, npRankAbundance[sName] )
1568
1569 abndRanked = AbundanceTable(npaAbundance=npRankAbundance, dictMetadata=self.funcGetMetadataCopy(),
1570 strName= self.funcGetName() + "-Ranked",
1571 strLastMetadata=self.funcGetLastMetadataName(),
1572 cFileDelimiter=self.funcGetFileDelimiter(),
1573 cFeatureNameDelimiter=self.funcGetFeatureDelimiter())
1574
1575 #Table is no longer normalized
1576 abndRanked._fIsNormalized = False
1577 return abndRanked
1578
1579 def funcGetSampleCount(self):
1580 """
1581 Returns the sample count of the abundance table.
1582 """
1583 return len(self.funcGetSampleNames())
1584
1585 #Happy Path Tested
1586 def funcReduceFeaturesToCladeLevel(self, iCladeLevel):
1587 """
1588 Reduce the current table to a certain clade level.
1589
1590 :param iCladeLevel: The level of the clade to trim the features to.
1591 :type: Integer The higher the number the more clades are presevered in the consensus lineage contained in the feature name.
1592 :return Boolean: Indicator of success. False indicates error.
1593 """
1594
1595 if iCladeLevel < 1: return False
1596 if not self._npaFeatureAbundance == None:
1597 liFeatureKeep = []
1598 [liFeatureKeep.append(tplFeature[0]) if (len(tplFeature[1][0].split(self.funcGetFeatureDelimiter())) <= iCladeLevel) else 0
1599 for tplFeature in enumerate(self._npaFeatureAbundance)]
1600 #Compress array
1601 self._npaFeatureAbundance = self._npaFeatureAbundance[liFeatureKeep,:]
1602
1603 #Update filter state
1604 self._strCurrentFilterState += ":iCladeLevel=" + str(iCladeLevel)
1605 return True
1606 else:
1607 return False
1608
	#Happy path tested
	def funcRemoveSamples(self,lsSampleNames):
		"""
		Removes the samples given in the list.

		:param lsSampleNames: A list of string names of samples to remove.
		:type: List of strings Unique values
		:return Boolean: Indicator of success (True = success, no error)
			Note: success is judged by comparing the new sample count to the old
			count minus the number of unique names requested, so asking to remove
			a name that is not a sample yields False.
		"""

		#Samples to remove
		setSamples = set(lsSampleNames)

		#Get orignal sample count
		iOriginalCount = self._iOriginalSampleCount

		#The samples to keep
		lsKeepSamples = [sSample for sSample in self.funcGetSampleNames() if not sSample in setSamples]
		#The sample to keep as boolean flags for compressing the metadata
		lfKeepSamples = [not sSample in setSamples for sSample in self.funcGetSampleNames()]

		#Reduce the abundance data and update
		#Numpy structured-array multi-field selection: the ID column plus the kept sample columns
		self._npaFeatureAbundance = self._npaFeatureAbundance[[self.funcGetIDMetadataName()]+lsKeepSamples]

		#Reduce the metadata and update
		#Each metadata list is positionally aligned with the samples, so filter by the same flags
		for sKey in self._dictTableMetadata:
			self._dictTableMetadata[sKey] = [value for iindex, value in enumerate(self._dictTableMetadata[sKey]) if lfKeepSamples[iindex]]

		#Update sample number count
		self._iOriginalSampleCount = len(self.funcGetSampleNames())

		return self._iOriginalSampleCount == (iOriginalCount-len(setSamples))
1641
1642 #Happy path tested
1643 def funcRemoveSamplesByMetadata(self, sMetadata, lValuesToRemove):
1644 """
1645 Removes samples from the abundance table based on values of a metadata.
1646 If a metadata has any value given the associated sample is removed.
1647
1648 :param sMetadata: ID of the metdata to check the given values.
1649 :type: String Metadata ID
1650 :param lValuesToRemove: A list of values which if equal to a metadata entry indicate to remove the associated sample.
1651 :type: List of values: List
1652 :return Boolean: Indicator of success (True = success, no error)
1653 """
1654
1655 lsSampleNames = self.funcGetSampleNames()
1656 return self.funcRemoveSamples([lsSampleNames[iindex] for iindex, sValue in enumerate(self.funcGetMetadata(sMetadata)) if sValue in lValuesToRemove])
1657
	#Happy path testing
	def funcSumClades(self):
		"""
		Sums abundance data by clades indicated in the feature name (as consensus lineages).

		:return Boolean: Indicator of success.
			False indicates an error.
		"""

		if not self.funcIsSummed():

			#Read in the data
			#Find the header column (iCol) assumed to be 1 or 2 depending on the location of "NAME"
			#Create a list (adSeq) that will eventually hold the sum of the columns of data
			astrHeaders = iCol = None
			adSeqs = np.array([0] * len(self.funcGetSampleNames()))
			pTree = CClade( )
			aastrRaw = []

			#For each row in the npaAbundance
			#Get the feature name, feature abundances, and sum up the abudance columns
			#Keep the sum for later normalization
			#Give a tree the feature name and abundance
			for dataRow in self._npaFeatureAbundance:

				sFeatureName = dataRow[0]
				ldAbundances = list(dataRow)[1:]

				#Add to the sum of the columns (samples)
				adSeqs = adSeqs + np.array(list(dataRow)[1:])

				#Build tree: the consensus lineage (split on the feature delimiter) is the path,
				#and the sample abundances are attached at that node
				pTree.get( sFeatureName.split(self._cFeatureDelimiter) ).set( ldAbundances )

			#Create tree of data
			#Input missing data
			#Fill hashFeatures with the clade name (key) and a blist of values (value) of the specified level interested.
			pTree.impute( )
			hashFeatures = {}
			pTree.freeze( hashFeatures, c_iSumAllCladeLevels, c_fOutputLeavesOnly )
			#NOTE(review): relies on Python 2 dict.keys() returning a list (remove() is called on it below)
			setstrFeatures = hashFeatures.keys( )

			#Remove parent clades that are identical to child clades
			#NOTE(review): relies on Python 2 items() returning a snapshot list, since hashFeatures is mutated inside the loop
			for strFeature, adCounts in hashFeatures.items( ):
				astrFeature = strFeature.strip( ).split( "|" )
				#Walk up the lineage, dropping any ancestor whose counts equal this feature's counts
				while len( astrFeature ) > 1:
					astrFeature = astrFeature[:-1]
					strParent = "|".join( astrFeature )
					adParent = hashFeatures.get( strParent )
					if adParent == adCounts:
						del hashFeatures[strParent]
						setstrFeatures.remove( strParent )

			#Sort features to be nice
			astrFeatures = sorted( setstrFeatures )

			#Change the hash table to an array, keeping the original dtype so field names survive
			dataMatrix = list()
			for sFeature in astrFeatures:
				dataMatrix.append(tuple([sFeature]+list(hashFeatures[sFeature])))
			self._npaFeatureAbundance=np.array(dataMatrix,self._npaFeatureAbundance.dtype)

			#Indicate summation has occured
			self._fIsSummed = True

		return True
1724
	#Happy path tested
	def funcStratifyByMetadata(self, strMetadata, fWriteToFile=False):
		"""
		Stratifies the AbundanceTable by the given metadata.
		Will write each stratified abundance table to file
		if fWriteToFile is True the object will used it's internally stored name as a file to write to
		if fWriteToFile is a string then it should be a directory and end with "." This will rebase the file
		and store it in a different directory but with an otherwise unchanged name.
		Note: If the metadata used for stratification has NAs, they will be segregated to thier own table and returned.

		:param strMetadata: Metadata ID to stratify data with.
		:type: String ID for a metadata.
		:param fWriteToFile: Indicator to write to file.
		:type: Boolean True indicates to write to file.
		:return List: List of AbundanceTables which are deep copies of the original.
			Empty list on error.
		"""

		if self._npaFeatureAbundance is None or self._dictTableMetadata is None:
			return []

		#Get unique metadata values to stratify by
		lsMetadata = self._dictTableMetadata.get(strMetadata,[])
		setValues = set(lsMetadata)
		#If there is only one metadata value then no need to stratify so return the original in the list (and write if needed)
		#NOTE(review): the comment above does not match the code — an empty list is
		#returned only when the metadata is missing/empty (len == 0); a single
		#metadata value still produces one stratified table below. Confirm intent.
		if len(setValues) == 0:
			return []

		retlAbundanceTables = []
		#NOTE(review): dictAbundanceBlocks is never used below
		dictAbundanceBlocks = dict()
		#Given here there are multiple metadata values, continue to stratify
		lsNames = self.funcGetSampleNames()
		#Get index of values to break up
		for value in setValues:
			#Boolean mask over the samples: True where this sample carries the current metadata value
			lfDataIndex = [sData==value for sData in lsMetadata]
			#Get abundance data for the metadata value
			#The true is added to keep the first column which should be the feature id
			npaStratfiedAbundance = self._npaFeatureAbundance[[self.funcGetIDMetadataName()]+list(np.compress(lfDataIndex,lsNames))]

			#Get metadata for the metadata value
			dictStratifiedMetadata = dict()
			for metadataType in self._dictTableMetadata:
				dictValues = self.funcGetMetadata(metadataType)
				dictStratifiedMetadata[metadataType] = np.compress(lfDataIndex,dictValues).tolist()

			#Make abundance table
			#Add abundance table to the list
			lsNamePieces = os.path.splitext(self._strOriginalName)
			objStratifiedAbundanceTable = AbundanceTable(npaAbundance=npaStratfiedAbundance, dictMetadata=dictStratifiedMetadata,
				strName=lsNamePieces[0] + "-StratBy-" + value+lsNamePieces[1],
				strLastMetadata=self.funcGetLastMetadataName(),
				cFeatureNameDelimiter=self._cFeatureDelimiter, cFileDelimiter = self._cDelimiter)
			if fWriteToFile:
				objStratifiedAbundanceTable.funcWriteToFile(lsNamePieces[0] + "-StratBy-" + value+lsNamePieces[1])
			#Append abundance table to returning list
			retlAbundanceTables.append(objStratifiedAbundanceTable)

		return retlAbundanceTables
1783
1784 #Happy Path Tested
1785 def funcTranslateIntoMetadata(self, lsValues, sMetadataFrom, sMetadataTo, fFromPrimaryIds=True):
1786 """
1787 Takes the given data values in one metadata and translates it to values in another
1788 metadata of the sample samples holding the values of the first metadata
1789 FPrimaryIds, if true the sMetadataFrom are checked for unique values,
1790 If FPrimaryIds is not true, duplicate values can stop the preservation of order
1791 Or may cause duplication in the "to" group. This is not advised.
1792 if the sMetadataFrom has any duplicates the function fails and return false.
1793
1794 :param lsValues: Values to translate.
1795 :type: List List of values.
1796 :param sMetadataFrom: The metadata the lsValues come from.
1797 :type: String ID for the metadata.
1798 :param sMetadataTo: The metadata the lsValues will be translated into keeping the samples the same.
1799 :type: String ID for the metadata.
1800 :param fFromPrimaryIds: The metadata that are in the from metadata list must be unique in each sample.
1801 :type: Boolean True indicates the metadata list should be unique in each sample. Otherwise a false will return.
1802 :return List: List of new values or False on error.
1803 """
1804
1805 #Get metadata
1806 lFromMetadata = self.funcGetMetadata(sMetadataFrom)
1807 if not lFromMetadata:
1808 sys.stderr.write( "Abundancetable::funcTranlateIntoMetadata. Did not receive lFromMetadata.\n" )
1809 return False
1810
1811 lToMetadata = self.funcGetMetadata(sMetadataTo)
1812 if not lToMetadata:
1813 sys.stderr.write( "Abundancetable::funcTranlateIntoMetadata. Did not receive lToMetadata.\n" )
1814 return False
1815
1816 #Check to see if the values are unique if indicated to do so
1817 if fFromPrimaryIds:
1818 if not len(lFromMetadata) == len(set(lFromMetadata)):
1819 sys.stderr.write( "Abundancetable::funcTranlateIntoMetadata. sMetadataFrom did not have unique values.\n" )
1820 return False
1821
1822 #Translate over
1823 if lFromMetadata and lToMetadata:
1824 return [lToMetadata[iIndex] for iIndex in [lFromMetadata.index(value) for value in lsValues]]
1825
1826 return False
1827
1828 #Happy path tested
1829 def funcToArray(self):
1830 """
1831 Returns a numpy array of the current Abundance Table.
1832 Removes the first ID head column and the numpy array is
1833 Made of lists, not tuples.
1834
1835 :return Numpy Array: np.array([[float,float,...],[float,float,...],[float,float,...]])
1836 None is returned on error.
1837 """
1838
1839 if not self._npaFeatureAbundance == None:
1840 return np.array([list(tplRow)[1:] for tplRow in self._npaFeatureAbundance],'float')
1841 return None
1842
1843 #Happy Path tested
1844 def funcWriteToFile(self, xOutputFile, cDelimiter=None, cFileType=ConstantsBreadCrumbs.c_strPCLFile):
1845 """
1846 Writes the AbundanceTable to a file strOutputFile.
1847 Will rewrite over a file as needed.
1848 Will use the cDelimiter to delimit columns if provided.
1849
1850 :param xOutputFile: File stream or File path to write the file to.
1851 :type: String File Path
1852 :param cDelimiter: Delimiter for the output file.
1853 :type: Character If cDlimiter is not specified, the internally stored file delimiter is used.
1854 """
1855
1856 if not xOutputFile:
1857 return
1858 # Check delimiter argument
1859 if not cDelimiter:
1860 cDelimiter = self._cDelimiter
1861
1862 # Check file type: If pcl: Write pcl file; If biom: write biom file; If None - write pcl file
1863 if(cFileType == None):
1864 cFileType == ConstantsBreadCrumbs.c_strPCLFile
1865
1866 if(cFileType == ConstantsBreadCrumbs.c_strPCLFile):
1867 # Write as a pcl file
1868 self._funcWritePCLFile(xOutputFile, cDelimiter=cDelimiter)
1869 elif(cFileType == ConstantsBreadCrumbs.c_strBiomFile):
1870 #Write as a biom file
1871 self._funcWriteBiomFile(xOutputFile)
1872 return
1873
	def _funcWritePCLFile(self, xOutputFile, cDelimiter=None):
		"""
		Write an abundance table object as a PCL file.

		:param xOutputFile: File stream or File path to write the file to.
		:type: String File Path
		:param cDelimiter: Delimiter for the output file.
		:type: Character If cDlimiter is not specified, the internally stored file delimiter is used.
		"""

		#NOTE(review): when a path is given the opened handle is never explicitly closed
		f = csv.writer(open( xOutputFile, "w" ) if isinstance(xOutputFile, str) else xOutputFile, csv.excel_tab, delimiter=cDelimiter)

		# Get Row metadata id info (IDs for column header, keys that line up with the ids)
		lsRowMetadataIDs, lsRowMetadataIDKeys = self.rwmtRowMetadata.funcMakeIDs() if self.rwmtRowMetadata else [[],[]]

		#Write Ids
		f.writerows([[self.funcGetIDMetadataName()]+lsRowMetadataIDs+list(self.funcGetSampleNames())])

		#Write column metadata
		#Exclude the sample ID metadata; the last metadata name is unioned back in below
		lsKeys = list(set(self._dictTableMetadata.keys())-set([self.funcGetIDMetadataName(),self.funcGetLastMetadataName()]))
		lMetadataIterations = list(set(lsKeys+[self.funcGetLastMetadataName()] ))

		#Metadata rows are padded with the empty-data placeholder under the row-metadata ID columns
		f.writerows([[sMetaKey]+([ConstantsBreadCrumbs.c_strEmptyDataMetadata]*len(lsRowMetadataIDs))+self.funcGetMetadata(sMetaKey) for sMetaKey in lMetadataIterations if sMetaKey != self.funcGetIDMetadataName() and not sMetaKey is None])

		#Write abundance
		#NOTE(review): lsOutput is never used below
		lsOutput = list()
		curAbundance = self._npaFeatureAbundance.tolist()

		for curAbundanceRow in curAbundance:
			# Make feature metadata, padding with NA as needed
			lsMetadata = []
			for sMetadataId in lsRowMetadataIDKeys:
				lsMetadata = lsMetadata + self.rwmtRowMetadata.funGetFeatureMetadata( curAbundanceRow[0], sMetadataId )
				#NOTE(review): the padding below compares the expected width of this
				#metadata id against the RUNNING length of lsMetadata across all ids —
				#confirm this is the intended padding behavior
				lsMetadata = lsMetadata + ( [ ConstantsBreadCrumbs.c_strEmptyDataMetadata ] *
					( self.rwmtRowMetadata.dictMetadataIDs.get( sMetadataId, 0 ) - len( lsMetadata ) ) )
			f.writerows([[curAbundanceRow[0]]+lsMetadata+[str(curAbundanceElement) for curAbundanceElement in curAbundanceRow[1:]]])
		return
1911
	def _funcWriteBiomFile(self, xOutputFile):
		"""
		Write an abundance table object as a Biom file.

		:param xOutputFile: File stream or File path to write the file to.
		:type: String File Path
		"""

		#**************************
		# Get Sample Names        *
		#**************************
		lSampNames = list(self.funcGetSampleNames())

		#**************************
		# Metadata Names          *
		#**************************

		#Pivot the column metadata: build one dict per sample mapping
		#metadata name -> value (Python 2 iteritems)
		dictMetadataCopy = self.funcGetMetadataCopy()
		lMetaData = list()
		iKeysCounter = 0
		for lMetadataCopyEntry in dictMetadataCopy.iteritems():
			iKeysCounter +=1
			sMetadataName = lMetadataCopyEntry[0]
			lMetadataEntries = lMetadataCopyEntry[1]
			iMetadataEntryCounter = -1
			for sMetadataEntry in lMetadataEntries:
				iMetadataEntryCounter+=1
				dictMetadataNames = dict()
				dictMetadataNames[sMetadataName ] = sMetadataEntry
				if iKeysCounter == 1:
					#First metadata key seen: create the per-sample dict
					lMetaData.append(dictMetadataNames)
				else:
					#Later keys: merge into the existing per-sample dict
					lMetaData[iMetadataEntryCounter][sMetadataName ] = sMetadataEntry


		#**************************
		# Observation Ids         *
		# and row metadata        *
		#**************************
		bTaxonomyInRowsFlag = False
		if self.rwmtRowMetadata.dictRowMetadata is not None:
			bTaxonomyInRowsFlag = True

		lObservationMetadataTable = list()

		lObservationIds = list()
		lFeatureNamesResultArray = self.funcGetFeatureNames()
		for sFeatureName in lFeatureNamesResultArray:
			lObservationIds.append(sFeatureName)

			if self.rwmtRowMetadata and self.rwmtRowMetadata.dictRowMetadata:
				RowMetadataEntry = self.rwmtRowMetadata.dictRowMetadata[sFeatureName][ConstantsBreadCrumbs.c_metadata_lowercase]
				lObservationMetadataTable.append( RowMetadataEntry )

		#**************************
		# Data                    *
		#**************************

		#Strip the leading feature-ID field from each row and cast values to float
		lData = list()
		lAbundanceCopyResultArray = self.funcGetAbundanceCopy()

		for r in lAbundanceCopyResultArray:
			lr = list(r)
			lr.pop(0) #Remove metadata
			lAbundanceValues = list()
			for AbundanceEntry in lr:
				flAbundanceEntry = float(AbundanceEntry)
				lAbundanceValues.append(flAbundanceEntry)
			lData.append(lAbundanceValues)
		#NOTE(review): "array" is presumably numpy's array imported elsewhere in this file — confirm
		arrData = array(lData) #Convert list to array



		#**************************
		# Invoke the              *
		# biom table factory      *
		#**************************
		if bTaxonomyInRowsFlag == False:
			BiomTable = table_factory(arrData,
							lSampNames,
							lObservationIds,
							lMetaData,
							constructor=SparseOTUTable)
		else: #There was metadata in the rows
			BiomTable = table_factory(arrData,
							lSampNames,
							lObservationIds,
							lMetaData,
							lObservationMetadataTable if len(lObservationMetadataTable) > 0 else None,
							constructor=SparseOTUTable)

		#**************************
		# Generate biom Output    *
		#**************************
		f = open( xOutputFile, "w" ) if isinstance(xOutputFile, str) else xOutputFile
		f.write(BiomTable.getBiomFormatJsonString(ConstantsBreadCrumbs.c_biom_file_generated_by))
		f.close()
		return
2009
2010 #Testing Status: 1 Happy path test
2011 @staticmethod
2012 def funcPairTables(strFileOne, strFileTwo, strIdentifier, cDelimiter, strOutFileOne, strOutFileTwo, lsIgnoreValues=None):
2013 """
2014 This method will read in two files and abridge both files (saved as new files)
2015 to just the samples in common between the two files given a common identifier.
2016 ***If the identifier is not unique in each data set, the first sample with the pairing id is taken so make sure the ID is unique.
2017 Expects the files to have the sample delimiters.
2018
2019 :param strFileOne: Path to file one to be paired.
2020 :type: String File path.
2021 :param strFileTwo: Path to file two to be paired.
2022 :type: String File path.
2023 :param strIdentifier: Metadata ID that is used for pairing.
2024 :type: String Metadata ID.
2025 :param cDelimiter: Character delimiter to read the files.
2026 :type: Character Delimiter.
2027 :param strOutFileOne: The output file for the paired version of the first file.
2028 :type: String File path.
2029 :param strOutFileTwo: The output file for the paired version of the second file.
2030 :type: String File path.
2031 :param lsIgnoreValues: These values are ignored even if common IDs between the two files.
2032 :type: List List of strings.
2033 :return Boolean: Indicator of no errors.
2034 False indicates errors.
2035 """
2036
2037 #Validate parameters
2038 if(not ValidateData.funcIsValidFileName(strFileOne)):
2039 sys.stderr.write( "AbundanceTable:checkRawDataFile::Error, file not valid. File:" + strFileOne + "\n" )
2040 return False
2041 #Validate parameters
2042 if(not ValidateData.funcIsValidFileName(strFileTwo)):
2043 sys.stderr.write( "AbundanceTable:checkRawDataFile::Error, file not valid. File:"+ strFileTwo + "\n" )
2044 return False
2045
2046 #Make file one
2047 #Read in file
2048 istm = csv.reader(open(strFileOne,'rU'), csv.excel_tab, delimiter=cDelimiter)
2049 lsContentsOne = [lsRow for lsRow in istm]
2050
2051 #Get the file identifier for file one
2052 fileOneIdentifier = None
2053 for sLine in lsContentsOne:
2054 if sLine[0] == strIdentifier:
2055 fileOneIdentifier = sLine
2056 break
2057
2058 #Make file two
2059 #Read in file
2060 istm = csv.reader(open(strFileTwo,'rU'), csv.excel_tab, delimiter=cDelimiter)
2061 lsContentsTwo = [lsRow for lsRow in istm]
2062
2063 #Get the file identifier for file two
2064 fileTwoIdentifier = None
2065 for sLine in lsContentsTwo:
2066 if sLine[0] == strIdentifier:
2067 fileTwoIdentifier = sLine
2068 break
2069
2070 #Get what is in common between the identifiers
2071 #And find which columns to keep in the tables based on the common elements
2072 setsCommonIdentifiers = set(fileOneIdentifier) & set(fileTwoIdentifier)
2073 if lsIgnoreValues:
2074 setsCommonIdentifiers = setsCommonIdentifiers - set(lsIgnoreValues)
2075
2076 #Get positions of common identifiers in each data set, if the identifier is not unique in a date set just take the first index
2077 lfFileOneIDIndexes = [fileOneIdentifier.index(sCommonID) for sCommonID in setsCommonIdentifiers]
2078 lfFileTwoIDIndexes = [fileTwoIdentifier.index(sCommonID) for sCommonID in setsCommonIdentifiers]
2079
2080 #Convert index list to list of boolean
2081 lfFileOneElements = [iIndex in lfFileOneIDIndexes for iIndex, sIdentifier in enumerate(fileOneIdentifier)]
2082 lfFileTwoElements = [iIndex in lfFileTwoIDIndexes for iIndex, sIdentifier in enumerate(fileTwoIdentifier)]
2083
2084 #Write out file one
2085 ostm = csv.writer(open(strOutFileOne,'w'), csv.excel_tab, delimiter=cDelimiter)
2086 (ostm.writerows([np.compress(lfFileOneElements,sLine) for sLine in lsContentsOne]))
2087
2088 #Write out file two
2089 ostm = csv.writer(open(strOutFileTwo,'w'), csv.excel_tab, delimiter=cDelimiter)
2090 (ostm.writerows([np.compress(lfFileTwoElements,sLine) for sLine in lsContentsTwo]))
2091
2092 return True
2093
2094 #Testing Status: Light happy path testing
2095 @staticmethod
2096 def funcStratifyAbundanceTableByMetadata(strInputFile = None, strDirectory = "", cDelimiter = ConstantsBreadCrumbs.c_cTab, iStratifyByRow = 1, llsGroupings = []):
2097 """
2098 Splits an abundance table into multiple abundance tables stratified by the metadata
2099
2100 :param strInputFile: String file path to read in and stratify.
2101 :type: String File path.
2102 :param strDirectory: Output directory to write stratified files.
2103 :type: String Output directory path.
2104 :param cDelimiter: The delimiter used in the adundance file.
2105 :type: Character Delimiter.
2106 :param iStratifyByRow: The row which contains the metadata to use in stratification.
2107 :type: Integer Positive integer index.
2108 :param llsGroupings: A list of string lists where each string list holds values that are equal and should be grouped together.
2109 So for example, if you wanted to group metadata "1", "2", and "3" seperately but "4" and "5" together you would
2110 Give the following [["4","5"]].
2111 If you know what "1" and "3" also together you would give [["1","3"],["4","5"]]
2112 :type List List of list of strings
2113 :return Boolean: Indicator of NO error.
2114 False indicates an error.
2115 """
2116
2117 #Validate parameters
2118 if(not ValidateData.funcIsValidFileName(strInputFile)):
2119 sys.stderr.write( "AbundanceTable:stratifyAbundanceTableByMetadata::Error, file not valid. File:" + strInputFile + "\n" )
2120 return False
2121 if(not ValidateData.funcIsValidStringType(cDelimiter)):
2122 sys.stderr.write( "AbundanceTable:stratifyAbundanceTableByMetadata::Error, Delimiter is not a valid string/char type. Delimiter =" + cDelimiter + "\n" )
2123 return False
2124 if(not ValidateData.funcIsValidPositiveInteger(iStratifyByRow, tempZero = True) and (not ValidateData.funcIsValidString(iStratifyByRow))):
2125 sys.stderr.write( "AbundanceTable:stratifyAbundanceTableByMetadata::Error, Stratify by row is not a positive integer or string keyword. Row =" +
2126 str(iStratifyByRow) + ".\n" )
2127 return False
2128
2129 #Get the base of the file path
2130 #This is dependent on the given output directory and the prefix of the file name of the input file
2131 #If no output file is given then the input file directory is used.
2132 baseFilePath = strDirectory
2133 lsFilePiecesExt = os.path.splitext(strInputFile)
2134 if baseFilePath:
2135 baseFilePath = baseFilePath + os.path.splitext(os.path.split(strInputFile)[1])[0]
2136 else:
2137 baseFilePath = lsFilePiecesExt[0]
2138
2139 #Read in file
2140 istm = csv.reader(open(strInputFile,'rU'), csv.excel_tab, delimiter=cDelimiter)
2141 sFileContents = [lsRow for lsRow in istm]
2142
2143 #Collect metadata
2144 metadataInformation = dict()
2145
2146 #If the tempStratifyRow is by key word than find the index
2147 if ValidateData.funcIsValidString(iStratifyByRow):
2148 for iLineIndex, strLine in enumerate(sFileContents):
2149 if strLine[0].strip("\"") == iStratifyByRow:
2150 iStratifyByRow = iLineIndex
2151 break
2152
2153 #Stratify by metadata row
2154 #Split metadata row into metadata entries
2155 #And put in a dictionary containing {"variable":[1,2,3,4 column index]}
2156 stratifyByRow = sFileContents[iStratifyByRow]
2157 for metaDataIndex in xrange(1,len(stratifyByRow)):
2158 metadata = stratifyByRow[metaDataIndex]
2159 #Put all wierd categories, none, whitespace, blank space metadata cases into one bin
2160 if not metadata or metadata in string.whitespace:
2161 metadata = "Blank"
2162 #Remove any extraneous formatting
2163 metadata = metadata.strip(string.whitespace)
2164 #Store processed metadata with column occurence in dictionary
2165 if(not metadata in metadataInformation):
2166 metadataInformation[metadata] = []
2167 metadataInformation[metadata].append(metaDataIndex)
2168
2169 #For each of the groupings
2170 #Use the first value as the primary value which the rest of the values in the list are placed into
2171 #Go through the dict holding the indices and extend the list for the primary value with the secondary values
2172 #Then set the secondary value list to empty so that it will be ignored.
2173 if llsGroupings:
2174 for lSKeyGroups in llsGroupings:
2175 if len(lSKeyGroups) > 1:
2176 for sGroup in lSKeyGroups[1:]:
2177 if sGroup in metadataInformation:
2178 metadataInformation[lSKeyGroups[0]].extend(metadataInformation[sGroup])
2179 metadataInformation[sGroup] = []
2180
2181 #Stratify data
2182 stratifiedAbundanceTables = dict()
2183 for tableRow in sFileContents:
2184 if(len(tableRow)> 1):
2185 for metadata in metadataInformation:
2186 #[0] includes the taxa line
2187 columns = metadataInformation[metadata]
2188 if columns:
2189 columns = [0] + columns
2190 lineList = list()
2191 for column in columns:
2192 lineList.append(tableRow[column])
2193 stratifiedAbundanceTables.setdefault(metadata,[]).append(lineList)
2194
2195 #Write to file
2196 lsFilesWritten = []
2197 for metadata in stratifiedAbundanceTables:
2198 sOutputFile = baseFilePath+"-by-"+metadata.strip("\"")+lsFilePiecesExt[1]
2199 f = csv.writer(open(sOutputFile,'w'), csv.excel_tab, delimiter = cDelimiter )
2200 f.writerows(stratifiedAbundanceTables[metadata])
2201 lsFilesWritten.append(sOutputFile)
2202
2203 return lsFilesWritten
2204
2205
2206
2207 #*******************************************
2208 #* biom interface functions: *
2209 #* 1. _funcBiomToStructuredArray *
2210 #* 2. _funcDecodeBiomMetadata *
2211 #*******************************************
2212 @staticmethod
2213 def _funcBiomToStructuredArray(xInputFile = None):
2214 """
2215 Reads the biom input file and builds a "BiomCommonArea" that contains:
2216 1.BiomCommonArea['sLastMetadata'] - This is the name of the last Metadata (String)
2217 2.BiomCommonArea['BiomTaxData']- dict() - going to be used as lcontents[0]==TaxData
2218 3.BiomCommonArea['Metadata'] - dict() - going to be used as lcontents[1]==MetaData
2219 4.BiomCommonArea['BiomFileInfo'] - dict() - going to be used as lcontents[2]==FileInfo (id, format:eg. Biological Observation Matrix 0.9.1) etc.
2220 5.BiomCommonArea['column_metadata_id'] - This is a string which is the name of the column id
2221 :param xInputFile: File path of biom file to read.
2222 :type: String File path.
2223 :return: BiomCommonArea (See description above)
2224 :type: dict()
2225 """
2226
2227 #*******************************************
2228 #* Build the metadata *
2229 #*******************************************
2230 try:
2231 BiomTable = parse_biom_table(open(xInputFile,'U') if isinstance(xInputFile, str) else xInputFile) #Import the biom file
2232 except:
2233 print("Failure decoding biom file - please check your input biom file and rerun")
2234 BiomCommonArea = None
2235 return BiomCommonArea
2236
2237 BiomCommonArea = dict()
2238 dBugNames = list() #Bug Names Table
2239 dRowsMetadata = None #Initialize the np.array of the Rows metadata
2240 BiomElements = BiomTable.getBiomFormatObject('')
2241 for BiomKey, BiomValue in BiomElements.iteritems():
2242 #****************************************************
2243 #* Checking the different keys: format, *
2244 #* rows, columns, date, generated_by *
2245 #****************************************************
2246 if (BiomKey == ConstantsBreadCrumbs.c_strFormatKey
2247 or BiomKey == ConstantsBreadCrumbs.c_strFormatUrl
2248 or BiomKey == ConstantsBreadCrumbs.c_MatrixTtype
2249 or BiomKey == ConstantsBreadCrumbs.c_strTypekey
2250 or BiomKey == ConstantsBreadCrumbs.c_strIDKey #Same as below
2251 or BiomKey == ConstantsBreadCrumbs.c_GeneratedBy #<---Need to follow up with Biom as always BiomValue = "" even though in the file has a value
2252 or BiomKey == ConstantsBreadCrumbs.c_strDateKey): #Same as above
2253 BiomCommonArea = AbundanceTable._funcInsertKeyToCommonArea(BiomCommonArea, BiomKey, BiomValue)
2254
2255
2256 if BiomKey == ConstantsBreadCrumbs.c_rows:
2257 iMaxIdLen = 0
2258 for iIndexRowMetaData in range(0, len(BiomValue)):
2259 if ConstantsBreadCrumbs.c_id_lowercase in BiomValue[iIndexRowMetaData]:
2260 sBugName = BiomValue[iIndexRowMetaData][ConstantsBreadCrumbs.c_id_lowercase]
2261 dBugNames.append(sBugName) #Post to the bug table
2262 if len(sBugName) > iMaxIdLen: #We are calculating dynamically the length of the ID
2263 iMaxIdLen = len(sBugName)
2264
2265 if ConstantsBreadCrumbs.c_metadata_lowercase in BiomValue[0] and BiomValue[0][ConstantsBreadCrumbs.c_metadata_lowercase] != None :
2266 dRowsMetadata = AbundanceTable._funcBiomBuildRowMetadata(BiomValue, iMaxIdLen )
2267
2268
2269 if BiomKey == ConstantsBreadCrumbs.c_columns:
2270 BiomCommonArea = AbundanceTable._funcDecodeBiomMetadata(BiomCommonArea, BiomValue, iMaxIdLen) #Call the subroutine to Build the metadata
2271
2272
2273 #*******************************************
2274 #* Build the TaxData *
2275 #*******************************************
2276
2277 BiomTaxDataWork = list() #Initlialize TaxData
2278 BiomObservations = BiomTable.iterObservations(conv_to_np=True) #Invoke biom method to fetch data from the biom file
2279 for BiomObservationData in BiomObservations:
2280 sBugName = str( BiomObservationData[1])
2281 BiomTaxDataEntry = list()
2282 BiomTaxDataEntry.append(sBugName)
2283 BiomObservationsValues = BiomObservationData[0]
2284 for BiomDataValue in BiomObservationsValues:
2285 BiomTaxDataEntry.append(BiomDataValue)
2286 BiomTaxDataWork.append(tuple(BiomTaxDataEntry))
2287
2288 BiomCommonArea[ConstantsBreadCrumbs.c_BiomTaxData] = np.array(BiomTaxDataWork,dtype=np.dtype(BiomCommonArea[ConstantsBreadCrumbs.c_Dtype]))
2289 BiomCommonArea[ConstantsBreadCrumbs.c_dRowsMetadata] = RowMetadata(dRowsMetadata)
2290 del(BiomCommonArea[ConstantsBreadCrumbs.c_Dtype]) #Not needed anymore
2291
2292 return BiomCommonArea
2293
2294
	@staticmethod
	def _funcDecodeBiomMetadata(BiomCommonArea, BiomValue = None, iMaxIdLen=0 ):
		"""
		Decode the BIOM "columns" metadata and populate the common area with:
		1. BiomCommonArea['Metadata']   - dict of {metadata name: list of per-sample values}
		2. BiomCommonArea['Dtype']      - numpy structured-dtype spec for the abundance table
		3. BiomCommonArea['sLastMetadata'] - name of the last metadata entry seen
		4. BiomCommonArea['column_metadata_id'] - the name of the column id
		These elements are formatted and passed down the line to build the AbundanceTable.

		:param BiomCommonArea: Accumulator dict built by the caller; mutated in place and returned.
		:type: dict()
		:param BiomValue: The "columns" entries from the biom file (list of per-sample dicts,
			each with an "id" entry and a "metadata" entry that is a dict or None).
		:type: dict()
		:param iMaxIdLen: The maximum length of a row ID (used to size the ID field of the dtype).
		:type: Integer
		:return: BiomCommonArea
		:type: dict()
		"""

		BiomCommonArea[ConstantsBreadCrumbs.c_sLastMetadata] = None	#Initialize the LastMetadata element
		BiomCommonArea['dRowsMetadata'] = None #Initialize for cases that there is no metadata in the rows

		strLastMetadata = None   # NOTE(review): assigned here but never read; kept for byte-compatibility
		strIDMetadata = None     # Becomes the column-id name once any id/metadata key is seen

		lenBiomValue = len(BiomValue)
		BiomMetadata = dict()    # {metadata name: list (one slot per sample, None-padded)}
		for cntMetadata in range(0, lenBiomValue):
			BiomMetadataEntry = BiomValue[cntMetadata]

			for key, value in BiomMetadataEntry.iteritems():	#Loop on the entries of one sample
				if key == ConstantsBreadCrumbs.c_id_lowercase:	#If id - record the sample id
					strIDMetadata = ConstantsBreadCrumbs.c_ID
					if ConstantsBreadCrumbs.c_ID  not in BiomMetadata:	#First id seen - pre-fill one slot per sample
						BiomMetadata[ConstantsBreadCrumbs.c_ID] = list()
						for indx in range(0, lenBiomValue):
							BiomMetadata[ConstantsBreadCrumbs.c_ID].append(None)
					# id values are unicode in a JSON BIOM file; force them to ascii bytes
					BiomMetadata[ConstantsBreadCrumbs.c_ID][cntMetadata] = value.encode(ConstantsBreadCrumbs.c_ascii,ConstantsBreadCrumbs.c_ignore)

				if  key == ConstantsBreadCrumbs.c_metadata_lowercase:	#If key = metadata
					if  not value is None:	#And value is not empty
						MetadataDict = value	#The per-sample metadata dict
						for MDkey, MDvalue in MetadataDict.iteritems():
							# Normalize unicode keys/values to ascii; leave other types untouched
							if type(MDkey) == unicode :
								MDkeyAscii = MDkey.encode(ConstantsBreadCrumbs.c_ascii,ConstantsBreadCrumbs.c_ignore)
							else:
								MDkeyAscii = MDkey
							if type(MDvalue) == unicode:
								MDvalueAscii = MDvalue.encode(ConstantsBreadCrumbs.c_ascii,ConstantsBreadCrumbs.c_ignore)
							else:
								MDvalueAscii = MDvalue

							if  len(MDkeyAscii) > 0:	#Skip empty metadata names
								if not strIDMetadata:	#First metadata name seen doubles as the column id name
									strIDMetadata = MDkeyAscii
								# Overwritten every iteration, so after the loops this holds the last metadata name
								BiomCommonArea[ConstantsBreadCrumbs.c_sLastMetadata] = MDkeyAscii
								if MDkeyAscii  not in BiomMetadata:	#New metadata name - pre-fill one slot per sample
									BiomMetadata[MDkeyAscii] = list()
									for indx in range(0, lenBiomValue):
										BiomMetadata[MDkeyAscii].append(None)
								BiomMetadata[MDkeyAscii][cntMetadata] = MDvalueAscii


		BiomCommonArea[ConstantsBreadCrumbs.c_Metadata] = BiomMetadata
		BiomCommonArea[ConstantsBreadCrumbs.c_MetadataID] = strIDMetadata

		#**********************************************
		#*    Build dtype                             *
		#**********************************************
		# One (name, format) pair per column of the eventual numpy structured array:
		# the feature-ID column first, then one float column per sample id.

		BiomDtype = list()
		iMaxIdLen+=10 #Pad the ID width by 10 to leave slack beyond the longest observed ID
		BiomDtypeEntry = list()
		FirstValue = ConstantsBreadCrumbs.c_ID
		SecondValue = "a" + str(iMaxIdLen)	# numpy fixed-width bytes format, e.g. "a25"
		BiomDtypeEntry.append(FirstValue)
		BiomDtypeEntry.append(SecondValue)
		BiomDtype.append(tuple(BiomDtypeEntry))

		# NOTE(review): raises KeyError if the columns carried no "id" entries - confirm upstream guarantees ids
		for a in BiomMetadata[ConstantsBreadCrumbs.c_ID]:
			BiomDtypeEntry = list()
			FirstValue =  a.encode(ConstantsBreadCrumbs.c_ascii,ConstantsBreadCrumbs.c_ignore)
			SecondValue = ConstantsBreadCrumbs.c_f4 	# presumably "f4" (float32) - abundance values
			BiomDtypeEntry.append(FirstValue)
			BiomDtypeEntry.append(SecondValue)
			BiomDtype.append(tuple(BiomDtypeEntry))

		BiomCommonArea[ConstantsBreadCrumbs.c_Dtype] = BiomDtype
		return BiomCommonArea
2382
2383 @staticmethod
2384 def _funcBiomBuildRowMetadata( BiomValue, iMaxIdLen ):
2385 """
2386 Builds the row metadata from a BIOM value
2387
2388 :param BiomValue: BIOM Value from the BIOM JSON parsing
2389 :type: Complex dict of string pairs and dicts
2390 :param iMaxIdLen: Maximum length of all the IDs
2391 :type: int
2392 :return: dictRowsMetadata - np Array containing the rows metadata
2393 :type: {string feature id: {'metadata': {'taxonomy': [list of metadata values]}}}
2394 """
2395 # Build the input dict for RowMetadata from a dict of dicts from a BIOM file
2396 dictRowsMetadata = dict()
2397 for iIndexRowMetaData in range(0, len(BiomValue)):
2398 dictRowsMetadata[str(BiomValue[iIndexRowMetaData][ConstantsBreadCrumbs.c_id_lowercase])] = dict()
2399 RowMetadataEntryFromTable = BiomValue[iIndexRowMetaData][ConstantsBreadCrumbs.c_metadata_lowercase]
2400 dMetadataTempDict = dict()
2401 for key, value in RowMetadataEntryFromTable.iteritems():
2402 dMetadataTempDict[key] = value
2403 dictRowsMetadata[str(BiomValue[iIndexRowMetaData][ConstantsBreadCrumbs.c_id_lowercase])][ConstantsBreadCrumbs.c_metadata_lowercase] = dMetadataTempDict
2404 return dictRowsMetadata
2405
2406 @staticmethod
2407 def _funcInsertKeyToCommonArea(BiomCommonArea, BiomKey, BiomValue):
2408 """
2409 Inserts the keys into the BiomCommonArea["BiomFileInfo"]
2410 :param BiomCommonArea - The common area that has been built before
2411 :type: dict()
2412 :param BiomKey - The current key (eg. format, date, generated by)
2413 :type: str
2414 :param BiomValue - The current value of the key (eg. for format: "Biological Observation Matrix 0.9.1")
2415 :type: str
2416 :return: BiomCommonArea - The updated common area
2417 :type: dict()
2418 """
2419
2420 if ConstantsBreadCrumbs.c_BiomFileInfo not in BiomCommonArea:
2421 BiomCommonArea[ConstantsBreadCrumbs.c_BiomFileInfo] = dict()
2422
2423 strInsertKey = BiomKey #Set Default - But it is now always the same... (eg. URL is not: format_url -->url and others)
2424 PostBiomValue = BiomValue #The default value to be posted
2425 if BiomKey == ConstantsBreadCrumbs.c_strFormatUrl:
2426 strInsertKey = ConstantsBreadCrumbs.c_strURLKey
2427
2428 if BiomKey == ConstantsBreadCrumbs.c_MatrixTtype:
2429 strInsertKey = ConstantsBreadCrumbs.c_strSparsityKey
2430
2431 if BiomKey == ConstantsBreadCrumbs.c_GeneratedBy:
2432 PostBiomValue = None
2433
2434 if BiomKey == ConstantsBreadCrumbs.c_strDateKey:
2435 PostBiomValue = None
2436
2437 BiomCommonArea[ConstantsBreadCrumbs.c_BiomFileInfo][strInsertKey] = PostBiomValue
2438 return BiomCommonArea
2439