0
+ − 1 #!/usr/bin/env python
+ − 2 """
+ − 3 Author: Timothy Tickle
+ − 4 Description: Make PCoA of an abundance file
+ − 5 """
+ − 6
+ − 7 __author__ = "Timothy Tickle"
+ − 8 __copyright__ = "Copyright 2012"
+ − 9 __credits__ = ["Timothy Tickle"]
+ − 10 __license__ = ""
+ − 11 __version__ = ""
+ − 12 __maintainer__ = "Timothy Tickle"
+ − 13 __email__ = "ttickle@sph.harvard.edu"
+ − 14 __status__ = "Development"
+ − 15
+ − 16 import sys
+ − 17 import argparse
+ − 18 from src.AbundanceTable import AbundanceTable
+ − 19 from src.Metric import Metric
+ − 20 import csv
+ − 21 import os
+ − 22 from src.PCoA import PCoA
+ − 23
+ − 24 #Set up arguments reader
+ − 25 argp = argparse.ArgumentParser( prog = "scriptPcoa.py",
+ − 26 description = """PCoAs an abundance file given a metadata.\nExample:python scriptPcoa.py -i TID -l STSite""" )
+ − 27
+ − 28 #Arguments
+ − 29 #For table
+ − 30 argp.add_argument("-i","--id", dest="sIDName", default="ID", help="Abundance Table ID")
+ − 31 argp.add_argument("-l","--meta", dest="sLastMetadataName", help="Last metadata name")
+ − 32 argp.add_argument("-d","--fDelim", dest= "cFileDelimiter", action= "store", default="\t", help="File delimiter, default tab")
+ − 33 argp.add_argument("-f","--featureDelim", dest="cFeatureNameDelimiter", action= "store", metavar="Feature Name Delimiter", default="|", help="Feature delimiter")
+ − 34
+ − 35 argp.add_argument("-n","--doNorm", dest="fDoNormData", action="store_true", default=False, help="Flag to turn on normalization")
+ − 36 argp.add_argument("-s","--doSum", dest="fDoSumData", action="store_true", default=False, help="Flag to turn on summation")
+ − 37
+ − 38 argp.add_argument("-p","--paint", dest="sLabel", metavar= "Label", default=None, help="Label to paint in the PCoA")
+ − 39 argp.add_argument("-m","--metric", dest="strMetric", metavar = "distance", default = PCoA.c_BRAY_CURTIS, help ="Distance metric to use. Pick from braycurtis, canberra, chebyshev, cityblock, correlation, cosine, euclidean, hamming, spearman, sqeuclidean, unifrac_unweighted, unifrac_weighted")
+ − 40 argp.add_argument("-o","--outputFile", dest="strOutFile", metavar= "outputFile", default=None, help="Specify the path for the output figure.")
+ − 41 argp.add_argument("-D","--DistanceMatrix", dest="strFileDistanceMatrix", metavar= "strFileDistanceMatrix", default=None, help="Specify the path for outputing the distance matrix (if interested). Default this will not output.")
+ − 42 argp.add_argument("-C","--CoordinatesMatrix", dest="strFileCoordinatesMatrix", metavar= "strFileCoordinatesMatrix", default=None, help="Specify the path for outputing the x,y coordinates matrix (Dim 1 and 2). Default this will not output.")
+ − 43
+ − 44 # Unifrac arguments
+ − 45 argp.add_argument("-t","--unifracTree", dest="istrmTree", metavar="UnifracTreeFile", default=None, help="Optional file only needed for UniFrac calculations.")
+ − 46 argp.add_argument("-e","--unifracEnv", dest="istrmEnvr", metavar="UnifracEnvFile", default=None, help="Optional file only needed for UniFrac calculations.")
+ − 47 argp.add_argument("-c","--unifracColor", dest="fileUnifracColor", metavar="UnifracColorFile", default = None, help="A text file indicating the groupings of metadata to color. Each line in the file is a group to color. An example file line would be 'GroupName:ID,ID,ID,ID'")
+ − 48
+ − 49 argp.add_argument("strFileAbund", metavar = "Abundance file", nargs="?", help ="Input data file")
+ − 50
+ − 51 args = argp.parse_args( )
+ − 52
+ − 53 #Read in abundance table
+ − 54 abndTable = None
+ − 55 if args.strFileAbund:
+ − 56 abndTable = AbundanceTable.funcMakeFromFile(args.strFileAbund,
+ − 57 cDelimiter = args.cFileDelimiter,
+ − 58 sMetadataID = args.sIDName,
+ − 59 sLastMetadata = args.sLastMetadataName,
+ − 60 cFeatureNameDelimiter= args.cFeatureNameDelimiter)
+ − 61
+ − 62 #Normalize if need
+ − 63 if args.fDoSumData:
+ − 64 abndTable.funcSumClades()
+ − 65
+ − 66 #Sum if needed
+ − 67 if args.fDoNormData:
+ − 68 abndTable.funcNormalize()
+ − 69
+ − 70 #Get the metadata to paint
+ − 71 lsKeys = None
+ − 72 if abndTable:
+ − 73 lsKeys = abndTable.funcGetMetadataCopy().keys() if not args.sLabel else [args.sLabel]
+ − 74
+ − 75 #Get pieces of output file
+ − 76 if not args.strOutFile:
+ − 77 if not args.strFileAbund:
+ − 78 args.strOutFile = os.path.splitext(os.path.basename(args.istrmEnvr))[0]+"-pcoa.pdf"
+ − 79 else:
+ − 80 args.strOutFile = os.path.splitext(os.path.basename(args.strFileAbund))[0]+"-pcoa.pdf"
+ − 81 lsFilePieces = os.path.splitext(args.strOutFile)
+ − 82
+ − 83 # Make PCoA object
+ − 84 # Get PCoA object and plot
+ − 85 pcoa = PCoA()
+ − 86 if(not args.strMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]) and abndTable:
+ − 87 pcoa.loadData(abndTable,True)
+ − 88 # Optional args.strFileDistanceMatrix if not none will force a printing of the distance measures to the path in args.strFileDistanceMatrix
+ − 89 pcoa.run(tempDistanceMetric=args.strMetric, iDims=2, strDistanceMatrixFile=args.strFileDistanceMatrix, istrmTree=args.istrmTree, istrmEnvr=args.istrmEnvr)
+ − 90
+ − 91 # Write dim 1 and 2 coordinates to file
+ − 92 if args.strFileCoordinatesMatrix:
+ − 93 lsIds = pcoa.funcGetIDs()
+ − 94 mtrxCoordinates = pcoa.funcGetCoordinates()
+ − 95 csvrCoordinates = csv.writer(open(args.strFileCoordinatesMatrix, 'w'))
+ − 96 csvrCoordinates.writerow(["ID","Dimension_1","Dimension_2"])
+ − 97 for x in xrange(mtrxCoordinates.shape[0]):
+ − 98 strId = lsIds[x] if lsIds else ""
+ − 99 csvrCoordinates.writerow([strId]+mtrxCoordinates[x].tolist())
+ − 100
+ − 101 # Paint metadata
+ − 102 if lsKeys:
+ − 103 for iIndex in xrange(len(lsKeys)):
+ − 104 lsMetadata = abndTable.funcGetMetadata(lsKeys[iIndex])
+ − 105
+ − 106 pcoa.plotList(lsLabelList = lsMetadata,
+ − 107 strOutputFileName = lsFilePieces[0]+"-"+lsKeys[iIndex]+lsFilePieces[1],
+ − 108 iSize=20,
+ − 109 dAlpha=1.0,
+ − 110 charForceColor=None,
+ − 111 charForceShape=None,
+ − 112 fInvert=False,
+ − 113 iDim1=1,
+ − 114 iDim2=2)
+ − 115
+ − 116 if args.strMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]:
+ − 117
+ − 118 c_sNotGiven = "Not_specified"
+ − 119
+ − 120 lsIds = pcoa.funcGetIDs()
+ − 121 lsGroupLabels = [c_sNotGiven for s in lsIds]
+ − 122
+ − 123 if args.fileUnifracColor:
+ − 124
+ − 125 # Read color file and make a dictionary to convert ids
+ − 126 lsColorLines = csv.reader(open(args.fileUnifracColor))
+ − 127 dictConvertIDToGroup = {}
+ − 128 for lsLine in lsColorLines:
+ − 129 if lsLine:
+ − 130 sGroupID, sFirstID = lsLine[0].split(":")
+ − 131 dictConvertIDToGroup.update(dict([(sID,sGroupID) for sID in [sFirstID]+lsLine[1:]]))
+ − 132
+ − 133 lsGroupLabels = [dictConvertIDToGroup.get(sID,c_sNotGiven) for sID in lsIds]
+ − 134
+ − 135 pcoa.plotList(lsLabelList = lsGroupLabels,
+ − 136 strOutputFileName = lsFilePieces[0]+"-"+args.strMetric+lsFilePieces[1],
+ − 137 iSize=20,
+ − 138 dAlpha=1.0,
+ − 139 charForceColor=None,
+ − 140 charForceShape=None,
+ − 141 fInvert=False,
+ − 142 iDim1=1,
+ − 143 iDim2=2)