| 
0
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 """
 | 
| 
 | 
     3 Author: Timothy Tickle
 | 
| 
 | 
     4 Description: Make PCoA of an abundance file
 | 
| 
 | 
     5 """
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 __author__ = "Timothy Tickle"
 | 
| 
 | 
     8 __copyright__ = "Copyright 2012"
 | 
| 
 | 
     9 __credits__ = ["Timothy Tickle"]
 | 
| 
 | 
    10 __license__ = ""
 | 
| 
 | 
    11 __version__ = ""
 | 
| 
 | 
    12 __maintainer__ = "Timothy Tickle"
 | 
| 
 | 
    13 __email__ = "ttickle@sph.harvard.edu"
 | 
| 
 | 
    14 __status__ = "Development"
 | 
| 
 | 
    15 
 | 
| 
 | 
    16 import sys
 | 
| 
 | 
    17 import argparse
 | 
| 
 | 
    18 from src.AbundanceTable import AbundanceTable
 | 
| 
 | 
    19 from src.Metric import Metric
 | 
| 
 | 
    20 import csv
 | 
| 
 | 
    21 import os
 | 
| 
 | 
    22 from src.PCoA import PCoA
 | 
| 
 | 
    23 
 | 
| 
 | 
    24 #Set up arguments reader
 | 
| 
 | 
    25 argp = argparse.ArgumentParser( prog = "scriptPcoa.py",
 | 
| 
 | 
    26     description = """PCoAs an abundance file given a metadata.\nExample:python scriptPcoa.py -i TID -l STSite""" )
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 #Arguments
 | 
| 
 | 
    29 #For table
 | 
| 
 | 
    30 argp.add_argument("-i","--id", dest="sIDName", default="ID", help="Abundance Table ID")
 | 
| 
 | 
    31 argp.add_argument("-l","--meta", dest="sLastMetadataName", help="Last metadata name")
 | 
| 
 | 
    32 argp.add_argument("-d","--fDelim", dest= "cFileDelimiter", action= "store", default="\t", help="File delimiter, default tab")
 | 
| 
 | 
    33 argp.add_argument("-f","--featureDelim", dest="cFeatureNameDelimiter", action= "store", metavar="Feature Name Delimiter", default="|", help="Feature delimiter") 
 | 
| 
 | 
    34 
 | 
| 
 | 
    35 argp.add_argument("-n","--doNorm", dest="fDoNormData", action="store_true", default=False, help="Flag to turn on normalization")
 | 
| 
 | 
    36 argp.add_argument("-s","--doSum", dest="fDoSumData", action="store_true", default=False, help="Flag to turn on summation")
 | 
| 
 | 
    37 
 | 
| 
 | 
    38 argp.add_argument("-p","--paint", dest="sLabel", metavar= "Label", default=None, help="Label to paint in the PCoA")
 | 
| 
 | 
    39 argp.add_argument("-m","--metric", dest="strMetric", metavar = "distance", default = PCoA.c_BRAY_CURTIS, help ="Distance metric to use. Pick from braycurtis, canberra, chebyshev, cityblock, correlation, cosine, euclidean, hamming, spearman, sqeuclidean, unifrac_unweighted, unifrac_weighted")
 | 
| 
 | 
    40 argp.add_argument("-o","--outputFile", dest="strOutFile", metavar= "outputFile", default=None, help="Specify the path for the output figure.")
 | 
| 
 | 
    41 argp.add_argument("-D","--DistanceMatrix", dest="strFileDistanceMatrix", metavar= "strFileDistanceMatrix", default=None, help="Specify the path for outputing the distance matrix (if interested). Default this will not output.")
 | 
| 
 | 
    42 argp.add_argument("-C","--CoordinatesMatrix", dest="strFileCoordinatesMatrix", metavar= "strFileCoordinatesMatrix", default=None, help="Specify the path for outputing the x,y coordinates matrix (Dim 1 and 2). Default this will not output.")
 | 
| 
 | 
    43 
 | 
| 
 | 
    44 # Unifrac arguments
 | 
| 
 | 
    45 argp.add_argument("-t","--unifracTree", dest="istrmTree", metavar="UnifracTreeFile", default=None, help="Optional file only needed for UniFrac calculations.")
 | 
| 
 | 
    46 argp.add_argument("-e","--unifracEnv", dest="istrmEnvr", metavar="UnifracEnvFile", default=None, help="Optional file only needed for UniFrac calculations.")
 | 
| 
 | 
    47 argp.add_argument("-c","--unifracColor", dest="fileUnifracColor", metavar="UnifracColorFile", default = None, help="A text file indicating the groupings of metadata to color. Each line in the file is a group to color. An example file line would be  'GroupName:ID,ID,ID,ID'")
 | 
| 
 | 
    48 
 | 
| 
 | 
    49 argp.add_argument("strFileAbund", metavar = "Abundance file", nargs="?", help ="Input data file")
 | 
| 
 | 
    50 
 | 
| 
 | 
    51 args = argp.parse_args( )
 | 
| 
 | 
    52 
 | 
| 
 | 
    53 #Read in abundance table
 | 
| 
 | 
    54 abndTable = None
 | 
| 
 | 
    55 if args.strFileAbund:
 | 
| 
 | 
    56   abndTable = AbundanceTable.funcMakeFromFile(args.strFileAbund,
 | 
| 
 | 
    57                              cDelimiter = args.cFileDelimiter,
 | 
| 
 | 
    58                              sMetadataID = args.sIDName,
 | 
| 
 | 
    59                              sLastMetadata = args.sLastMetadataName,
 | 
| 
 | 
    60                              cFeatureNameDelimiter= args.cFeatureNameDelimiter)
 | 
| 
 | 
    61 
 | 
| 
 | 
    62   #Normalize if need
 | 
| 
 | 
    63   if args.fDoSumData:
 | 
| 
 | 
    64     abndTable.funcSumClades()
 | 
| 
 | 
    65 
 | 
| 
 | 
    66   #Sum if needed
 | 
| 
 | 
    67   if args.fDoNormData:
 | 
| 
 | 
    68     abndTable.funcNormalize()
 | 
| 
 | 
    69 
 | 
| 
 | 
    70 #Get the metadata to paint
 | 
| 
 | 
    71 lsKeys = None
 | 
| 
 | 
    72 if abndTable:
 | 
| 
 | 
    73   lsKeys = abndTable.funcGetMetadataCopy().keys() if not args.sLabel else [args.sLabel]
 | 
| 
 | 
    74 
 | 
| 
 | 
    75 #Get pieces of output file
 | 
| 
 | 
    76 if not args.strOutFile:
 | 
| 
 | 
    77   if not args.strFileAbund:
 | 
| 
 | 
    78     args.strOutFile = os.path.splitext(os.path.basename(args.istrmEnvr))[0]+"-pcoa.pdf"
 | 
| 
 | 
    79   else:
 | 
| 
 | 
    80     args.strOutFile = os.path.splitext(os.path.basename(args.strFileAbund))[0]+"-pcoa.pdf"
 | 
| 
 | 
    81 lsFilePieces = os.path.splitext(args.strOutFile)
 | 
| 
 | 
    82 
 | 
| 
 | 
    83 # Make PCoA object
 | 
| 
 | 
    84 # Get PCoA object and plot
 | 
| 
 | 
    85 pcoa = PCoA()
 | 
| 
 | 
    86 if(not args.strMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]) and abndTable:
 | 
| 
 | 
    87   pcoa.loadData(abndTable,True)
 | 
| 
 | 
    88 # Optional args.strFileDistanceMatrix if not none will force a printing of the distance measures to the path in args.strFileDistanceMatrix
 | 
| 
 | 
    89 pcoa.run(tempDistanceMetric=args.strMetric, iDims=2, strDistanceMatrixFile=args.strFileDistanceMatrix, istrmTree=args.istrmTree, istrmEnvr=args.istrmEnvr)
 | 
| 
 | 
    90 
 | 
| 
 | 
    91 # Write dim 1 and 2 coordinates to file
 | 
| 
 | 
    92 if args.strFileCoordinatesMatrix:
 | 
| 
 | 
    93   lsIds = pcoa.funcGetIDs()
 | 
| 
 | 
    94   mtrxCoordinates = pcoa.funcGetCoordinates()
 | 
| 
 | 
    95   csvrCoordinates = csv.writer(open(args.strFileCoordinatesMatrix, 'w'))
 | 
| 
 | 
    96   csvrCoordinates.writerow(["ID","Dimension_1","Dimension_2"])
 | 
| 
 | 
    97   for x in xrange(mtrxCoordinates.shape[0]):
 | 
| 
 | 
    98     strId = lsIds[x] if lsIds else ""
 | 
| 
 | 
    99     csvrCoordinates.writerow([strId]+mtrxCoordinates[x].tolist())
 | 
| 
 | 
   100 
 | 
| 
 | 
   101 # Paint metadata
 | 
| 
 | 
   102 if lsKeys:
 | 
| 
 | 
   103   for iIndex in xrange(len(lsKeys)):
 | 
| 
 | 
   104     lsMetadata = abndTable.funcGetMetadata(lsKeys[iIndex])
 | 
| 
 | 
   105 
 | 
| 
 | 
   106     pcoa.plotList(lsLabelList = lsMetadata,
 | 
| 
 | 
   107       strOutputFileName = lsFilePieces[0]+"-"+lsKeys[iIndex]+lsFilePieces[1],
 | 
| 
 | 
   108       iSize=20,
 | 
| 
 | 
   109       dAlpha=1.0,
 | 
| 
 | 
   110       charForceColor=None,
 | 
| 
 | 
   111       charForceShape=None,
 | 
| 
 | 
   112       fInvert=False,
 | 
| 
 | 
   113       iDim1=1,
 | 
| 
 | 
   114       iDim2=2)
 | 
| 
 | 
   115 
 | 
| 
 | 
   116 if args.strMetric in [Metric.c_strUnifracUnweighted,Metric.c_strUnifracWeighted]:
 | 
| 
 | 
   117 
 | 
| 
 | 
   118   c_sNotGiven = "Not_specified"
 | 
| 
 | 
   119 
 | 
| 
 | 
   120   lsIds = pcoa.funcGetIDs()
 | 
| 
 | 
   121   lsGroupLabels = [c_sNotGiven for s in lsIds]
 | 
| 
 | 
   122 
 | 
| 
 | 
   123   if args.fileUnifracColor:
 | 
| 
 | 
   124 
 | 
| 
 | 
   125     # Read color file and make a dictionary to convert ids
 | 
| 
 | 
   126     lsColorLines = csv.reader(open(args.fileUnifracColor))
 | 
| 
 | 
   127     dictConvertIDToGroup = {}
 | 
| 
 | 
   128     for lsLine in lsColorLines:
 | 
| 
 | 
   129       if lsLine:
 | 
| 
 | 
   130         sGroupID, sFirstID = lsLine[0].split(":")
 | 
| 
 | 
   131         dictConvertIDToGroup.update(dict([(sID,sGroupID) for sID in [sFirstID]+lsLine[1:]]))
 | 
| 
 | 
   132 
 | 
| 
 | 
   133     lsGroupLabels = [dictConvertIDToGroup.get(sID,c_sNotGiven) for sID in lsIds]
 | 
| 
 | 
   134 
 | 
| 
 | 
   135   pcoa.plotList(lsLabelList = lsGroupLabels,
 | 
| 
 | 
   136       strOutputFileName = lsFilePieces[0]+"-"+args.strMetric+lsFilePieces[1],
 | 
| 
 | 
   137       iSize=20,
 | 
| 
 | 
   138       dAlpha=1.0,
 | 
| 
 | 
   139       charForceColor=None,
 | 
| 
 | 
   140       charForceShape=None,
 | 
| 
 | 
   141       fInvert=False,
 | 
| 
 | 
   142       iDim1=1,
 | 
| 
 | 
   143       iDim2=2)
 |