view smart_toolShed/SMART/Java/Python/plotCsv.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
line wrap: on
line source

#! /usr/bin/env python
#
# Copyright INRA-URGI 2009-2010
# 
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info".
# 
# As a counterpart to the access to the source code and rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty and the software's author, the holder of the
# economic rights, and the successive licensors have only limited
# liability.
# 
# In this respect, the user's attention is drawn to the risks associated
# with loading, using, modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean that it is complicated to manipulate, and that also
# therefore means that it is reserved for developers and experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or
# data to be ensured and, more generally, to use and operate it in the
# same conditions as regards security.
# 
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.
#
"""
Plot the data from the data files
"""

import os
import re
import sys
from optparse import OptionParser
from SMART.Java.Python.misc.RPlotter import *
from SMART.Java.Python.misc.Progress import *


def mergeData(line1, line2):
    """
    Pair up the values of two data sets that share exactly the same keys.

    Both arguments are dictionaries.  The result maps every key to the
    tuple (value in line1, value in line2).

    The program is aborted through sys.exit() when the two dictionaries do
    not have the same set of keys.
    """
    # Compare the keys as sets: the order in which a dictionary lists its
    # keys is not part of its content, so comparing the raw key lists could
    # reject two data sets that actually correspond to each other.
    if set(line1) != set(line2):
        sys.exit("Error! Input files do not correspond to each other! Aborting...")
    return {key: (line1[key], line2[key]) for key in line1}



# Layouts accepted for one line of an input file.  They are tried in this
# order, and the first one that matches wins.
_LABEL_AND_TWO_VALUES = re.compile(r"^\s*(\S+)\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s*$")
_TWO_VALUES           = re.compile(r"^\s*(\d+\.?\d*)\s+(\d+\.?\d*)\s*$")
_LABEL_AND_VALUE      = re.compile(r"^\s*(\S+)\s+(\d+\.?\d*)\s*$")


def _parsePoint(point):
    """
    Parse one (already stripped) line of an input file.

    Return a tuple (nbColumns, key, value), or None when the line matches
    none of the supported layouts:
     - "label value value" gives (3, label, (value, value))
     - "value value"       gives (2, value, value)
     - "label value"       gives (1, label, value)
    Labels are kept as strings; values are converted to float.
    """
    m = _LABEL_AND_TWO_VALUES.search(point)
    if m is not None:
        return (3, m.group(1), (float(m.group(2)), float(m.group(3))))
    m = _TWO_VALUES.search(point)
    if m is not None:
        return (2, float(m.group(1)), float(m.group(2)))
    m = _LABEL_AND_VALUE.search(point)
    if m is not None:
        # This layout is recorded with a column count of 1 so that it is
        # told apart from the purely numeric two-value layout.
        return (1, m.group(1), float(m.group(2)))
    return None


if __name__ == "__main__":

    # parse command line
    description = "Plot CSV v1.0.1: Plot the content of a CSV file. [Category: Personnal]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input",     dest="inputFileNames", action="store",             type="string", help="input file [compulsory] [format: file in CSV format]")
    parser.add_option("-o", "--output",    dest="outputFileName", action="store",             type="string", help="output file [compulsory] [format: output file in PNG format]")
    parser.add_option("-s", "--shape",     dest="shape",          action="store",             type="string", help="shape of the plot [format: choice (line, bar, points, heatPoints)]")
    parser.add_option("-l", "--log",       dest="log",            action="store", default="", type="string", help="use log on x- or y-axis (write 'x', 'y' or 'xy') [format: string] [default: ]")
    parser.add_option("-v", "--verbosity", dest="verbosity",      action="store", default=1,  type="int",    help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # Without this check, a missing -i option would surface later as an
    # obscure AttributeError on None.split().
    if options.inputFileNames is None:
        parser.error("the input file (-i/--input) is compulsory")

    # configure the plotter according to the requested shape
    plotter = RPlotter(options.outputFileName, options.verbosity)
    if options.shape == "bar":
        plotter.setBarplot(True)
    elif options.shape == "points":
        plotter.setPoints(True)
    elif options.shape == "heatPoints":
        plotter.setHeatPoints(True)

    plotter.setLog(options.log)

    # read the input files: one dictionary and one column count per file
    lines      = []
    nbsColumns = []
    for inputFileName in options.inputFileNames.split(","):
        line      = {}
        nbColumns = None

        # The 'with' block guarantees that the file is closed, even when
        # the program exits on a malformed line.
        with open(inputFileName) as inputFile:
            for point in inputFile:
                point  = point.strip()
                parsed = _parsePoint(point)
                if parsed is None:
                    sys.exit("Do not understand line '%s' of file '%s'! Aborting..." % (point, inputFileName))
                (pointNbColumns, key, value) = parsed
                if nbColumns is None:
                    # The first line of a file fixes its layout.
                    nbColumns = pointNbColumns
                    nbsColumns.append(nbColumns)
                elif nbColumns != pointNbColumns:
                    sys.exit("Number of columns changed around line '%s' of file '%s'! Aborting..." % (point, inputFileName))
                line[key] = value

        lines.append(line)

    # A file without any data line contributes no column count.
    if len(lines) != len(nbsColumns):
        sys.exit("Something is wrong in the input files! Aborting...")

    # check the inputs against the requested shape, and feed the plotter
    if options.shape == "bar":
        if len(lines) != 1:
            sys.exit("Error! Bar plot should have exactly one input file! Aborting...")
        if nbsColumns[0] != 2:
            sys.exit("Error! Bar plot input file should have exactly two columns! Aborting...")
        plotter.addLine(lines[0])
    elif options.shape == "points":
        if len(lines) != 2:
            sys.exit("Error! Points cloud should have exactly two input file! Aborting...")
        if nbsColumns[0] != 2 or nbsColumns[1] != 2:
            sys.exit("Error! Points cloud plot input file should have exactly two columns! Aborting...")
        # The function is named mergeData; calling the undefined name
        # 'mergedData' made this shape fail with a NameError.
        plotter.addLine(mergeData(lines[0], lines[1]))
    elif options.shape == "heatPoints":
        if len(lines) != 3:
            sys.exit("Error! Heat points cloud should have exactly three input file! Aborting...")
        plotter.addLine(mergeData(lines[0], lines[1]))
        plotter.addHeatLine(lines[2])
    elif options.shape == "line":
        for (line, nbColumns) in zip(lines, nbsColumns):
            if nbColumns != 2:
                sys.exit("Error! Curve plot input file should have exactly two columns! Aborting...")
            plotter.addLine(line)
    else:
        sys.exit("Do not understand shape '%s'" % (options.shape))


    plotter.plot()