view commons/core/parsing/VarscanFile.py @ 9:1eb55963fe39

Updated CompareOverlappingSmall*.py
author m-zytnicki
date Thu, 14 Mar 2013 05:23:05 -0400
parents 769e306b7933
children
line wrap: on
line source

# Copyright INRA (Institut National de la Recherche Agronomique)
# http://www.inra.fr
# http://urgi.versailles.inra.fr
#
# This software is governed by the CeCILL license under French law and
# abiding by the rules of distribution of free software.  You can  use, 
# modify and/ or redistribute the software under the terms of the CeCILL
# license as circulated by CEA, CNRS and INRIA at the following URL
# "http://www.cecill.info". 
#
# As a counterpart to the access to the source code and  rights to copy,
# modify and redistribute granted by the license, users are provided only
# with a limited warranty  and the software's author,  the holder of the
# economic rights,  and the successive licensors  have only  limited
# liability. 
#
# In this respect, the user's attention is drawn to the risks associated
# with loading,  using,  modifying and/or developing or reproducing the
# software by the user in light of its specific status of free software,
# that may mean  that it is complicated to manipulate,  and  that  also
# therefore means  that it is reserved for developers  and  experienced
# professionals having in-depth computer knowledge. Users are therefore
# encouraged to load and test the software's suitability as regards their
# requirements in conditions enabling the security of their systems and/or 
# data to be ensured and,  more generally, to use and operate it in the 
# same conditions as regards security. 
#
# The fact that you are presently reading this means that you have had
# knowledge of the CeCILL license and that you accept its terms.


from commons.core.parsing.VarscanHit import VarscanHit
from commons.core.parsing.VarscanHit_WithTag import VarscanHit_WithTag
from commons.core.parsing.VarscanHit_v2_2_8 import VarscanHit_v2_2_8
from commons.core.checker.CheckerException import CheckerException
from commons.core.parsing.VarscanHit_v2_2_8_WithTag import VarscanHit_v2_2_8_WithTag

class VarscanFile(object):
    """In-memory representation of a tab-separated Varscan result file.

    The file format is deduced, line by line, from the number of
    tab-separated columns:

    * 12 columns -> "Varscan_2_2"            (VarscanHit)
    * 13 columns -> "Varscan_2_2_WithTag"    (VarscanHit_WithTag)
    * 19 columns -> "Varscan_2_2_8"          (VarscanHit_v2_2_8)
    * 20 columns -> "Varscan_2_2_8_WithTag"  (VarscanHit_v2_2_8_WithTag)

    Lines containing "Chrom<TAB>Position" are treated as header lines.
    """

    # Substring identifying a header line.
    _HEADER_MARKER = "Chrom\tPosition"

    # Type labels accepted by setTypeOfVarscanFile().
    _KNOWN_TYPES = (
        "Varscan_2_2",
        "Varscan_2_2_WithTag",
        "Varscan_2_2_8",
        "Varscan_2_2_8_WithTag",
    )

    # Number of columns of a data line -> type label of the file.
    _TYPE_BY_COLUMN_COUNT = {
        12: "Varscan_2_2",
        13: "Varscan_2_2_WithTag",
        19: "Varscan_2_2_8",
        20: "Varscan_2_2_8_WithTag",
    }

    # Type label -> name of the method building the matching hit object.
    # Method names (not the hit classes themselves) are stored so that the
    # lookup goes through `self` and honours overrides in subclasses.
    _FACTORY_NAME_BY_TYPE = {
        "Varscan_2_2": "createVarscanHit",
        "Varscan_2_2_WithTag": "createVarscanHitWithTag",
        "Varscan_2_2_8": "createVarscanHit_v2_2_8",
        "Varscan_2_2_8_WithTag": "createVarscanHit_v2_2_8_WithTag",
    }

    def __init__(self, varscanFileName = ""):
        """Create an empty container bound to the file `varscanFileName`.

        Nothing is read until parse() is called.
        """
        self._varscanFileName = varscanFileName
        self._varscanFieldSeparator = "\t"
        self._lVarscanHits = []
        self._typeOfVarscanFile = ""
        # Initialised here so that getHeaderVarcanFile() is safe to call
        # even when no header has been parsed or set yet.
        self._headerVarcanFile = ""

    def __eq__(self, o):
        """Two files are equal when separator, hits and file name all match.

        The header and the type of the file are not part of the comparison.
        """
        if not isinstance(o, VarscanFile):
            # Let Python fall back to its default handling instead of
            # failing with AttributeError on unrelated objects.
            return NotImplemented
        return (self._varscanFieldSeparator == o._varscanFieldSeparator
                and self._lVarscanHits == o._lVarscanHits
                and self._varscanFileName == o._varscanFileName)

    def __ne__(self, o):
        """Negation of __eq__ (Python 2 does not derive it automatically)."""
        areEqual = self.__eq__(o)
        if areEqual is NotImplemented:
            return NotImplemented
        return not areEqual

    def setVarscanHitsList(self, lVarscanHits):
        """Replace the list of hits."""
        self._lVarscanHits = lVarscanHits

    def setHeaderVarcanFile(self, headerVarcanFile):
        """Store the header line of the file."""
        self._headerVarcanFile = headerVarcanFile

    def setTypeOfVarscanFile(self, type):
        """Set the type of the file; any unknown label resets it to ""."""
        if type in self._KNOWN_TYPES:
            self._typeOfVarscanFile = type
        else:
            self._typeOfVarscanFile = ""

    def getVarscanHitsList(self):
        """Return the list of hits."""
        return self._lVarscanHits

    def getHeaderVarcanFile(self):
        """Return the stored header line ("" when none was parsed or set)."""
        return self._headerVarcanFile

    def getListOfVarscanHits(self):
        """Return the list of hits (same list as getVarscanHitsList())."""
        return self._lVarscanHits

    def getTypeOfVarscanFile(self):
        """Return the type label of the file ("" when unknown)."""
        return self._typeOfVarscanFile

    def parse(self):
        """Read the file and append one hit object per data line.

        A header found on the first line is stored as is (line terminator
        included). Header lines found anywhere else are skipped. Data lines
        are numbered from 1, header lines not being counted. The type of
        the file is updated after each data line according to its number
        of columns. Hits are appended to the current list, which is not
        cleared beforehand.

        Raises:
            CheckerException: a data line does not have 12, 13, 19 or 20
                tab-separated columns.
            IOError: the file cannot be opened.
        """
        # `with` guarantees the handle is released even when a line is
        # rejected half-way through the file.
        with open(self._varscanFileName, "r") as varscanFile:
            currentLineNumber = 0
            for lineIndex, line in enumerate(varscanFile):
                if self._HEADER_MARKER in line:
                    if lineIndex == 0:
                        self.setHeaderVarcanFile(line)
                    # Repeated headers are skipped; the loop always moves
                    # on to the next line, so they cannot stall the parser.
                    continue
                currentLineNumber += 1
                line = line.strip()
                nbColumns = len(line.split(self._varscanFieldSeparator))
                typeOfLine = self._TYPE_BY_COLUMN_COUNT.get(nbColumns)
                if typeOfLine is None:
                    raise CheckerException ("Warning: this line (l.%s) is not a valid varscan line !" % currentLineNumber)
                createHit = getattr(self, self._FACTORY_NAME_BY_TYPE[typeOfLine])
                iVarscanHit = createHit(line, currentLineNumber)
                # The type is only updated once the hit was built.
                self._typeOfVarscanFile = typeOfLine
                self._lVarscanHits.append(iVarscanHit)

    def createVarscanObjectFromLine(self, line, currentLineNumber):
        """Build a hit from `line` according to the current type of the file.

        Returns None when the type of the file is not one of the known
        labels (for instance when it is still "").
        """
        factoryName = self._FACTORY_NAME_BY_TYPE.get(self._typeOfVarscanFile)
        if factoryName is None:
            return None
        return getattr(self, factoryName)(line, currentLineNumber)

    def createVarscanHit(self, line, currentLineNumber):
        """Build a VarscanHit from a data line."""
        iVarscanHit = VarscanHit()
        iVarscanHit.setAttributesFromString(line, currentLineNumber)
        return iVarscanHit

    def createVarscanHitWithTag(self, line, currentLineNumber):
        """Build a VarscanHit_WithTag from a data line."""
        iVarscanHitWithTag = VarscanHit_WithTag()
        iVarscanHitWithTag.setAttributesFromString(line, currentLineNumber)
        return iVarscanHitWithTag

    def createVarscanHit_v2_2_8(self, line, currentLineNumber):
        """Build a VarscanHit_v2_2_8 from a data line."""
        iVarscanHit = VarscanHit_v2_2_8()
        iVarscanHit.setAttributesFromString(line, currentLineNumber)
        return iVarscanHit

    def createVarscanHit_v2_2_8_WithTag(self, line, currentLineNumber):
        """Build a VarscanHit_v2_2_8_WithTag from a data line."""
        iVarscanHitWithTag = VarscanHit_v2_2_8_WithTag()
        iVarscanHitWithTag.setAttributesFromString(line, currentLineNumber)
        return iVarscanHitWithTag

    def selectTypeOfVarscanHitObject(self):
        """Return a new, empty hit object matching the type of the file.

        Raises:
            CheckerException: the type of the file is still "".
        """
        if self._typeOfVarscanFile == "":
            raise CheckerException ("Error: no varscan object found !")
        elif self._typeOfVarscanFile == "Varscan_2_2":
            return VarscanHit()
        elif self._typeOfVarscanFile == "Varscan_2_2_WithTag":
            return VarscanHit_WithTag()
        elif self._typeOfVarscanFile == "Varscan_2_2_8":
            return VarscanHit_v2_2_8()
        elif self._typeOfVarscanFile == "Varscan_2_2_8_WithTag":
            return VarscanHit_v2_2_8_WithTag()