| 6 | 1 # Copyright INRA (Institut National de la Recherche Agronomique) | 
|  | 2 # http://www.inra.fr | 
|  | 3 # http://urgi.versailles.inra.fr | 
|  | 4 # | 
|  | 5 # This software is governed by the CeCILL license under French law and | 
|  | 6 # abiding by the rules of distribution of free software.  You can  use, | 
|  | 7 # modify and/ or redistribute the software under the terms of the CeCILL | 
|  | 8 # license as circulated by CEA, CNRS and INRIA at the following URL | 
|  | 9 # "http://www.cecill.info". | 
|  | 10 # | 
|  | 11 # As a counterpart to the access to the source code and  rights to copy, | 
|  | 12 # modify and redistribute granted by the license, users are provided only | 
|  | 13 # with a limited warranty  and the software's author,  the holder of the | 
|  | 14 # economic rights,  and the successive licensors  have only  limited | 
|  | 15 # liability. | 
|  | 16 # | 
|  | 17 # In this respect, the user's attention is drawn to the risks associated | 
|  | 18 # with loading,  using,  modifying and/or developing or reproducing the | 
|  | 19 # software by the user in light of its specific status of free software, | 
|  | 20 # that may mean  that it is complicated to manipulate,  and  that  also | 
|  | 21 # therefore means  that it is reserved for developers  and  experienced | 
|  | 22 # professionals having in-depth computer knowledge. Users are therefore | 
|  | 23 # encouraged to load and test the software's suitability as regards their | 
|  | 24 # requirements in conditions enabling the security of their systems and/or | 
|  | 25 # data to be ensured and,  more generally, to use and operate it in the | 
|  | 26 # same conditions as regards security. | 
|  | 27 # | 
|  | 28 # The fact that you are presently reading this means that you have had | 
|  | 29 # knowledge of the CeCILL license and that you accept its terms. | 
|  | 30 | 
|  | 31 | 
|  | 32 import sys | 
|  | 33 from commons.core.sql.TableAdaptator import TableAdaptator | 
|  | 34 from commons.core.sql.ITableSeqAdaptator import ITableSeqAdaptator | 
|  | 35 from commons.core.coord.SetUtils import SetUtils | 
|  | 36 from commons.core.seq.Bioseq import Bioseq | 
|  | 37 | 
|  | 38 | 
|  | 39 ## Adaptator for a Seq table | 
|  | 40 # | 
class TableSeqAdaptator( TableAdaptator, ITableSeqAdaptator ):

    # The queries of this class read the columns 'accession', 'sequence',
    # 'description' and 'length' of the table named by self._table, through
    # the database handle self._iDb.
    #
    # NOTE(review): every SQL statement below is assembled by string
    # interpolation, so a name containing a quote character yields a malformed
    # (or altered) query. Consider bound parameters if the self._iDb API
    # supports them -- TODO confirm.

    ## Retrieve all the distinct accession names in a list.
    #
    # @return lAccessions list of accessions
    #
    def getAccessionsList( self ):
        sqlCmd = "SELECT DISTINCT accession FROM %s;" % ( self._table )
        return self._getStringListWithSQLCmd( sqlCmd )

    ## Save sequences in a fasta file from a list of accession names.
    #
    # The output file is created (or truncated) even when the list is empty.
    #
    # @param lAccessions list of accessions
    # @param outFileName string Fasta file
    #
    def saveAccessionsListInFastaFile( self, lAccessions, outFileName ):
        outFile = open( outFileName, "w" )
        # try/finally: release the file handle even if a lookup or a write fails
        try:
            for ac in lAccessions:
                bs = self.getBioseqFromHeader( ac )
                bs.write( outFile )
        finally:
            outFile.close()

    ## Get a bioseq instance given its header
    #
    # The bioseq is built from the first two fields of the first matching row.
    # An accession absent from the table gives an empty result, on which the
    # 'res[0]' lookup fails.
    #
    # @param header string name of the sequence ('accession' field in the 'seq' table)
    # @return bioseq instance
    #
    def getBioseqFromHeader( self, header ):
        sqlCmd = "SELECT * FROM %s WHERE accession='%s';" % ( self._table, header )
        self._iDb.execute( sqlCmd )
        res = self._iDb.fetchall()
        firstRow = res[0]
        return Bioseq( firstRow[0], firstRow[1] )

    ## Retrieve the length of a sequence given its name.
    #
    # @param accession name of the sequence
    # @return seqLength integer length of the sequence
    #
    def getSeqLengthFromAccession( self, accession ):
        sqlCmd = 'SELECT length FROM %s WHERE accession="%s"' % ( self._table, accession )
        return self._iDb.getIntegerWithSQLCmd( sqlCmd )

    ## Retrieve the length of a sequence given its description.
    #
    # @param description of the sequence
    # @return seqLength integer length of the sequence
    #
    def getSeqLengthFromDescription( self, description ):
        sqlCmd = 'SELECT length FROM %s WHERE description="%s"' % ( self._table, description )
        return self._iDb.getIntegerWithSQLCmd( sqlCmd )

    ## Retrieve all the accessions with length in a list of tuples
    #
    # @return lAccessionLengthTuples list of rows (accession, length) as fetched from the database
    #
    def getAccessionAndLengthList(self):
        sqlCmd = 'SELECT accession, length FROM %s' % self._table
        self._iDb.execute( sqlCmd )
        return list( self._iDb.fetchall() )

    ## get subsequence according to given parameters
    #
    # Coordinates are 1-based and inclusive. When start > end, the reverse
    # complement of the range [end, start] is returned.
    # On invalid input (coordinate <= 0, unknown accession, coordinate beyond
    # the sequence length) an error message is printed and the program exits
    # with status 1.
    #
    # @param accession
    # @param start integer
    # @param end integer
    # @return bioseq.sequence string
    #
    def getSubSequence( self, accession, start, end ):
        if start <= 0 or end <= 0:
            print( "ERROR with coordinates start=%i or end=%i" % ( start, end ) )
            sys.exit(1)

        # Targeted existence query instead of loading every accession of the
        # table: this method is called once per set by getBioseqFromSetList().
        # The comparison is done by the database, like the queries below.
        if not self.isAccessionInTable( accession ):
            print( "ERROR: accession '%s' absent from table '%s'" % ( accession, self._table ) )
            sys.exit(1)

        lengthAccession = self.getSeqLengthFromAccession( accession )
        if start > lengthAccession or end > lengthAccession:
            print( "ERROR: coordinates start=%i end=%i out of sequence '%s' range (%i bp)" % ( start, end, accession, lengthAccession ) )
            sys.exit(1)

        # SUBSTRING takes a 1-based position and a length, hence the
        # min()/abs() pair to cover both orientations with one query
        sqlCmd = "SELECT SUBSTRING(sequence,%i,%i) FROM %s WHERE accession='%s'" % ( min(start,end), abs(end-start)+ 1, self._table, accession )
        self._iDb.execute( sqlCmd )
        res = self._iDb.fetchall()

        bs = Bioseq()
        bs.setSequence( res[0][0] )
        if start > end:
            bs.reverseComplement()
        return bs.sequence

    ## get bioseq from given set list
    #
    # The header is "<name>::<id> <seqname> <start>..<end>,<start>..<end>,..."
    # where name, id and seqname come from the first set of the list. The
    # sequence is the concatenation of the sub-sequences of all the sets,
    # sorted by increasing coordinates (order reversed when the first set is
    # not on the direct strand).
    #
    # @param lSets set list of sets (must contain at least one set)
    # @return bioseq instance
    #
    def getBioseqFromSetList( self, lSets ):
        headerPrefix = "%s::%i %s " % ( lSets[0].name, lSets[0].id, lSets[0].seqname )
        lSortedSets = SetUtils.getSetListSortedByIncreasingMinThenMax( lSets )
        # lSets[0] is deliberately read again after the sort, as before
        if not lSets[0].isOnDirectStrand():
            lSortedSets.reverse()
        lCoords = []
        lSubSequences = []
        for iSet in lSortedSets:
            lCoords.append( "%i..%i" % ( iSet.getStart(), iSet.getEnd() ) )
            lSubSequences.append( self.getSubSequence( iSet.seqname, iSet.getStart(), iSet.getEnd() ) )
        return Bioseq( headerPrefix + ",".join( lCoords ), "".join( lSubSequences ) )

    ## Return True if the given accession is present in the table
    #
    # @param name string accession to look for
    # @return boolean
    #
    def isAccessionInTable( self, name ):
        sqlCmd = "SELECT accession FROM %s WHERE accession='%s'" % ( self._table, name )
        self._iDb.execute( sqlCmd )
        return bool( self._iDb.fetchall() )

    ## Export all the sequences of the table in a fasta file.
    #
    # One record is written per distinct accession.
    #
    # @param outFileName string Fasta file
    #
    def exportInFastaFile(self, outFileName ):
        self.saveAccessionsListInFastaFile( self.getAccessionsList(), outFileName )

    ## Execute a query and return the first field of each fetched row
    #
    # @param sqlCmd string SQL command
    # @return lString list with the first field of every row
    #
    def _getStringListWithSQLCmd( self, sqlCmd ):
        self._iDb.execute( sqlCmd )
        return [ row[0] for row in self._iDb.fetchall() ]

    ## Give the formats and the values needed to insert a bioseq
    #
    # The values are, in order: the first word of the header, the sequence,
    # the whole header and the sequence length.
    # NOTE(review): presumably mapped on the columns accession, sequence,
    # description and length by the caller -- TODO confirm.
    #
    # @param bs bioseq instance (its header must contain at least one word)
    # @return type2Insert, attr2Insert tuple of formats, tuple of values
    #
    def _getTypeAndAttr2Insert(self, bs):
        type2Insert =  ( "'%s'", "'%s'", "'%s'", "'%i'" )
        attr2Insert =  (bs.header.split()[0], bs.sequence, bs.header, bs.getLength())
        return type2Insert, attr2Insert

    ## Do nothing on purpose
    #
    # NOTE(review): presumably overrides a backslash-escaping hook of a parent
    # class to disable it for sequence data -- TODO confirm.
    #
    def _escapeAntislash(self, obj):
        pass
|  | 185 |