Mercurial > repos > yufei-luo > s_mart
comparison commons/pyRepetUnit/profilesDB/CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber.py @ 18:94ab73e8a190
Uploaded
| author | m-zytnicki |
|---|---|
| date | Mon, 29 Apr 2013 03:20:15 -0400 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 17:b0e8584489e6 | 18:94ab73e8a190 |
|---|---|
| 1 from commons.pyRepetUnit.profilesDB.ProfilesDatabankUtils import ProfilesDatabankUtils | |
| 2 import re | |
| 3 import sys | |
| 4 | |
| 5 | |
class CompleteAProfilesDBFromAFileWithProfilesNamesOrAccNumber(object):
    '''
    Check whether the profiles listed in a file are present in a profiles DB
    and build the getz command that retrieves the missing ones.

    By default each entry of the list is compared to the profile names of the
    DB; after a call to setPfamAccNumberKeys() entries are compared to the
    accession numbers instead.
    '''

    def __init__(self):
        '''
        Constructor
        '''
        self.profilesDBUtils = ProfilesDatabankUtils()
        # Result of the last generateNotExistingProfilesList() call.
        self.profilesToAddNotInDB = []
        # False: entries are profile names; True: entries are accession numbers.
        self._pfamAccNumber = False

    def setProfilesDBFile(self, profilesDBFile):
        '''
        Set the profiles DB file handed to ProfilesDatabankUtils.read().
        '''
        self._profilesDBFile = profilesDBFile

    def setProfilesToAdd(self, profilesFile):
        '''
        Set the path of the file listing the profiles to look for (one per line).
        '''
        self._profilesToAdd = profilesFile

    def setPfamAccNumberKeys(self):
        '''
        Treat the entries of the profiles list as accession numbers, not names.
        '''
        self._pfamAccNumber = True

    def _IsProfilInDB(self, pfamDBList, profil):
        '''
        Tell whether profil is present in pfamDBList.

        In name mode the entry must be equal to the name of a DB profile.
        In accession number mode the accession number of a DB profile must
        start with the entry followed by a dot and at least one digit.

        @param pfamDBList: object whose getList() returns the DB profiles
        @param profil: profile name or accession number to look for
        @return: True if a matching profile is found, False otherwise
        '''
        dbProfiles = pfamDBList.getList()
        if self._pfamAccNumber:
            # The entry is escaped so that it is matched literally and never
            # interpreted as a regular expression.
            accPattern = re.compile(re.escape(profil) + r"\.\d+")
            return any(accPattern.match(dbProfile.accNumber) for dbProfile in dbProfiles)
        return any(dbProfile.name == profil for dbProfile in dbProfiles)

    def _generateProfilesList(self):
        '''
        Read the profiles file and return its lines, line endings included.
        '''
        # "with" guarantees that the file is closed, even if reading fails.
        with open(self._profilesToAdd) as profilesFile:
            return profilesFile.readlines()

    def generateNotExistingProfilesList(self):
        '''
        generate the profiles list of profiles not in profiles DB among profiles in a list of name or accession number

        Lines that are empty once trailing whitespace is removed are ignored.
        If the profiles DB contains no profile, the returned list is empty.

        @return: list of the entries that were not found in the profiles DB
        '''
        self.profilesToAddNotInDB = []
        profilesToAddList = self._generateProfilesList()
        pfamDBList = self.profilesDBUtils.read(self._profilesDBFile)
        if pfamDBList.getList() == []:
            return self.profilesToAddNotInDB
        for line in profilesToAddList:
            profil = line.rstrip()
            if not profil:
                # A blank line ("\n", "\r\n", spaces...) is not a profile; as
                # an empty name it would produce a query matching everything.
                continue
            if not self._IsProfilInDB(pfamDBList, profil):
                self.profilesToAddNotInDB.append(profil)
        return self.profilesToAddNotInDB

    def generateGetzCmdProfilesList(self, profilesList):
        '''
        generate the getz command to retrieve profiles list of name or accession number

        Names are queried on the pfamhmm-Id field with a trailing wildcard,
        accession numbers on the pfamhmm-AccNumber field without wildcard.

        @param profilesList: list of profile names or accession numbers
        @return: the getz command as a string; the query between the single
                 quotes is empty when profilesList is empty
        '''
        if self._pfamAccNumber:
            queries = ["[pfamhmm-AccNumber:\"" + accNumber + "\"]" for accNumber in profilesList]
        else:
            queries = ["[pfamhmm-Id:\"" + name + "*\"]" for name in profilesList]
        # join() puts the separator only between queries, so nothing has to be
        # trimmed afterwards and an empty list cannot damage the command prefix.
        return "getz -e \'" + " | ".join(queries) + "\'"

    def CmdToCompleteProfileDB(self):
        '''
        generate the getz command to retrieve profiles list of name or accession number if the profile is not yet in profiles DB
        '''
        profilesList2Add = self.generateNotExistingProfilesList()
        return self.generateGetzCmdProfilesList(profilesList2Add)
