diff commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/ProfilesSearch.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/ProfilesSearch.py	Mon Apr 29 03:20:15 2013 -0400
@@ -0,0 +1,103 @@
+import subprocess
+import os
+import sys
+
+## Prepare profiles databank and create command to search profiles from a profiles databank in a nucleotides databank
+#
+class ProfilesSearch(object):
+
+    ## launch command to prepare profiles bank
+    #
+    # @param launch_1 string corresponding to pre command
+    # @param launch_2 string corresponding to post command
+    # @param config configParser object instance
+    # @param cDir string current directory
+    # @param verbose int (default = 0)
+    #
+    def prepareProfilesBank(self, launch_1, launch_2, config, cDir, verbose = 0):
+        bank = self._getBankBaseName(config)
+        prg = "hmmpress"
+        if verbose > 0:
+            print "prepare bank '%s'..." % ( bank ); sys.stdout.flush()
+        cmd = ""
+        cmd += prg + " -f "
+        cmd += "%s/%s " % ( cDir, bank )
+        process = subprocess.Popen(cmd, shell = True)
+        process.communicate()
+        if process.returncode != 0:
+            raise Exception("ERROR when launching '%s'" % cmd)
+    
+    ## create command to detect Hmm profiles in a nt sequence file
+    #
+    # @param inFileName string name of input file
+    # @param launch_1 string corresponding to pre command
+    # @param launch_2 string corresponding to post command
+    # @param cDir string current directory
+    # @param tmpDir string temporary directory
+    # @param config configParser object instance
+    # @return cmd string command to launch
+    #        
+    def detectHmmProfiles(self, inFileName, launch_1, launch_2, cDir, tmpDir, config):
+        bank = self._getBankBaseName(config)
+        evalueMax = config.get("detect_features","TE_HMMER_evalue")
+        
+        cmd = ""
+        
+        cmd += launch_1
+        cmd += os.environ["REPET_PATH"] + "/bin/translateAfastaFileInAllFrameAndReplaceStopsByX_script.py"
+        cmd += " -i %s" % ( inFileName )
+        cmd += " -o %s_translated" % ( inFileName )
+        cmd += launch_2
+        
+        cmd += launch_1
+        cmd += "hmmscan "
+        cmd += " -o %s_tr.hmmScanOut" % ( inFileName )
+        cmd += " --domtblout %s_tr.hmmScanOutTab" % ( inFileName )
+        cmd += " --noali -E " + evalueMax
+        cmd += " --cpu 1 "        
+        cmd += "%s/%s" % ( cDir, bank ) + " " + "%s_translated" % ( inFileName )
+        cmd += launch_2
+        
+        cmd += "if os.path.exists( \"%s_translated\" ):\n" % ( inFileName )
+        cmd += "\tos.remove( \"%s_translated\" )\n" % ( inFileName )
+        
+        cmd += launch_1
+        cmd += os.environ["REPET_PATH"] + "/bin/HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py"
+        cmd += " -i %s_tr.hmmScanOutTab" % ( inFileName )
+        cmd += " -o %s_profiles_%s.align" % ( inFileName, bank )
+        cmd += " -T %s" % ( inFileName )
+        cmd += " -p hmmscan"
+        cmd += " -c"
+        cmd += launch_2
+        
+        cmd += launch_1
+        cmd += os.environ["REPET_PATH"] + "/bin/matcher"
+        cmd += " -m %s_profiles_%s.align" % ( inFileName, bank )
+        cmd += " -j"
+        cmd += " -E 10"
+        cmd += " -L 0"
+        cmd += " -v 1"
+        cmd += launch_2
+        
+        cmd += "if not os.path.exists( \"%s/%s_profiles_%s.align.clean_match.path\" ):\n" % ( cDir, inFileName, bank )
+        cmd += "\tos.system( \"mv %s_profiles_%s.align.clean_match.path %s\" )\n" % ( inFileName, bank, cDir )
+        cmd += "if not os.path.exists( \"%s/%s_profiles_%s.align.clean_match.param\" ):\n" % ( cDir, inFileName, bank )
+        cmd += "\tos.system( \"mv %s_profiles_%s.align.clean_match.param %s\" )\n" % ( inFileName, bank, cDir )
+        cmd += "if os.path.exists( \"%s_profiles_%s.align\" ):\n" % ( inFileName, bank )
+        cmd += "\tos.remove( \"%s_profiles_%s.align\" )\n" % ( inFileName, bank )
+        cmd += "if os.path.exists( \"%s_profiles_%s.align.clean_match.map\" ):\n" % ( inFileName, bank )
+        cmd += "\tos.remove( \"%s_profiles_%s.align.clean_match.map\" )\n" % ( inFileName, bank )
+        cmd += "if os.path.exists( \"%s_hmmScanOut\" ):\n" % ( inFileName )
+        cmd += "\tos.remove( \"%s_hmmScanOut\" )\n" % ( inFileName )
+        
+        if tmpDir != cDir:
+            cmd += "if os.path.exists( \"%s\" ):\n" % ( bank )
+            cmd += "\tos.remove( \"%s\" )\n" % ( bank )
+            
+        return cmd
+    
+    def _getBankBaseName(self, config):
+        profilsHmmBank = config.get("detect_features", "TE_HMM_profiles")
+        bank = os.path.basename(profilsHmmBank)
+        return bank
+