comparison commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/ProfilesSearch.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 import subprocess
2 import os
3 import sys
4
## Prepare profiles databank and create command to search profiles from a profiles databank in a nucleotides databank
#
# The bank name is always the base name of the "TE_HMM_profiles" option found
# in the "detect_features" section of the configuration.
#
class ProfilesSearch(object):

    ## launch command to prepare profiles bank
    #
    # Runs "hmmpress -f <cDir>/<bank>" and waits for it to finish. The program is
    # started from an argument list (no shell), so a directory or bank name
    # containing spaces or shell metacharacters is passed through unchanged.
    #
    # @param launch_1 string corresponding to pre command (not used by this method)
    # @param launch_2 string corresponding to post command (not used by this method)
    # @param config configParser object instance
    # @param cDir string current directory
    # @param verbose int (default = 0)
    # @exception Exception if hmmpress cannot be started or returns a non-zero exit status
    #
    def prepareProfilesBank(self, launch_1, launch_2, config, cDir, verbose = 0):
        bank = self._getBankBaseName(config)
        prg = "hmmpress"
        bankPath = "%s/%s" % (cDir, bank)
        if verbose > 0:
            # single-argument call form: same output whether print is a statement or a function
            print("prepare bank '%s'..." % (bank))
            sys.stdout.flush()
        # textual form of the command, only used in error messages
        cmd = "%s -f %s " % (prg, bankPath)
        try:
            process = subprocess.Popen([prg, "-f", bankPath])
        except OSError as e:
            # without a shell a missing executable raises OSError; report it like any other failure
            raise Exception("ERROR when launching '%s': %s" % (cmd, e))
        process.communicate()
        if process.returncode != 0:
            raise Exception("ERROR when launching '%s'" % cmd)

    ## create command to detect Hmm profiles in a nt sequence file
    #
    # Nothing is executed here: the method only assembles and returns text.
    # That text chains, in this order:
    #  - the translation script (writes "<inFileName>_translated"),
    #  - hmmscan against "<cDir>/<bank>" (writes "<inFileName>_tr.hmmScanOut" and "<inFileName>_tr.hmmScanOutTab"),
    #  - the conversion script (writes "<inFileName>_profiles_<bank>.align"),
    #  - matcher on that align file,
    #  - Python statements (tab-indented "if ...:" blocks) that move the
    #    ".clean_match.path" and ".clean_match.param" files into cDir and remove the intermediate files.
    # Each external command is surrounded by launch_1 and launch_2.
    # NOTE(review): launch_1 / launch_2 are presumably the opening and closing parts of a
    # Python call that runs the command - confirm against the caller.
    #
    # @param inFileName string name of input file
    # @param launch_1 string corresponding to pre command
    # @param launch_2 string corresponding to post command
    # @param cDir string current directory
    # @param tmpDir string temporary directory; when it differs from cDir the returned text also removes the file named after the bank
    # @param config configParser object instance
    # @return cmd string command to launch
    # @exception KeyError if the REPET_PATH environment variable is not set
    #
    def detectHmmProfiles(self, inFileName, launch_1, launch_2, cDir, tmpDir, config):
        bank = self._getBankBaseName(config)
        evalueMax = config.get("detect_features","TE_HMMER_evalue")
        binDir = os.environ["REPET_PATH"] + "/bin/"

        translatedFile = "%s_translated" % ( inFileName )
        scanOutFile = "%s_tr.hmmScanOut" % ( inFileName )
        scanTabFile = "%s_tr.hmmScanOutTab" % ( inFileName )
        alignFile = "%s_profiles_%s.align" % ( inFileName, bank )

        parts = []

        # step 1: translate the nucleotide sequences
        parts.append(launch_1)
        parts.append(binDir + "translateAfastaFileInAllFrameAndReplaceStopsByX_script.py")
        parts.append(" -i %s" % ( inFileName ))
        parts.append(" -o %s" % ( translatedFile ))
        parts.append(launch_2)

        # step 2: search the profiles bank with the translated sequences
        parts.append(launch_1)
        parts.append("hmmscan ")
        parts.append(" -o %s" % ( scanOutFile ))
        parts.append(" --domtblout %s" % ( scanTabFile ))
        parts.append(" --noali -E " + evalueMax)
        parts.append(" --cpu 1 ")
        parts.append("%s/%s %s" % ( cDir, bank, translatedFile ))
        parts.append(launch_2)

        parts.append(self._removeIfExists(translatedFile))

        # step 3: convert the hmmscan table into an align file
        parts.append(launch_1)
        parts.append(binDir + "HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py")
        parts.append(" -i %s" % ( scanTabFile ))
        parts.append(" -o %s" % ( alignFile ))
        parts.append(" -T %s" % ( inFileName ))
        parts.append(" -p hmmscan")
        parts.append(" -c")
        parts.append(launch_2)

        # step 4: run matcher on the align file
        parts.append(launch_1)
        parts.append(binDir + "matcher")
        parts.append(" -m %s" % ( alignFile ))
        parts.append(" -j")
        parts.append(" -E 10")
        parts.append(" -L 0")
        parts.append(" -v 1")
        parts.append(launch_2)

        # step 5: keep the matcher results in cDir, drop the intermediate files
        parts.append(self._moveIfAbsent(alignFile + ".clean_match.path", cDir))
        parts.append(self._moveIfAbsent(alignFile + ".clean_match.param", cDir))
        parts.append(self._removeIfExists(alignFile))
        parts.append(self._removeIfExists(alignFile + ".clean_match.map"))
        # remove the report under the name hmmscan really wrote ("_tr.hmmScanOut");
        # the former "_hmmScanOut" name never matched any file
        parts.append(self._removeIfExists(scanOutFile))

        if tmpDir != cDir:
            parts.append(self._removeIfExists(bank))

        return "".join(parts)

    ## build the Python statements removing a file when it exists
    #
    # @param fileName string name of the file to remove
    # @return string two lines of Python code (the second one tab-indented)
    #
    def _removeIfExists(self, fileName):
        snippet = "if os.path.exists( \"%s\" ):\n" % ( fileName )
        snippet += "\tos.remove( \"%s\" )\n" % ( fileName )
        return snippet

    ## build the Python statements moving a file into a directory unless it is already there
    #
    # @param fileName string name of the file to move
    # @param destDir string destination directory
    # @return string two lines of Python code (the second one tab-indented)
    #
    def _moveIfAbsent(self, fileName, destDir):
        snippet = "if not os.path.exists( \"%s/%s\" ):\n" % ( destDir, fileName )
        snippet += "\tos.system( \"mv %s %s\" )\n" % ( fileName, destDir )
        return snippet

    ## give the base name of the profiles bank set in the configuration
    #
    # @param config configParser object instance
    # @return string base name of option "TE_HMM_profiles" (section "detect_features")
    #
    def _getBankBaseName(self, config):
        profilsHmmBank = config.get("detect_features", "TE_HMM_profiles")
        return os.path.basename(profilsHmmBank)
103