18
|
1 import subprocess
|
|
2 import os
|
|
3 import sys
|
|
4
|
|
## Prepare a profiles databank and build the command text used to search
#  profiles from that databank in a nucleotides databank
#
class ProfilesSearch(object):

    ## Launch the command that prepares the profiles bank
    #
    # Runs "hmmpress -f <cDir>/<bank>" through the shell and waits for it to
    # finish. <bank> is the base name of the file configured under
    # [detect_features] TE_HMM_profiles.
    #
    # @param launch_1 string corresponding to pre command (not used by this method)
    # @param launch_2 string corresponding to post command (not used by this method)
    # @param config configParser object instance
    # @param cDir string current directory
    # @param verbose int (default = 0)
    # @exception Exception raised when the command exits with a non-zero status
    #
    def prepareProfilesBank(self, launch_1, launch_2, config, cDir, verbose = 0):
        bank = self._getBankBaseName(config)
        if verbose > 0:
            # Single-argument parenthesised form: prints the same text whether
            # 'print' is a statement (Python 2) or a function (Python 3).
            print("prepare bank '%s'..." % ( bank ))
            sys.stdout.flush()
        # The trailing space is kept so the error message below is unchanged.
        cmd = "hmmpress -f %s/%s " % ( cDir, bank )
        process = subprocess.Popen(cmd, shell = True)
        process.communicate()
        if process.returncode != 0:
            raise Exception("ERROR when launching '%s'" % cmd)

    ## Create the command text that detects Hmm profiles in a nt sequence file
    #
    # Nothing is executed here; the method only assembles and returns text.
    # The text is made of four external commands, each wrapped between
    # launch_1 and launch_2, interleaved with Python statements
    # (os.path.exists / os.remove / os.system) that clean up or move files:
    #   1. translateAfastaFileInAllFrameAndReplaceStopsByX_script.py
    #      writes "<in>_translated"
    #   2. hmmscan on "<in>_translated" against "<cDir>/<bank>", writing
    #      "<in>_tr.hmmScanOut" and "<in>_tr.hmmScanOutTab"
    #   3. HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py
    #      writes "<in>_profiles_<bank>.align"
    #   4. matcher on that align file
    # NOTE(review): launch_1 / launch_2 presumably wrap each shell command in
    # a Python call so the whole text is a runnable script - confirm with the
    # caller.
    #
    # @param inFileName string name of input file
    # @param launch_1 string corresponding to pre command
    # @param launch_2 string corresponding to post command
    # @param cDir string current directory
    # @param tmpDir string temporary directory
    # @param config configParser object instance
    # @return cmd string command to launch
    # @exception KeyError raised when the REPET_PATH environment variable is not set
    #
    def detectHmmProfiles(self, inFileName, launch_1, launch_2, cDir, tmpDir, config):
        bank = self._getBankBaseName(config)
        evalueMax = config.get("detect_features","TE_HMMER_evalue")
        binDir = os.environ["REPET_PATH"] + "/bin/"

        # Every intermediate file name is derived once from the input name.
        translatedFile = "%s_translated" % ( inFileName )
        scanOutFile = "%s_tr.hmmScanOut" % ( inFileName )
        scanTabFile = "%s_tr.hmmScanOutTab" % ( inFileName )
        alignFile = "%s_profiles_%s.align" % ( inFileName, bank )

        pieces = []

        # 1. translation of the input file
        pieces.append(launch_1)
        pieces.append(binDir + "translateAfastaFileInAllFrameAndReplaceStopsByX_script.py")
        pieces.append(" -i %s" % ( inFileName ))
        pieces.append(" -o %s" % ( translatedFile ))
        pieces.append(launch_2)

        # 2. hmmscan of the translated file against the profiles bank
        pieces.append(launch_1)
        pieces.append("hmmscan ")
        pieces.append(" -o %s" % ( scanOutFile ))
        pieces.append(" --domtblout %s" % ( scanTabFile ))
        pieces.append(" --noali -E %s" % ( evalueMax ))
        pieces.append(" --cpu 1 ")
        pieces.append("%s/%s %s" % ( cDir, bank, translatedFile ))
        pieces.append(launch_2)

        # the translated file is no longer needed once hmmscan has run
        pieces.append(self._removeIfExistsSnippet(translatedFile))

        # 3. conversion of the hmmscan table into an align file
        pieces.append(launch_1)
        pieces.append(binDir + "HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py")
        pieces.append(" -i %s" % ( scanTabFile ))
        pieces.append(" -o %s" % ( alignFile ))
        pieces.append(" -T %s" % ( inFileName ))
        pieces.append(" -p hmmscan")
        pieces.append(" -c")
        pieces.append(launch_2)

        # 4. matcher on the align file
        pieces.append(launch_1)
        pieces.append(binDir + "matcher")
        pieces.append(" -m %s" % ( alignFile ))
        pieces.append(" -j")
        pieces.append(" -E 10")
        pieces.append(" -L 0")
        pieces.append(" -v 1")
        pieces.append(launch_2)

        # keep the matcher results in cDir, drop the intermediate files
        pieces.append(self._moveIfAbsentSnippet(alignFile + ".clean_match.path", cDir))
        pieces.append(self._moveIfAbsentSnippet(alignFile + ".clean_match.param", cDir))
        pieces.append(self._removeIfExistsSnippet(alignFile))
        pieces.append(self._removeIfExistsSnippet(alignFile + ".clean_match.map"))
        # Fix: the cleanup used to target "<in>_hmmScanOut", a file that no
        # step creates; hmmscan writes "<in>_tr.hmmScanOut" (see -o above).
        pieces.append(self._removeIfExistsSnippet(scanOutFile))

        # when working in a separate temporary directory, also remove the
        # file named after the bank from the working directory
        if tmpDir != cDir:
            pieces.append(self._removeIfExistsSnippet(bank))

        return "".join(pieces)

    ## Give the base name of the Hmm profiles bank declared in the configuration
    #
    # @param config configParser object instance
    # @return bank string base name of [detect_features] TE_HMM_profiles
    #
    def _getBankBaseName(self, config):
        profilsHmmBank = config.get("detect_features", "TE_HMM_profiles")
        return os.path.basename(profilsHmmBank)

    ## Build the Python statements removing a file when it exists
    #
    # @param fileName string name of the file to remove
    # @return string two lines of Python source (if / tab-indented os.remove)
    #
    def _removeIfExistsSnippet(self, fileName):
        snippet = "if os.path.exists( \"%s\" ):\n" % ( fileName )
        snippet += "\tos.remove( \"%s\" )\n" % ( fileName )
        return snippet

    ## Build the Python statements moving a file into a directory unless the
    #  directory already holds a file with that name
    #
    # @param fileName string name of the file to move
    # @param destDir string destination directory
    # @return string two lines of Python source (if not / tab-indented os.system mv)
    #
    def _moveIfAbsentSnippet(self, fileName, destDir):
        snippet = "if not os.path.exists( \"%s/%s\" ):\n" % ( destDir, fileName )
        snippet += "\tos.system( \"mv %s %s\" )\n" % ( fileName, destDir )
        return snippet
|
|
103
|