annotate commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/ProfilesSearch.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 import subprocess
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 ## Prepare profiles databank and create command to search profiles from a profiles databank in a nucleotides databank
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
class ProfilesSearch(object):

    ## Launch hmmpress to prepare the profiles bank
    #
    # The bank file name is the base name of the "TE_HMM_profiles" option
    # ("detect_features" section of the configuration); the file is
    # addressed as "<cDir>/<bank>".
    #
    # @param launch_1 string corresponding to pre command (not used by this method)
    # @param launch_2 string corresponding to post command (not used by this method)
    # @param config configParser object instance
    # @param cDir string current directory
    # @param verbose int (default = 0)
    # @raise Exception if hmmpress cannot be started or returns a non-zero status
    #
    def prepareProfilesBank(self, launch_1, launch_2, config, cDir, verbose = 0):
        bank = self._getBankBaseName(config)
        prg = "hmmpress"
        if verbose > 0:
            # Parenthesised single argument: prints the same text on Python 2 and 3.
            print("prepare bank '%s'..." % ( bank ))
            sys.stdout.flush()
        # Argument list + no shell: a directory or bank name containing spaces
        # or shell metacharacters is passed to hmmpress as a single argument.
        args = [prg, "-f", "%s/%s" % ( cDir, bank )]
        cmd = " ".join(args)
        try:
            process = subprocess.Popen(args)
        except OSError as err:
            # Without a shell, a missing executable surfaces as OSError; keep
            # reporting it through the same generic Exception as other failures.
            raise Exception("ERROR when launching '%s' (%s)" % (cmd, err))
        process.communicate()
        if process.returncode != 0:
            raise Exception("ERROR when launching '%s'" % cmd)

    ## Create command to detect Hmm profiles in a nt sequence file
    #
    # Nothing is executed here: the method only returns text. Each external
    # program call is wrapped between launch_1 and launch_2, and the clean-up
    # steps are emitted as Python statements using os.path / os.remove /
    # os.system. The steps are, in order: translate the input in all frames,
    # run hmmscan against "<cDir>/<bank>", convert the hmmscan table into an
    # align file, run matcher on it, then move results to cDir and remove
    # intermediate files.
    #
    # @param inFileName string name of input file
    # @param launch_1 string corresponding to pre command
    # @param launch_2 string corresponding to post command
    # @param cDir string current directory
    # @param tmpDir string temporary directory
    # @param config configParser object instance
    # @return cmd string command to launch
    # @raise KeyError if the REPET_PATH environment variable is not set
    #
    def detectHmmProfiles(self, inFileName, launch_1, launch_2, cDir, tmpDir, config):
        bank = self._getBankBaseName(config)
        evalueMax = config.get("detect_features","TE_HMMER_evalue")
        binDir = os.environ["REPET_PATH"] + "/bin/"

        translatedFile = "%s_translated" % ( inFileName )
        scanOutFile = "%s_tr.hmmScanOut" % ( inFileName )
        scanTabFile = "%s_tr.hmmScanOutTab" % ( inFileName )
        alignFile = "%s_profiles_%s.align" % ( inFileName, bank )

        parts = []

        # 1) six-frame translation of the input sequences
        parts.append(launch_1)
        parts.append(binDir + "translateAfastaFileInAllFrameAndReplaceStopsByX_script.py")
        parts.append(" -i %s" % ( inFileName ))
        parts.append(" -o %s" % ( translatedFile ))
        parts.append(launch_2)

        # 2) hmmscan of the translated sequences against the profiles bank
        parts.append(launch_1)
        parts.append("hmmscan ")
        parts.append(" -o %s" % ( scanOutFile ))
        parts.append(" --domtblout %s" % ( scanTabFile ))
        # %s formatting so a non-string e-value from the config does not raise
        parts.append(" --noali -E %s" % ( evalueMax ))
        parts.append(" --cpu 1 ")
        parts.append("%s/%s %s" % ( cDir, bank, translatedFile ))
        parts.append(launch_2)

        parts.append(self._removeIfExistsCode(translatedFile))

        # 3) conversion of the hmmscan table into an align file
        parts.append(launch_1)
        parts.append(binDir + "HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py")
        parts.append(" -i %s" % ( scanTabFile ))
        parts.append(" -o %s" % ( alignFile ))
        parts.append(" -T %s" % ( inFileName ))
        parts.append(" -p hmmscan")
        parts.append(" -c")
        parts.append(launch_2)

        # 4) matcher on the align file
        parts.append(launch_1)
        parts.append(binDir + "matcher")
        parts.append(" -m %s" % ( alignFile ))
        parts.append(" -j")
        parts.append(" -E 10")
        parts.append(" -L 0")
        parts.append(" -v 1")
        parts.append(launch_2)

        # 5) keep the matcher results in cDir, drop the intermediate files
        parts.append(self._moveIfAbsentCode(alignFile + ".clean_match.path", cDir))
        parts.append(self._moveIfAbsentCode(alignFile + ".clean_match.param", cDir))
        parts.append(self._removeIfExistsCode(alignFile))
        parts.append(self._removeIfExistsCode(alignFile + ".clean_match.map"))
        # hmmscan writes "<in>_tr.hmmScanOut" (see step 2); the clean-up used to
        # test "<in>_hmmScanOut", which is never created, so nothing was removed.
        parts.append(self._removeIfExistsCode(scanOutFile))

        if tmpDir != cDir:
            parts.append(self._removeIfExistsCode(bank))

        return "".join(parts)

    ## Give the base name of the Hmm profiles bank
    #
    # @param config configParser object instance
    # @return bank string base name of the "TE_HMM_profiles" option ("detect_features" section)
    #
    def _getBankBaseName(self, config):
        profilsHmmBank = config.get("detect_features", "TE_HMM_profiles")
        return os.path.basename(profilsHmmBank)

    ## Build the Python statements removing a file when it exists
    #
    # @param fileName string name of the file to remove
    # @return string two lines of Python code (if + tab-indented os.remove)
    #
    def _removeIfExistsCode(self, fileName):
        code = "if os.path.exists( \"%s\" ):\n" % ( fileName )
        code += "\tos.remove( \"%s\" )\n" % ( fileName )
        return code

    ## Build the Python statements moving a file into a directory when it is not already there
    #
    # @param fileName string name of the file to move
    # @param destDir string destination directory
    # @return string two lines of Python code (if not + tab-indented os.system "mv")
    #
    def _moveIfAbsentCode(self, fileName, destDir):
        code = "if not os.path.exists( \"%s/%s\" ):\n" % ( destDir, fileName )
        code += "\tos.system( \"mv %s %s\" )\n" % ( fileName, destDir )
        return code
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
103