Mercurial > repos > yufei-luo > s_mart
comparison commons/pyRepetUnit/hmmer/profilsSearchInTEClassifier/ProfilesSearch.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 import subprocess | |
2 import os | |
3 import sys | |
4 | |
## Prepare a profiles databank and build the command text that searches the
#  profiles of that databank in a nucleotide sequence file
#
class ProfilesSearch(object):

    ## Run 'hmmpress -f' on the profiles bank located in the current directory
    #
    # The program is started from an argument list, without a shell, so a
    # directory or bank name containing spaces or shell metacharacters is
    # passed to 'hmmpress' unchanged.
    #
    # @param launch_1 string corresponding to pre command (not used by this method)
    # @param launch_2 string corresponding to post command (not used by this method)
    # @param config configParser object instance
    # @param cDir string current directory
    # @param verbose int (default = 0)
    # @raise Exception if 'hmmpress' cannot be started or returns a non-zero exit status
    #
    def prepareProfilesBank(self, launch_1, launch_2, config, cDir, verbose = 0):
        bank = self._getBankBaseName(config)
        prg = "hmmpress"
        if verbose > 0:
            # Single-argument call form: same output with the Python 2 print
            # statement and with the Python 3 print function.
            print("prepare bank '%s'..." % ( bank ))
            sys.stdout.flush()
        args = [prg, "-f", "%s/%s" % ( cDir, bank )]
        # Text used only in the error message (same layout as the historical
        # shell command line, trailing blank included).
        cmd = "%s -f %s/%s " % ( prg, cDir, bank )
        try:
            process = subprocess.Popen(args)
            process.communicate()
        except OSError:
            # Without a shell, a missing or non-executable program raises
            # OSError instead of yielding a non-zero exit status; report it
            # the same way as any other launch failure.
            raise Exception("ERROR when launching '%s'" % cmd)
        if process.returncode != 0:
            raise Exception("ERROR when launching '%s'" % cmd)

    ## Create the command text to detect Hmm profiles in a nt sequence file
    #
    # The returned text chains, in this order: translation of the input file,
    # 'hmmscan' of the translated file against the bank, conversion of the
    # hmmscan table into an align file, 'matcher' on that align file, and
    # Python statements that move the result files to cDir and delete the
    # intermediate files. Each launched program is wrapped between launch_1
    # and launch_2.
    #
    # @param inFileName string name of input file
    # @param launch_1 string corresponding to pre command
    # @param launch_2 string corresponding to post command
    # @param cDir string current directory
    # @param tmpDir string temporary directory
    # @param config configParser object instance
    # @return cmd string command to launch
    # @raise KeyError if the environment variable REPET_PATH is not set
    #
    def detectHmmProfiles(self, inFileName, launch_1, launch_2, cDir, tmpDir, config):
        bank = self._getBankBaseName(config)
        evalueMax = config.get("detect_features","TE_HMMER_evalue")
        binDir = os.environ["REPET_PATH"] + "/bin"

        translatedFile = "%s_translated" % ( inFileName )
        scanOutFile = "%s_tr.hmmScanOut" % ( inFileName )
        scanTabFile = "%s_tr.hmmScanOutTab" % ( inFileName )
        alignFile = "%s_profiles_%s.align" % ( inFileName, bank )

        parts = []

        # Step 1: translate the nucleotide sequences.
        parts.append(launch_1)
        parts.append(binDir + "/translateAfastaFileInAllFrameAndReplaceStopsByX_script.py")
        parts.append(" -i %s" % ( inFileName ))
        parts.append(" -o %s" % ( translatedFile ))
        parts.append(launch_2)

        # Step 2: scan the translated sequences against the profiles bank.
        parts.append(launch_1)
        parts.append("hmmscan ")
        parts.append(" -o %s" % ( scanOutFile ))
        parts.append(" --domtblout %s" % ( scanTabFile ))
        parts.append(" --noali -E " + str(evalueMax))
        parts.append(" --cpu 1 ")
        parts.append("%s/%s" % ( cDir, bank ) + " " + translatedFile)
        parts.append(launch_2)

        parts.append(self._removeIfExistsSnippet(translatedFile))

        # Step 3: convert the hmmscan table into an align file.
        parts.append(launch_1)
        parts.append(binDir + "/HmmOutput2alignAndTransformCoordInNtAndFilterScores_script.py")
        parts.append(" -i %s" % ( scanTabFile ))
        parts.append(" -o %s" % ( alignFile ))
        parts.append(" -T %s" % ( inFileName ))
        parts.append(" -p hmmscan")
        parts.append(" -c")
        parts.append(launch_2)

        # Step 4: run matcher on the align file.
        parts.append(launch_1)
        parts.append(binDir + "/matcher")
        parts.append(" -m %s" % ( alignFile ))
        parts.append(" -j")
        parts.append(" -E 10")
        parts.append(" -L 0")
        parts.append(" -v 1")
        parts.append(launch_2)

        # Step 5: keep the matcher results in cDir, drop the intermediates.
        parts.append(self._moveIfAbsentSnippet(alignFile + ".clean_match.path", cDir))
        parts.append(self._moveIfAbsentSnippet(alignFile + ".clean_match.param", cDir))
        parts.append(self._removeIfExistsSnippet(alignFile))
        parts.append(self._removeIfExistsSnippet(alignFile + ".clean_match.map"))
        # hmmscan writes its report to "<in>_tr.hmmScanOut" (see '-o' above).
        # The clean-up used to target "<in>_hmmScanOut", a file this command
        # never creates, so the report was never deleted.
        parts.append(self._removeIfExistsSnippet(scanOutFile))

        if tmpDir != cDir:
            parts.append(self._removeIfExistsSnippet(bank))

        return "".join(parts)

    ## Give the Python statements deleting a file when it exists
    #
    # @param fileName string name of the file to delete
    # @return string two lines of Python code, the second one indented with a tab
    #
    def _removeIfExistsSnippet(self, fileName):
        snippet = "if os.path.exists( \"%s\" ):\n" % ( fileName )
        snippet += "\tos.remove( \"%s\" )\n" % ( fileName )
        return snippet

    ## Give the Python statements moving a file into a directory when the
    #  directory does not already contain a file with that name
    #
    # @param fileName string name of the file to move
    # @param dirName string destination directory
    # @return string two lines of Python code, the second one indented with a tab
    #
    def _moveIfAbsentSnippet(self, fileName, dirName):
        snippet = "if not os.path.exists( \"%s/%s\" ):\n" % ( dirName, fileName )
        snippet += "\tos.system( \"mv %s %s\" )\n" % ( fileName, dirName )
        return snippet

    ## Give the base name of the profiles bank declared in the configuration
    #
    # @param config configParser object instance (option "TE_HMM_profiles" of section "detect_features" is read)
    # @return bank string file name of the bank, without its directory
    #
    def _getBankBaseName(self, config):
        profilsHmmBank = config.get("detect_features", "TE_HMM_profiles")
        return os.path.basename(profilsHmmBank)
103 |