Mercurial > repos > rnateam > rnacommender
annotate rbpfeatures.py @ 5:b3462a72ff76 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ef62aa3d86abd4b911e35447646712a4628e7fe
author | rnateam |
---|---|
date | Fri, 29 Jul 2016 03:27:18 -0400 |
parents | a609d6dc8047 |
children |
rev | line source |
---|---|
0
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
1 """Compute the RBP features.""" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
2 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
3 import re |
4
a609d6dc8047
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 7ad344d108076116e702e1c1e91cea73d8fcadc4
rnateam
parents:
0
diff
changeset
|
4 import sys |
0
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
5 import subprocess as sp |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
6 import uuid |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
7 from os import mkdir |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
8 from os import listdir |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
9 from os.path import isfile, join |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
10 from os import devnull |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
11 from shutil import rmtree |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
12 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
13 import numpy as np |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
14 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
15 import pandas as pd |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
16 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
17 import fasta_utils |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
18 import pfam_utils |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
19 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
20 __author__ = "Gianluca Corrado" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
21 __copyright__ = "Copyright 2016, Gianluca Corrado" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
22 __license__ = "MIT" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
23 __maintainer__ = "Gianluca Corrado" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
24 __email__ = "gianluca.corrado@unitn.it" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
25 __status__ = "Production" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
26 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
27 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
28 class RBPVectorizer(): |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
29 """Compute the RBP features.""" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
30 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
31 def __init__(self, fasta): |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
32 """ |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
33 Constructor. |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
34 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
35 Parameters |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
36 ---------- |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
37 fasta : str |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
38 Fasta file containing the RBP sequences to predict. |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
39 """ |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
40 self.fasta = fasta |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
41 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
42 self._mod_fold = "AURA_Human_data/RBP_features/mod" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
43 self._reference_fisher_scores = \ |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
44 "AURA_Human_data/RBP_features/fisher_scores_ref" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
45 self._train_rbps_file = \ |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
46 "AURA_Human_data/RBP_features/rbps_in_train.txt" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
47 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
48 self._temp_fold = "temp_" + str(uuid.uuid4()) |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
49 self.pfam_scan = "%s/pfam_scan.txt" % self._temp_fold |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
50 self._dom_fold = "%s/domains" % self._temp_fold |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
51 self._seeds_fold = "%s/seeds" % self._temp_fold |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
52 self._fisher_fold = "%s/fisher_scores" % self._temp_fold |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
53 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
def _pfam_scan(self):
    """Scan the sequences against the Pfam database.

    Writes the header returned by ``pfam_utils.search_header()`` to
    ``self.pfam_scan``, followed by the text returned by
    ``pfam_utils.sequence_search`` for each sequence of the input fasta,
    visited in sorted name order.

    Raises
    ------
    SystemExit
        If the fasta file does not contain exactly one sequence.
    """
    # The context manager guarantees the report is closed even when
    # sys.exit() or a failing search unwinds the stack.
    with open(self.pfam_scan, "w") as report:
        report.write(pfam_utils.search_header())

        fasta = fasta_utils.import_fasta(self.fasta)

        if len(fasta) != 1:
            sys.exit("""Fasta file must contain exactly one sequence.""")

        for rbp in sorted(fasta.keys()):
            report.write(pfam_utils.sequence_search(rbp, fasta[rbp]))
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
70 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
def _overlapping_domains(self):
    """Compute the set of domains contributing to the similarity.

    Returns
    -------
    list
        Sorted accessions found both among the model files in
        ``self._mod_fold`` and in the ``hmm_acc`` column of the scan
        output; an empty list when the scan output reads as ``None``.
    """
    # Regular files in the model folder, with ".mod" removed from the name.
    model_files = [name for name in listdir(self._mod_fold)
                   if isfile(join(self._mod_fold, name))]
    reference_domains = {name.replace(".mod", "") for name in model_files}

    data = pfam_utils.read_pfam_output(self.pfam_scan)
    if data is None:
        return []

    # Keep only the part of each accession before the first dot.
    prot_domains = set()
    for accession in data["hmm_acc"]:
        prot_domains.add(accession.split('.')[0])

    return sorted(reference_domains.intersection(prot_domains))
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
85 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
def _prepare_domains(self, dom_list):
    """Select domain subsequences from the entire protein sequences.

    Creates ``self._dom_fold`` and writes one ``<accession>.fa`` file per
    entry of ``dom_list``. Every row of ``self.pfam_scan`` whose accession
    (text before the first dot) is in ``dom_list`` contributes one fasta
    record named ``<rbp>:<start>-<stop>``.

    Parameters
    ----------
    dom_list : list
        Domain accessions to extract.

    Raises
    ------
    OSError
        If ``self._dom_fold`` cannot be created (e.g. it already exists).
    """
    mkdir(self._dom_fold)
    fasta = fasta_utils.import_fasta(self.fasta)

    out_files = {}
    try:
        for acc in dom_list:
            out_files[acc] = open("%s/%s.fa" % (self._dom_fold, acc), "w")

        with open(self.pfam_scan) as scan:
            # The first line of the scan file is discarded.
            scan.readline()
            for line in scan:
                fields = line.split()
                if not fields:
                    # Blank line: nothing to parse.
                    continue
                rbp = fields[0]
                start = int(fields[3])
                stop = int(fields[4])
                acc = fields[5].split('.')[0]
                if acc in out_files:
                    # NOTE(review): the reported coordinates are used
                    # directly as Python slice bounds; confirm this matches
                    # the coordinate convention of the scan output.
                    out_files[acc].write(
                        ">%s:%i-%i\n%s\n" % (rbp, start, stop,
                                             fasta[rbp][start:stop]))
    finally:
        # Close every output that was opened, even if parsing failed.
        for handle in out_files.values():
            handle.close()
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
114 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
115 def _compute_fisher_scores(self, dom_list): |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
116 """Wrapper for SAM 3.5 get_fisher_scores.""" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
117 def get_fisher_scores(dom_list, mod_fold, dom_fold, fisher_fold): |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
118 for acc in dom_list: |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
119 _FNULL = open(devnull, 'w') |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
120 cmd = "get_fisher_scores run -i %s/%s.mod -db %s/%s.fa" % ( |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
121 mod_fold, acc, dom_fold, acc) |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
122 fisher = sp.check_output( |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
123 cmd, shell=True, stderr=_FNULL) |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
124 nf = open("%s/%s.txt" % (fisher_fold, acc), "w") |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
125 nf.write(fisher) |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
126 nf.close() |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
127 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
128 mkdir(self._fisher_fold) |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
129 get_fisher_scores(dom_list, self._mod_fold, self._dom_fold, |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
130 self._fisher_fold) |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
131 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
def _ekm(self, dom_list):
    """Compute the empirical kernel map from the Fisher scores.

    For every accession in ``dom_list`` a ``<accession>.txt`` Fisher score
    file is read twice: once from ``self._reference_fisher_scores`` and once
    from ``self._fisher_fold``. The reference features are restricted to
    (and ordered by) the labels listed in ``self._train_rbps_file``; labels
    missing from the reference files get an all-zero feature vector.

    Parameters
    ----------
    dom_list : iterable of str
        Accessions whose Fisher score files have to be read.

    Returns
    -------
    pandas.DataFrame
        Matrix of dot products between reference feature vectors (rows,
        one per label of the training file) and selected feature vectors
        (columns). Each entry is divided by the product of the two vector
        norms; entries where either norm is zero are left untouched.
    """
    def process_seg(e):
        """Process segment of a SAM 3.5 get_fisher_scores output file.

        Return the segment label (the text before the first ':' of the
        first token) and the list of scores (fourth token onwards).
        """
        seg = e.split()
        c = seg[0].split(':')[0]
        # Build a real list: on Python 3 ``map`` is a lazy iterator and
        # cannot be turned into a numeric matrix.
        m = [float(v) for v in seg[3:]]
        return c, m

    def read_sam_file(samfile):
        """Read a SAM 3.5 get_fisher_scores output file.

        Return a DataFrame with one column per ">A " segment of the file
        (labels may be repeated) and one row per score.
        """
        with open(samfile) as f:
            data = f.read()

        columns = []
        rows = []
        # Whatever precedes the first ">A " marker is not a segment.
        for e in data.split(">A ")[1:]:
            c, m_ = process_seg(e)
            columns.append(c)
            rows.append(m_)

        scores = np.array(rows, dtype='float64')
        return pd.DataFrame(data=scores.T, columns=columns)

    def dom_features(fisher_fold, dom_list, names=None):
        """Compute the features with respect to a domain type.

        The per-accession frames are stacked row-wise. When ``names`` is
        given, the result has exactly those columns, in that order, with
        zeros for the labels that were not found in any file.
        """
        dfs = []
        for acc in dom_list:
            df = read_sam_file("%s/%s.txt" % (fisher_fold, acc))
            # Average the columns sharing the same label. Grouping the
            # transposed frame avoids the deprecated ``axis=1`` argument.
            df = df.T.groupby(level=0).mean().T
            dfs.append(df)

        con = pd.concat(dfs, ignore_index=True)

        if names is not None:
            # Select, zero-fill and reorder in one step, without assigning
            # into a slice of ``con``.
            con = con.reindex(columns=names, fill_value=0.0)

        # Labels absent from some accession files are NaN after concat.
        return con.fillna(0.0)

    with open(self._train_rbps_file) as f:
        train_rbps = f.read().strip().split('\n')

    ref = dom_features(self._reference_fisher_scores, dom_list,
                       names=train_rbps)
    sel = dom_features(self._fisher_fold, dom_list)

    ekm = ref.T.dot(sel)

    # Squared norms of the feature vectors, i.e. the diagonals of
    # ref.T.dot(ref) and sel.T.dot(sel), without building those matrices.
    ref_sq = np.square(ref.values).sum(axis=0)
    sel_sq = np.square(sel.values).sum(axis=0)

    # Normalise only where both norms are positive; dividing by 1 leaves
    # the other entries unchanged. Done on the whole matrix at once instead
    # of chained item assignment, which does not write back to ``ekm`` when
    # pandas Copy-on-Write is active.
    usable = np.outer(ref_sq > 0, sel_sq > 0)
    denom = np.where(usable, np.sqrt(np.outer(ref_sq, sel_sq)), 1.0)
    return ekm / denom
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
200 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
def vectorize(self):
    """Produce the RBP features.

    Runs the whole pipeline inside the temporary folder
    ``self._temp_fold``: Pfam scan, selection of the domains, preparation
    of the domain sequences, Fisher score computation and empirical kernel
    map computation.

    Returns
    -------
    The value returned by ``self._ekm`` for the selected domains, or
    ``None`` when ``self._overlapping_domains`` returns no domain.

    The temporary folder is removed on every exit path, including when one
    of the steps raises.
    """
    # create a temporary folder (outside the ``try``: if the creation fails
    # there is nothing of ours to clean up)
    mkdir(self._temp_fold)
    try:
        # scan the RBP sequences against Pfam
        self._pfam_scan()
        # determine the accession numbers of the pfam domains needed for
        # computing the features
        dom_list = self._overlapping_domains()
        if len(dom_list) == 0:
            return None
        # prepare fasta file with the sequence of the domains
        self._prepare_domains(dom_list)
        # compute fisher scores using SAM 3.5
        self._compute_fisher_scores(dom_list)
        # compute the empirical kernel map
        return self._ekm(dom_list)
    finally:
        # remove the temporary folder, even if a step above failed
        rmtree(self._temp_fold)