Mercurial > repos > rnateam > rnacommender
annotate rbpfeatures.py @ 0:8918de535391 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
author | rnateam |
---|---|
date | Tue, 31 May 2016 05:41:03 -0400 |
parents | |
children | a609d6dc8047 |
rev | line source |
---|---|
0
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
1 """Compute the RBP features.""" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
2 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
3 import re |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
4 import subprocess as sp |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
5 import uuid |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
6 from os import mkdir |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
7 from os import listdir |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
8 from os.path import isfile, join |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
9 from os import devnull |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
10 from shutil import rmtree |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
11 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
12 import numpy as np |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
13 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
14 import pandas as pd |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
15 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
16 import fasta_utils |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
17 import pfam_utils |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
18 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
19 __author__ = "Gianluca Corrado" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
20 __copyright__ = "Copyright 2016, Gianluca Corrado" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
21 __license__ = "MIT" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
22 __maintainer__ = "Gianluca Corrado" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
23 __email__ = "gianluca.corrado@unitn.it" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
24 __status__ = "Production" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
25 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
26 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
27 class RBPVectorizer(): |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
28 """Compute the RBP features.""" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
29 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
30 def __init__(self, fasta): |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
31 """ |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
32 Constructor. |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
33 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
34 Parameters |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
35 ---------- |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
36 fasta : str |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
37 Fasta file containing the RBP sequences to predict. |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
38 """ |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
39 self.fasta = fasta |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
40 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
41 self._mod_fold = "AURA_Human_data/RBP_features/mod" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
42 self._reference_fisher_scores = \ |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
43 "AURA_Human_data/RBP_features/fisher_scores_ref" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
44 self._train_rbps_file = \ |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
45 "AURA_Human_data/RBP_features/rbps_in_train.txt" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
46 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
47 self._temp_fold = "temp_" + str(uuid.uuid4()) |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
48 self.pfam_scan = "%s/pfam_scan.txt" % self._temp_fold |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
49 self._dom_fold = "%s/domains" % self._temp_fold |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
50 self._seeds_fold = "%s/seeds" % self._temp_fold |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
51 self._fisher_fold = "%s/fisher_scores" % self._temp_fold |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
52 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
def _pfam_scan(self):
    """
    Scan the sequences against the Pfam database.

    Writes to ``self.pfam_scan`` the header returned by
    ``pfam_utils.search_header()`` followed by the text returned by
    ``pfam_utils.sequence_search()`` for every sequence of ``self.fasta``,
    visiting the sequence identifiers in sorted order.

    The folder that contains ``self.pfam_scan`` is not created here, so it
    has to exist before this method is called.
    """
    # The context manager closes the output file even when reading the
    # fasta file or one of the searches raises.
    with open(self.pfam_scan, "w") as nf:
        nf.write(pfam_utils.search_header())

        fasta = fasta_utils.import_fasta(self.fasta)

        for rbp in sorted(fasta.keys()):
            seq = fasta[rbp]
            nf.write(pfam_utils.sequence_search(rbp, seq))
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
66 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
def _overlapping_domains(self):
    """
    Compute the set of domains contributing to the similarity.

    Returns
    -------
    list
        Sorted names present both among the files of ``self._mod_fold``
        (with ".mod" removed) and among the "hmm_acc" values of the scan
        (truncated at the first dot). Empty when
        ``pfam_utils.read_pfam_output`` returns None.
    """
    # Names of the reference models: regular files only.
    model_names = set()
    for entry in listdir(self._mod_fold):
        if isfile(join(self._mod_fold, entry)):
            model_names.add(entry.replace(".mod", ""))

    hits = pfam_utils.read_pfam_output(self.pfam_scan)
    if hits is None:
        return []

    # Drop everything after the first dot of each accession.
    hit_names = set(acc.split('.')[0] for acc in hits["hmm_acc"])

    return sorted(model_names.intersection(hit_names))
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
81 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
def _prepare_domains(self, dom_list):
    """
    Select domain subsequences from the entire protein sequences.

    Creates ``self._dom_fold`` and, inside it, one ``<acc>.fa`` file per
    entry of ``dom_list``. Every line of ``self.pfam_scan`` whose accession
    is in ``dom_list`` contributes one fasta record to the matching file.

    Parameters
    ----------
    dom_list : list
        Domain accessions (without the part after the dot) to extract.

    Raises
    ------
    OSError
        If ``self._dom_fold`` cannot be created, e.g. it already exists.
    """
    def prepare_domains(fasta_dic, dom_list, pfam_scan, out_folder):
        """Write the subsequence of each scan hit to its domain file."""
        out_file_dic = {}
        try:
            for acc in dom_list:
                out_file_dic[acc] = open(
                    "%s/%s.fa" % (out_folder, acc), "w")

            with open(pfam_scan) as f:
                # The first line is the header.
                f.readline()
                for line in f:
                    split = line.split()
                    if not split:
                        # Blank line: nothing to extract.
                        continue
                    rbp = split[0]
                    start = int(split[3])
                    stop = int(split[4])
                    acc = split[5].split('.')[0]
                    if acc in out_file_dic:
                        # NOTE(review): if the scan coordinates are 1-based
                        # and inclusive, this slice leaves out the first
                        # residue of the domain. Kept as is because the
                        # reference Fisher scores may have been produced
                        # the same way -- confirm before changing.
                        out_file_dic[acc].write(
                            ">%s:%i-%i\n%s\n" % (rbp, start, stop,
                                                 fasta_dic[rbp][start:stop]))
        finally:
            # Close every file that was opened, also on failure.
            for handle in out_file_dic.values():
                handle.close()

    mkdir(self._dom_fold)
    fasta = fasta_utils.import_fasta(self.fasta)
    prepare_domains(fasta, dom_list, self.pfam_scan,
                    self._dom_fold)
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
110 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
111 def _compute_fisher_scores(self, dom_list): |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
112 """Wrapper for SAM 3.5 get_fisher_scores.""" |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
113 def get_fisher_scores(dom_list, mod_fold, dom_fold, fisher_fold): |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
114 for acc in dom_list: |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
115 _FNULL = open(devnull, 'w') |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
116 cmd = "get_fisher_scores run -i %s/%s.mod -db %s/%s.fa" % ( |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
117 mod_fold, acc, dom_fold, acc) |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
118 fisher = sp.check_output( |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
119 cmd, shell=True, stderr=_FNULL) |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
120 nf = open("%s/%s.txt" % (fisher_fold, acc), "w") |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
121 nf.write(fisher) |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
122 nf.close() |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
123 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
124 mkdir(self._fisher_fold) |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
125 get_fisher_scores(dom_list, self._mod_fold, self._dom_fold, |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
126 self._fisher_fold) |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
127 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
def _ekm(self, dom_list):
    """Compute the empirical kernel map from the Fisher scores.

    For every domain accession in ``dom_list`` the Fisher scores of the
    reference proteins (read from ``self._reference_fisher_scores``) and
    of the selected proteins (read from ``self._fisher_fold``) are loaded
    from ``<folder>/<accession>.txt``. The score vectors of all domains
    are stacked, and the dot product between every reference protein and
    every selected protein is normalised by the norms of the two vectors.

    Parameters
    ----------
    dom_list : iterable of str
        Accessions of the domains; one Fisher score file per accession
        is expected in each of the two folders.

    Returns
    -------
    pandas.DataFrame
        One row per name listed in ``self._train_rbps_file`` and one
        column per selected protein. Entries for which either protein
        has an all-zero feature vector are left un-normalised.
    """
    def process_seg(e):
        """Process segment of a SAM 3.5 get_fisher_scores output file.

        Return the sequence name (the text before the first ``:`` of the
        first token) and the list of scores (every token from the fourth
        one onwards, converted to float).
        """
        seg = e.split()
        c = seg[0].split(':')[0]
        # Build a real list: on Python 3 ``map`` returns a lazy iterator,
        # which numpy would store as an opaque object instead of numbers.
        m = [float(value) for value in seg[3:]]
        return c, m

    def read_sam_file(samfile):
        """Read a SAM 3.5 get_fisher_scores output file.

        Return a DataFrame with one column per ``>A `` record of the
        file (column names may repeat) and one row per score.
        """
        # The context manager closes the file even if reading fails.
        with open(samfile) as handle:
            data = handle.read()

        columns = []
        rows = []
        # The text before the first ">A " marker is not a record.
        for segment in re.split(">A ", data)[1:]:
            name, scores = process_seg(segment)
            columns.append(name)
            rows.append(scores)

        # Records are rows here; transpose so that each record is a column.
        scores_by_record = np.array(rows, dtype='float64').T
        return pd.DataFrame(data=scores_by_record, columns=columns)

    def dom_features(fisher_fold, dom_list, names=None):
        """Compute the features with respect to a domain type.

        The per-domain frames are stacked row-wise. When ``names`` is
        given, the result has exactly those columns in that order, with
        all-zero columns for names that have no Fisher scores.
        """
        dfs = []
        for acc in dom_list:
            df = read_sam_file("%s/%s.txt" % (fisher_fold, acc))
            # Average the columns that share a name. Grouping the
            # transposed frame by its index is equivalent to the
            # deprecated ``groupby(df.columns, axis=1)``.
            df = df.T.groupby(level=0).mean().T
            dfs.append(df)

        con = pd.concat(dfs, ignore_index=True)

        if names is not None:
            # Keep only the requested columns, in the requested order,
            # creating zero-filled columns for the missing names.
            con = con.reindex(columns=names, fill_value=0.0)

        # Values missing for a domain are treated as zero.
        con = con.fillna(0.0)
        return con

    with open(self._train_rbps_file) as handle:
        train_rbps = handle.read().strip().split('\n')

    ref = dom_features(self._reference_fisher_scores, dom_list,
                       names=train_rbps)
    sel = dom_features(self._fisher_fold, dom_list)

    ekm = ref.T.dot(sel)

    # Squared norm of each feature vector, i.e. the diagonal of the Gram
    # matrices ``ref.T.dot(ref)`` and ``sel.T.dot(sel)``.
    ref_sq_norm = (ref ** 2).sum(axis=0).values
    sel_sq_norm = (sel ** 2).sum(axis=0).values

    # Normalise only where both vectors are non-zero. Dividing the whole
    # frame at once avoids chained indexing (``ekm[rs][rr] /= ...``),
    # which may operate on a temporary copy and leave ``ekm`` untouched.
    both_nonzero = (ref_sq_norm > 0)[:, None] & (sel_sq_norm > 0)[None, :]
    scale = np.ones(ekm.shape, dtype='float64')
    scale[both_nonzero] = np.sqrt(
        np.outer(ref_sq_norm, sel_sq_norm))[both_nonzero]
    return ekm / scale
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
196 |
8918de535391
planemo upload for repository https://github.com/bgruening/galaxytools/tree/rna_commander/tools/rna_tools/rna_commender commit 2fc7f3c08f30e2d81dc4ad19759dfe7ba9b0a3a1
rnateam
parents:
diff
changeset
|
def vectorize(self):
    """Produce the RBP features.

    Runs the whole pipeline inside the temporary folder
    ``self._temp_fold``: Pfam scan, selection of the domains, preparation
    of the domain sequences, Fisher score computation and, finally, the
    empirical kernel map.

    Returns
    -------
    The value returned by ``self._ekm`` for the selected domains, or
    ``None`` when ``self._overlapping_domains`` returns an empty list.

    Raises
    ------
    Whatever ``os.mkdir`` raises if the temporary folder cannot be
    created, and any exception raised by a pipeline step. In the latter
    case the temporary folder is still removed.
    """
    # create a temporary folder
    mkdir(self._temp_fold)
    try:
        # scan the RBP sequences against Pfam
        self._pfam_scan()
        # determine the accession numbers of the pfam domains needed for
        # computing the features
        dom_list = self._overlapping_domains()
        if len(dom_list) == 0:
            return None
        # prepare fasta file with the sequence of the domains
        self._prepare_domains(dom_list)
        # compute fisher scores using SAM 3.5
        self._compute_fisher_scores(dom_list)
        # compute the empirical kernel map
        return self._ekm(dom_list)
    finally:
        # remove the temporary folder on every exit path, including
        # failures, so that aborted runs do not leave it behind
        rmtree(self._temp_fold)