Mercurial > repos > iuc > virannot_blast2tsv
annotate otu.py @ 4:bb29ae8708b5 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
author | iuc |
---|---|
date | Tue, 13 May 2025 11:52:17 +0000 |
parents | f8ebd1e802d7 |
children |
rev | line source |
---|---|
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
2 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
3 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
4 # Name: virAnnot_otu |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
5 # Author: Marie Lefebvre - INRAE |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
6 # Reuirements: Ete3 toolkit and external apps |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
7 |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
8 """Create viral OTUs based on RPS and Blast annotations""" |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
9 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
10 import argparse |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
11 import csv |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
12 import logging as log |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
13 import os |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
14 import random |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
15 import re |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
16 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
17 import pandas as pd |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
18 import xlsxwriter |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
19 from Bio import SeqIO |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
20 from Bio.Align.Applications import ClustalOmegaCommandline |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
21 from ete3 import NodeStyle, SeqGroup, SeqMotifFace, Tree, TreeStyle |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
22 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
23 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
24 def main(): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
25 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
26 1 - retrieve info (sequence, query_id, taxo) from RPS file |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
27 2 - align protein sequences of the same domain, calculate |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
28 matrix of distances, generate trees |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
29 3 - get statistics (read number) per otu |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
30 4 - create HTML report |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
31 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
32 options = _set_options() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
33 _set_log_level(options.verbosity) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
34 hits_collection = _cut_sequence(options) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
35 _align_sequences(options, hits_collection) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
36 _get_stats(options, hits_collection) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
37 _create_html(options, hits_collection) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
38 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
39 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
40 def _cut_sequence(options): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
41 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
42 Retrieve viral hits and sequences from RPS files |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
43 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
44 log.info("Cut sequences") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
45 i = 0 # keep track of iterations over rps files to use the corresponding fasta file |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
46 collection = {} |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
47 options.rps.sort() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
48 for rps_file in options.rps: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
49 log.debug("Reading rps file " + str(rps_file)) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
50 with open(rps_file[0], 'r') as rps_current_file: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
51 rps_reader = csv.reader(rps_current_file, delimiter='\t') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
52 headers = 0 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
53 for row in rps_reader: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
54 if headers == 0: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
55 # headers |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
56 headers += 1 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
57 else: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
58 if row[1] == "no_hit": |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
59 pass |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
60 else: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
61 query_id = row[0] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
62 cdd_id = row[2] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
63 startQ = int(row[5]) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
64 endQ = int(row[6]) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
65 frame = float(row[7]) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
66 description = row[8] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
67 superkingdom = row[9] |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
68 try: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
69 pident = row[10] |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
70 except IndexError: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
71 log.info(rps_file[0]) |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
72 log.info(row) |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
73 match = re.search("Viruses", superkingdom) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
74 # if contig is viral then retrieve sequence |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
75 if match and float(pident) >= options.viral_portion: |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
76 options.fasta.sort() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
77 seq = _retrieve_fasta_seq(options.fasta[i][0], query_id) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
78 seq_length = len(seq) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
79 if endQ < seq_length: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
80 seq = seq[startQ - 1:endQ] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
81 else: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
82 seq = seq[startQ - 1:seq_length] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
83 if frame < 0: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
84 seq = seq.reverse_complement() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
85 prot = seq.translate() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
86 if len(prot) >= options.min_protein_length: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
87 log.debug("Add " + query_id + " to collection") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
88 if cdd_id not in collection: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
89 collection[cdd_id] = {} |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
90 collection[cdd_id][query_id] = {} |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
91 collection[cdd_id][query_id]["nuccleotide"] = seq |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
92 collection[cdd_id][query_id]["protein"] = prot |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
93 collection[cdd_id][query_id]["full_description"] = description |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
94 if options.blast is not None: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
95 options.blast.sort() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
96 with open(options.blast[i][0], 'r') as blast_current_file: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
97 blast_reader = csv.reader(blast_current_file, delimiter='\t') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
98 for b_query in blast_reader: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
99 if b_query[1] == query_id: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
100 collection[cdd_id][query_id]["nb"] = b_query[2] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
101 if len(b_query) > 10: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
102 collection[cdd_id][query_id]["taxonomy"] = b_query[14] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
103 else: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
104 collection[cdd_id][query_id]["taxonomy"] = "Unknown" |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
105 else: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
106 if "nb" not in collection[cdd_id][query_id]: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
107 collection[cdd_id][query_id]["nb"] = 0 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
108 if "taxonomy" not in collection[cdd_id][query_id]: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
109 collection[cdd_id][query_id]["taxonomy"] = "Unknown" |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
110 else: |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
111 log.debug("No blast file") |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
112 collection[cdd_id][query_id]["taxonomy"] = "Unknown" |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
113 collection[cdd_id][query_id]["nb"] = 0 |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
114 # keep pfamXXX and RdRp 1 |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
115 collection[cdd_id]["short_description"] = description.split(",")[0] + description.split(",")[1] |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
116 collection[cdd_id]["full_description"] = description |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
117 i += 1 |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
118 if options.merge_rdrp == "yes": |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
119 rdrp_list = ["pfam00680", "pfam02123", "pfam00978", "pfam00998"] |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
120 collection["RdRp_merge"] = {} |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
121 for cdd_id in collection: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
122 if cdd_id in rdrp_list and cdd_id != "RdRp_merge": |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
123 log.info("Add " + cdd_id + " in merge") |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
124 for query_id in collection[cdd_id]: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
125 if query_id not in collection["RdRp_merge"]: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
126 collection["RdRp_merge"][query_id] = {} |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
127 collection["RdRp_merge"][query_id] = collection[cdd_id][query_id] |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
128 return collection |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
129 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
130 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
131 def _retrieve_fasta_seq(fasta_file, query_id): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
132 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
133 From fasta file retrieve specific sequence with id |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
134 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
135 contigs_list = SeqIO.to_dict(SeqIO.parse(open(fasta_file), 'fasta')) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
136 try: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
137 seq = contigs_list[query_id].seq |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
138 except KeyError: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
139 print("KeyError for " + query_id + " file " + fasta_file) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
140 else: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
141 return seq |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
142 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
143 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
144 def _create_tree(tree, fasta, out, color): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
145 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
146 Create phylogenic tree from multiple alignments |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
147 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
148 try: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
149 f = open(tree, 'r') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
150 except IOError: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
151 log.info("Unknown file: " + tree + ". You may have less than 2 sequences to align.") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
152 return |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
153 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
154 line = "" |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
155 for word in f: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
156 line += word.strip() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
157 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
158 f.close() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
159 seqs = SeqGroup(fasta, format="fasta") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
160 t = Tree(tree) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
161 ts = TreeStyle() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
162 ts.show_branch_length = True |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
163 colors = _parse_color_file(color) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
164 node_names = t.get_leaf_names() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
165 for name in node_names: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
166 seq = seqs.get_seq(name) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
167 seqFace = SeqMotifFace(seq, seq_format="()") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
168 node = t.get_leaves_by_name(name) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
169 for i in range(0, len(node)): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
170 if name in colors: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
171 ns = NodeStyle() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
172 ns['bgcolor'] = colors[name] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
173 node[i].set_style(ns) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
174 node[i].add_face(seqFace, 0, 'aligned') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
175 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
176 t.render(out, tree_style=ts) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
177 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
178 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
179 def _parse_color_file(file): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
180 fh = open(file) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
181 reader = csv.reader(fh, delimiter="\t") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
182 data = list(reader) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
183 colors = {} |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
184 for i in range(0, len(data)): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
185 colors[data[i][0]] = data[i][1] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
186 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
187 return colors |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
188 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
189 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
190 def _align_sequences(options, hits_collection): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
191 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
192 Align hit sequences with pfam reference |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
193 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
194 log.info("Align sequences") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
195 if not os.path.exists(options.output): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
196 os.mkdir(options.output) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
197 color_by_sample = {} |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
198 for cdd_id in hits_collection: |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
199 log.info("align seq for " + cdd_id) |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
200 if cdd_id == "RdRp_merge": |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
201 cdd_output = options.output + "/" + cdd_id |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
202 else: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
203 cdd_output = options.output + "/" + hits_collection[cdd_id]["short_description"].replace(" ", "_") |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
204 if not os.path.exists(cdd_output): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
205 os.mkdir(cdd_output) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
206 if os.path.exists(cdd_output + "/seq_to_align.fasta"): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
207 os.remove(cdd_output + "/seq_to_align.fasta") |
3
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
208 if os.path.exists(cdd_output + "/seq_nucc.fasta"): |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
209 os.remove(cdd_output + "/seq_nucc.fasta") |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
210 file_seq_to_align = cdd_output + "/seq_to_align.fasta" |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
211 file_color_config = cdd_output + "/color_config.txt" |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
212 f = open(file_seq_to_align, "a") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
213 f_c = open(file_color_config, "w+") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
214 log.info("Writing to " + file_seq_to_align) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
215 count = 0 # count number of contig per domain |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
216 for query_id in hits_collection[cdd_id]: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
217 if query_id not in ["short_description", "full_description"]: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
218 sample = query_id.split("_")[0] # get sample from SAMPLE_IdCONTIG |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
219 sample_color = "#" + ''.join([random.choice('ABCDEF0123456789') for i in range(6)]) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
220 # same color for each contig of the same sample |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
221 if sample not in color_by_sample.keys(): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
222 color_by_sample[sample] = sample_color |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
223 f.write(">" + query_id + "\n") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
224 f.write(str(hits_collection[cdd_id][query_id]["protein"]) + "\n") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
225 f_c.write(query_id + '\t' + color_by_sample[sample] + '\n') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
226 count += 1 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
227 f.close() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
228 f_c.close() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
229 file_seq_aligned = cdd_output + '/seq_aligned.final_tree.fa' |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
230 tree_file = cdd_output + '/tree.dnd' |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
231 file_cluster = cdd_output + '/otu_cluster.csv' |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
232 # create alignment for domain with more than 1 contigs |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
233 if count > 1: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
234 log.info("Run clustal omega...") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
235 clustalo_cmd = ClustalOmegaCommandline("clustalo", infile=file_seq_to_align, outfile=file_seq_aligned, |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
236 guidetree_out=tree_file, seqtype="protein", force=True) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
237 log.debug(clustalo_cmd) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
238 stdout, stderr = clustalo_cmd() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
239 log.debug(stdout + stderr) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
240 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
241 # create tree plot with colors |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
242 file_matrix = cdd_output + "/identity_matrix.csv" |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
243 log.info("Create tree...") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
244 _create_tree(tree_file, file_seq_aligned, tree_file + '.png', file_color_config) |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
245 _compute_pairwise_distance(file_seq_aligned, file_matrix, cdd_id) |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
246 log.info("Retrieve OTUs...") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
247 # if os.path.exists(file_cluster): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
248 # os.remove(file_cluster) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
249 otu_cmd = os.path.join(options.tool_path, 'seek_otu.R') + ' ' + file_matrix + ' ' + file_cluster + ' ' + str(options.perc) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
250 log.debug(otu_cmd) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
251 os.system(otu_cmd) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
252 # only one contig |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
253 else: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
254 mv_cmd = 'cp ' + file_seq_to_align + ' ' + file_seq_aligned |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
255 log.debug(mv_cmd) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
256 os.system(mv_cmd) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
257 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
258 f = open(file_cluster, "w+") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
259 f.write('OTU_1,1,' + list(hits_collection[cdd_id].keys())[0] + ',') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
260 f.close() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
261 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
262 |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
263 def _compute_pairwise_distance(file_seq_aligned, file_matrix, cdd_id): |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
264 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
265 Calculate paiwise distance between aligned protein sequences |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
266 from a cdd_id |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
267 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
268 log.info("Compute pairwise distance of " + cdd_id) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
269 matrix = {} |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
270 for k1 in SeqIO.parse(file_seq_aligned, "fasta"): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
271 row = [] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
272 for k2 in SeqIO.parse(file_seq_aligned, "fasta"): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
273 identic = 0 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
274 compared = 0 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
275 keep_pos = 0 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
276 for base in k1: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
277 base2 = k2[keep_pos] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
278 # mutation, next |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
279 if base == 'X' or base2 == 'X': |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
280 keep_pos += 1 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
281 continue |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
282 # gap in both sequences, next |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
283 if base == '-' and base2 == '-': |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
284 keep_pos += 1 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
285 continue |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
286 # gap in one of the sequence, next |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
287 if base == '-' or base2 == '-': |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
288 keep_pos += 1 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
289 continue |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
290 # identity |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
291 if base == base2: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
292 identic += 1 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
293 compared += 1 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
294 keep_pos += 1 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
295 # set minimum overlap to 20 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
296 if compared == 0 or compared < 20: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
297 percentIdentity = 0 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
298 else: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
299 percentIdentity = (identic / compared) * 100 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
300 row.append(percentIdentity) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
301 matrix[k1.id] = row |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
302 log.debug("Write " + file_matrix) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
303 f = open(file_matrix, "w+") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
304 for row in matrix: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
305 f.write(row + ',' + ', '.join(map(str, matrix[row])) + "\n") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
306 f.close() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
307 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
308 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
309 def _get_stats(options, hits_collection): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
310 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
311 Retrieve annotation and number of read |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
312 for each OTUs |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
313 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
314 file_xlsx = options.output + '/otu_stats.xlsx' # Create a workbook |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
315 workbook = xlsxwriter.Workbook(file_xlsx) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
316 log.info("Writing stats to " + file_xlsx) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
317 for cdd_id in hits_collection: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
318 otu_collection = {} |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
319 if cdd_id == "RdRp_merge": |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
320 cdd_output = options.output + "/" + cdd_id |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
321 worksheet = workbook.add_worksheet(cdd_id) |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
322 else: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
323 |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
324 cdd_output = options.output + "/" + hits_collection[cdd_id]["short_description"].replace(" ", "_") |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
325 worksheet = workbook.add_worksheet(hits_collection[cdd_id]["short_description"]) # add a worksheet |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
326 file_cluster = cdd_output + '/otu_cluster.csv' |
3
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
327 file_fasta_nucc = cdd_output + '/representative_nucc.fasta' |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
328 with open(file_cluster, 'r') as clust: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
329 otu_reader = csv.reader(clust, delimiter=',') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
330 samples_list = [] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
331 for row in otu_reader: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
332 contigs_list = row[2:len(row) - 1] # remove last empty column |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
333 otu_collection[row[0]] = {} # key -> otu number |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
334 otu_collection[row[0]]['contigs_list'] = contigs_list |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
335 for contig in contigs_list: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
336 sample = contig.split('_')[0] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
337 samples_list.append(sample) if sample not in samples_list else samples_list |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
338 if sample not in otu_collection[row[0]]: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
339 otu_collection[row[0]][sample] = {} |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
340 otu_collection[row[0]][sample][contig] = {} |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
341 # add read number of the contig and annotation |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
342 if contig in hits_collection[cdd_id]: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
343 if 'nb' in hits_collection[cdd_id][contig]: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
344 otu_collection[row[0]][sample][contig]['nb'] = hits_collection[cdd_id][contig]["nb"] |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
345 else: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
346 otu_collection[row[0]][sample][contig]['nb'] = 0 |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
347 if 'taxonomy' in hits_collection[cdd_id][contig]: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
348 otu_collection[row[0]][sample][contig]['taxonomy'] = hits_collection[cdd_id][contig]["taxonomy"] |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
349 else: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
350 otu_collection[row[0]][sample][contig]['taxonomy'] = 'unknown' |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
351 else: |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
352 otu_collection[row[0]][sample][contig] = {'nb': 0, 'taxonomy': 'unknown'} |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
353 else: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
354 otu_collection[row[0]][sample][contig] = {} |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
355 # add read number of the contig and annotation |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
356 if contig in hits_collection[cdd_id]: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
357 if 'nb' in hits_collection[cdd_id][contig]: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
358 otu_collection[row[0]][sample][contig]['nb'] = hits_collection[cdd_id][contig]["nb"] |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
359 else: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
360 otu_collection[row[0]][sample][contig]['nb'] = 0 |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
361 if 'taxonomy' in hits_collection[cdd_id][contig]: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
362 otu_collection[row[0]][sample][contig]['taxonomy'] = hits_collection[cdd_id][contig]["taxonomy"] |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
363 else: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
364 otu_collection[row[0]][sample][contig]['taxonomy'] = 'unknown' |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
365 else: |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
366 otu_collection[row[0]][sample][contig] = {'nb': 0, 'taxonomy': 'unknown'} |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
367 if 'taxonomy' in hits_collection[cdd_id][contig]: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
368 otu_collection[row[0]]['global_taxonomy'] = hits_collection[cdd_id][contig]["taxonomy"] |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
369 else: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
370 otu_collection[row[0]]['global_taxonomy'] = 'unknown' |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
371 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
372 # calculate total number of reads for each sample of each OTU |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
373 for otu in otu_collection: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
374 for sample in otu_collection[otu]: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
375 if sample not in ['contigs_list', 'global_taxonomy']: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
376 total_nb_read = 0 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
377 for contig in otu_collection[otu][sample]: |
3
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
378 if otu_collection[otu][sample][contig]['nb'] == '': |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
379 otu_collection[otu][sample][contig]['nb'] = 0 |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
380 total_nb_read += int(otu_collection[otu][sample][contig]['nb']) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
381 otu_collection[otu][sample]['total_nb_read'] = total_nb_read |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
382 row = 0 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
383 column = 0 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
384 item = '#OTU_name' |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
385 worksheet.write(row, column, item) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
386 for samp in samples_list: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
387 column += 1 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
388 worksheet.write(row, column, samp) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
389 worksheet.write(row, column + 1, 'taxonomy') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
390 worksheet.write(row, column + 2, 'contigs_list') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
391 row = 1 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
392 # column = 0 |
3
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
393 with open(file_fasta_nucc, "w+") as f_nucc: |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
394 for otu in otu_collection: |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
395 if isinstance(otu_collection[otu], dict): |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
396 column = 0 |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
397 worksheet.write(row, column, otu) |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
398 # prepare table with 0 in each cells |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
399 for sample in otu_collection[otu]: |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
400 column = 1 |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
401 for samp in samples_list: |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
402 worksheet.write(row, column, 0) |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
403 column += 1 |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
404 # fill in table with nb of read for each sample and each OTU |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
405 for sample in otu_collection[otu]: |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
406 column = 1 |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
407 for samp in samples_list: |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
408 if samp == sample: |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
409 worksheet.write(row, column, otu_collection[otu][sample]['total_nb_read']) |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
410 column += 1 |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
411 worksheet.write(row, len(samples_list) + 1, otu_collection[otu]['global_taxonomy'].replace(';', ' ')) |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
412 worksheet.write(row, len(samples_list) + 2, ",".join(otu_collection[otu]['contigs_list'])) |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
413 row += 1 |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
414 f_nucc.write(">" + cdd_id + "_" + otu + "_" + otu_collection[otu]['contigs_list'][0] + "\n") |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
415 f_nucc.write(str(hits_collection[cdd_id][otu_collection[otu]['contigs_list'][0]]['nuccleotide']) + "\n") |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
416 workbook.close() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
417 read_file = pd.ExcelFile(file_xlsx) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
418 for sheet in read_file.sheet_names: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
419 cluster_nb_reads_file = options.output + "/" + sheet.replace(" ", "_") + "/cluster_nb_reads_files.tab" |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
420 data_xls = pd.read_excel(file_xlsx, sheet, dtype=str, index_col=None) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
421 data_xls.to_csv(cluster_nb_reads_file, encoding='utf-8', index=False, sep='\t') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
422 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
423 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
424 def _create_html(options, hits_collection): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
425 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
426 Create HTML file with all results |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
427 """ |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
428 # create mapping file with all informations to use to create HTML report |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
429 map_file_path = options.output + "/map.txt" |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
430 if os.path.exists(map_file_path): |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
431 os.remove(map_file_path) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
432 |
3
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
433 with open(map_file_path, "w+") as map_file: |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
434 headers = ['#cdd_id', 'align_files', 'tree_files', 'cluster_files', 'cluster_nb_reads_files', 'pairwise_files', 'description', 'full_description\n'] |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
435 map_file.write("\t".join(headers)) |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
436 for cdd_id in hits_collection: |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
437 if cdd_id == "RdRp_merge": |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
438 cdd_output = "RdRp_merge" |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
439 else: |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
440 cdd_output = hits_collection[cdd_id]["short_description"].replace(" ", "_") |
3
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
441 short_description = cdd_output |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
442 file_seq_aligned = cdd_output + '/seq_aligned.final_tree.fa' |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
443 tree_file = cdd_output + '/tree.dnd.png' |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
444 file_cluster = cdd_output + '/otu_cluster.csv' |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
445 file_matrix = cdd_output + "/identity_matrix.csv" |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
446 cluster_nb_reads_files = cdd_output + "/cluster_nb_reads_files.tab" |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
447 map_file.write(cdd_id + "\t" + file_seq_aligned + "\t" + tree_file + "\t") |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
448 map_file.write(file_cluster + "\t" + cluster_nb_reads_files + "\t" + file_matrix + "\t") |
f8ebd1e802d7
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
iuc
parents:
0
diff
changeset
|
449 map_file.write(short_description + "\t" + hits_collection[cdd_id]["full_description"] + "\n") |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
450 log.info("Writing HTML report") |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
451 html_cmd = os.path.join(options.tool_path, 'rps2tree_html.py') + ' -m ' + map_file_path + ' -o ' + options.output |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
452 log.debug(html_cmd) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
453 os.system(html_cmd) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
454 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
455 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
456 def _set_options(): |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
457 """ |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
458 Set parameters |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
459 """ |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
460 parser = argparse.ArgumentParser() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
461 parser.add_argument('-b', '--blast', help='TAB blast file from blast2ecsv module.', action='append', required=False, dest='blast', nargs='+') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
462 parser.add_argument('-r', '--rps', help='TAB rpsblast file from rps2ecsv module.', action='append', required=True, dest='rps', nargs='+') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
463 parser.add_argument('-f', '--fasta', help='FASTA file with contigs', action='append', required=True, dest='fasta', nargs='+') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
464 parser.add_argument('-p', '--percentage', help='Percentage similarity threshold for OTUs cutoff.', action='store', type=int, default=90, dest='perc') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
465 parser.add_argument('-vp', '--viral_portion', help='Minimun portion of viral sequences in RPS domain to be included.', action='store', type=float, default=0.3, dest='viral_portion') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
466 parser.add_argument('-mpl', '--min_protein_length', help='Minimum query protein length.', action='store', type=int, default=100, dest='min_protein_length') |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
467 parser.add_argument('-m', '--merge_rdrp', help='Merge RdRp1, 2, 3 and 4 to create otu on it.', action='store', type=str, default="no", dest='merge_rdrp') |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
468 parser.add_argument('-tp', '--tool_path', help='Path to otu_seek.R', action='store', type=str, default='./', dest='tool_path') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
469 parser.add_argument('-o', '--out', help='The output directory', action='store', type=str, default='./Rps2tree_OTU', dest='output') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
470 parser.add_argument('-rgb', '--rgb-conf', help='Color palette for contigs coloration', action='store', type=str, default='rgb.txt', dest='file_rgb') |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
471 parser.add_argument('-v', '--verbosity', help='Verbose level', action='store', type=int, choices=[1, 2, 3, 4], default=1) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
472 args = parser.parse_args() |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
473 return args |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
474 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
475 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
476 def _set_log_level(verbosity): |
4
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
477 """ |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
478 Debbug |
bb29ae8708b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 7036ce0e06b6dc64332b1a5642fc58928523c5c6
iuc
parents:
3
diff
changeset
|
479 """ |
0
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
480 if verbosity == 1: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
481 log_format = '%(asctime)s %(levelname)-8s %(message)s' |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
482 log.basicConfig(level=log.INFO, format=log_format) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
483 elif verbosity == 3: |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
484 log_format = '%(filename)s:%(lineno)s - %(asctime)s %(levelname)-8s %(message)s' |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
485 log.basicConfig(level=log.DEBUG, format=log_format) |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
486 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
487 |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
488 if __name__ == "__main__": |
e889010415a1
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 3a3b40c15ae5e82334f016e88b1f3c5bbbb3b2cd
iuc
parents:
diff
changeset
|
489 main() |