Mercurial > repos > bebatut > convert_extract_sequence_file
annotate convert_extract_sequence_file.py @ 1:158642ce204f draft default tip
planemo upload commit 0a1bbb0870f6b776175151d4bc818c5546731ca8-dirty
author | bebatut |
---|---|
date | Thu, 28 Apr 2016 08:36:12 -0400 |
parents | 01c2b74b3a21 |
children |
rev | line source |
---|---|
0
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
1 #!/usr/bin/python |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
3 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
4 import sys |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
5 import os |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
6 import argparse |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
7 import copy |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
8 import operator |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
9 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
# File offset at which next_fasta_record() resumes reading the FASTA input.
# None means no position has been recorded yet.
FASTA_FILE_LAST_POS = None
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
11 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
12 ################# |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
13 # Parse methods # |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
14 ################# |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def text_end_of_file(row):
    """Tell whether ``row`` is the empty string.

    get_new_line() passes the result of ``readline()`` here; an empty string
    is how ``readline()`` signals that no more data is left (a blank line
    still carries its newline character).

    Returns:
        True when ``row`` equals ``''``, False otherwise.
    """
    return row == ''
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
20 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def get_new_line(input_file, generate_error=True):
    """Read the next line of ``input_file`` without its trailing newline.

    Args:
        input_file: object exposing ``readline()`` (an open text file).
        generate_error: when true, reaching end of file is reported as an
            error; when false it is reported by returning ``None``.

    Returns:
        The line content without its terminating newline, or ``None`` at
        end of file when ``generate_error`` is false.

    Raises:
        ValueError: at end of file when ``generate_error`` is true.
    """
    row = input_file.readline()
    if row == '':
        # readline() returns '' only when the end of the file is reached.
        if generate_error:
            string = os.path.basename(__file__) + ': '
            string += ' unexpected end of file'
            raise ValueError(string)
        return None
    # Drop the newline only when it is really there: the last line of a file
    # may lack one, and slicing blindly would eat its final character.
    if row.endswith('\n'):
        return row[:-1]
    return row
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
32 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def next_fasta_record(input_file):
    """Read the next FASTA record from ``input_file``.

    The read position is remembered between calls in the module-level
    ``FASTA_FILE_LAST_POS``: finding where a record ends requires reading
    the header line of the following record, so each call first seeks back
    to the recorded offset.

    Args:
        input_file: seekable text file positioned on a FASTA header line.

    Returns:
        A ``SeqRecord`` built from the header line and the concatenated
        sequence lines, or ``None`` when the end of the file is reached.
    """
    global FASTA_FILE_LAST_POS
    if FASTA_FILE_LAST_POS is not None:
        input_file.seek(FASTA_FILE_LAST_POS)

    id_line = get_new_line(input_file, generate_error=False)
    if id_line is None:
        return None
    # Record the offset right after the header. Otherwise a header that is
    # not followed by any sequence line would leave the offset untouched and
    # the same record would be returned again on every call.
    FASTA_FILE_LAST_POS = input_file.tell()

    seq_id = id_line[1:].split(' ')[0]
    # NOTE(review): the description is the whole header (identifier
    # included), unlike next_fastq_record() which drops the identifier;
    # confirm this is intended.
    description = id_line[1:]

    seq_parts = []
    while True:
        new_line = get_new_line(input_file, generate_error=False)
        # startswith() copes with blank lines, on which indexing [0] fails.
        if new_line is None or new_line.startswith('>'):
            break
        seq_parts.append(new_line)
        # Next call resumes after the last sequence line consumed.
        FASTA_FILE_LAST_POS = input_file.tell()
    return SeqRecord(seq_id, ''.join(seq_parts), description)
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
56 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def next_fastq_record(input_file):
    """Read the next FASTQ record from ``input_file``.

    A record is read as exactly four lines: ``@`` header, sequence,
    separator and quality string.

    Args:
        input_file: text file positioned on a FASTQ header line.

    Returns:
        A ``SeqRecord`` carrying identifier, sequence, description and
        quality string, or ``None`` when the end of the file is reached.

    Raises:
        ValueError: when the header line does not start with ``@`` (blank
            lines included) or the file ends in the middle of a record.
    """
    id_line = get_new_line(input_file, generate_error=False)
    if id_line is None:
        return None
    # startswith() also rejects an empty line, on which id_line[0] would
    # fail with an IndexError instead of the intended ValueError.
    if not id_line.startswith('@'):
        string = os.path.basename(__file__) + ': '
        string += ' issue in fastq file'
        raise ValueError(string)
    # Identifier is everything up to the first space, description the rest.
    seq_id, _, description = id_line[1:].partition(' ')
    seq = get_new_line(input_file)
    get_new_line(input_file)  # separator line: consumed, content unused
    quals = get_new_line(input_file)
    return SeqRecord(seq_id, seq, description, quals)
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
72 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def next_record(input_file, file_format):
    """Read the next record of ``input_file`` with the matching parser.

    Args:
        input_file: open sequence file.
        file_format: ``'fasta'`` or ``'fastq'``.

    Returns:
        Whatever the format-specific reader returns.

    Raises:
        ValueError: when ``file_format`` is neither of the two.
    """
    if file_format == 'fasta':
        return next_fasta_record(input_file)
    if file_format == 'fastq':
        return next_fastq_record(input_file)
    message = os.path.basename(__file__) + ': ' + file_format + ' is not managed'
    raise ValueError(message)
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
82 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def write_fasta_record(record, output_sequence_file):
    """Write ``record`` to ``output_sequence_file`` in FASTA format.

    The header is ``>`` followed by the identifier, a space and the
    description; the sequence follows in lines of at most 60 characters.

    Args:
        record: object exposing ``get_id()``, ``get_description()`` and
            ``get_sequence()``.
        output_sequence_file: writable text file.
    """
    output_sequence_file.write(
        '>' + record.get_id() + ' ' + record.get_description() + '\n')
    seq = record.get_sequence()
    # range() rather than xrange(): the latter does not exist on Python 3.
    for start in range(0, len(seq), 60):
        output_sequence_file.write(seq[start:start + 60] + '\n')
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
90 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def format_qual_value(qual_score, sliding_value, authorized_range, qual_format):
    """Convert one quality character into its numeric score.

    Args:
        qual_score: single quality character.
        sliding_value: ASCII offset subtracted from the character code.
        authorized_range: two-item sequence ``[lowest, highest]`` of the
            accepted scores (both bounds included).
        qual_format: name of the quality format, used in the error message.

    Returns:
        The integer score.

    Raises:
        ValueError: when the score falls outside ``authorized_range``.
    """
    score = ord(qual_score) - sliding_value
    lowest, highest = authorized_range[0], authorized_range[1]
    if score < lowest or score > highest:
        string = os.path.basename(__file__) + ': wrong score ('
        string += str(score) + ') with quality format ('
        # Close the parenthesis opened just above.
        string += qual_format + ')'
        raise ValueError(string)
    return score
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
100 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
# ASCII offset and accepted score range for each supported quality format.
# Illumina 1.3+ and 1.5+ encode Phred scores with an offset of 64; only
# Sanger and Illumina 1.8+ use an offset of 33.
_QUAL_ENCODINGS = {
    'sanger': (33, [0, 40]),
    'solexa': (64, [-5, 40]),
    'illumina_1_3': (64, [0, 40]),
    'illumina_1_5': (64, [3, 40]),
    'illumina_1_8': (33, [0, 41]),
}


def format_qual_string(qual_string, qual_format):
    """Convert one quality character into a score for ``qual_format``.

    Args:
        qual_string: single quality character (it is handed unchanged to
            format_qual_value(), which calls ``ord()`` on it).
        qual_format: one of ``sanger``, ``solexa``, ``illumina_1_3``,
            ``illumina_1_5`` or ``illumina_1_8``.

    Returns:
        The integer score computed by format_qual_value().

    Raises:
        ValueError: when ``qual_format`` is not supported, or when the score
            is outside the range accepted for that format.
    """
    encoding = _QUAL_ENCODINGS.get(qual_format)
    if encoding is None:
        string = os.path.basename(__file__) + ': quality format ('
        string += qual_format + ') is not managed'
        raise ValueError(string)
    offset, score_range = encoding
    return format_qual_value(qual_string, offset, score_range, qual_format)
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
116 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def write_qual_record(record, output_qual_file, qual_format):
    """Write the quality scores of ``record`` to ``output_qual_file``.

    The header is ``>`` followed by the identifier, a space and the
    description; the scores follow as space-separated integers, at most
    60 per line.

    Args:
        record: object exposing ``get_id()``, ``get_description()`` and
            ``get_quality()``.
        output_qual_file: writable text file.
        qual_format: quality format name passed to format_qual_string().

    Raises:
        ValueError: propagated from format_qual_string().
    """
    output_qual_file.write(
        '>' + record.get_id() + ' ' + record.get_description() + '\n')
    scores = [str(format_qual_string(symbol, qual_format))
              for symbol in record.get_quality()]
    # range() rather than xrange(): the latter does not exist on Python 3.
    for start in range(0, len(scores), 60):
        output_qual_file.write(' '.join(scores[start:start + 60]) + '\n')
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
125 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def write_fastq_record(record, output_sequence_file):
    """Write ``record`` to ``output_sequence_file`` as a four-line FASTQ entry.

    Lines are, in order: ``@`` + identifier + space + description, the
    sequence, a lone ``+`` and the quality string.
    """
    header = '@' + record.get_id() + ' ' + record.get_description()
    for line in (header, record.get_sequence(), '+', record.get_quality()):
        output_sequence_file.write(line + '\n')
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
132 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def write_information(record, output_file_formats, output_sequence_file,
                      output_qual_file, qual_format):
    """Write ``record`` in every format named in ``output_file_formats``.

    ``fasta`` and ``fastq`` go to ``output_sequence_file``; ``qual`` goes to
    ``output_qual_file`` and uses ``qual_format`` to decode the scores.
    Formats are handled in the order fasta, qual, fastq.
    """
    requested = output_file_formats
    if "fasta" in requested:
        write_fasta_record(record, output_sequence_file)
    if "qual" in requested:
        write_qual_record(record, output_qual_file, qual_format)
    if "fastq" in requested:
        write_fastq_record(record, output_sequence_file)
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
141 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def fast_test_element_in_list(element,list_to_test):
    """Tell whether ``element`` is found by a forward scan of ``list_to_test``.

    The scan stops at the first item that is greater than or equal to
    ``element``; the answer is True only when that item equals ``element``.
    On a list sorted in ascending order this is a membership test that gives
    up as soon as the element can no longer appear.
    """
    for candidate in list_to_test:
        if candidate >= element:
            # First item not smaller than the target decides the outcome.
            return bool(candidate == element)
    return False
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
157 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
158 ######################### |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
159 # Constraint definition # |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
160 ######################### |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
# Comparison applied for each constraint keyword.
# NOTE(review): 'not_in' maps to the string 'in' rather than to a callable;
# presumably the consumer resolves it and negates the result -- verify.
constraints = dict((
    ('equal', operator.eq),
    ('different', operator.ne),
    ('lower', operator.le),
    ('strictly_lower', operator.lt),
    ('greater', operator.ge),
    ('strictly_greater', operator.gt),
    ('in', operator.contains),
    ('not_in', 'in'),
))
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
171 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
# Record fields that can be extracted, with the Python type of each.
extractable_information = dict(id=str, length=int, description=str)
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
177 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
178 ########### |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
179 # Classes # |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
180 ########### |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
181 class SeqRecord: |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
182 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
183 def __init__(self, seq_id, sequence, description, quality = ""): |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
184 self.id = seq_id |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
185 self.sequence = sequence |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
186 self.quality = quality |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
187 self.description = description |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
188 self.length = len(self.sequence) |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
189 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
190 # Getters |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def get_id(self):
    """Return the ``id`` attribute of the record."""
    return self.id
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
193 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def get_sequence(self):
    """Return the ``sequence`` attribute of the record."""
    return self.sequence
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
196 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def get_quality(self):
    """Return the ``quality`` attribute of the record."""
    return self.quality
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
199 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def get_length(self):
    """Return the ``length`` attribute of the record."""
    return self.length
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
202 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def get_description(self):
    """Return the ``description`` attribute of the record."""
    return self.description
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
205 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def get(self, category):
    """Return the piece of record information named by ``category``.

    Args:
        category: ``'id'``, ``'length'`` or ``'description'``.

    Returns:
        The result of ``self.get_id()``, ``self.get_length()`` or
        ``self.get_description()`` respectively.

    Raises:
        ValueError: If ``category`` is not one of the names above.
    """
    if category == 'id':
        return self.get_id()
    if category == 'length':
        return self.get_length()
    if category == 'description':
        return self.get_description()
    # str() keeps the documented ValueError even for a non-string category
    # (e.g. None); bare concatenation would fail with TypeError instead.
    string = os.path.basename(__file__) + ': '
    string += str(category) + ' can not be extracted from SeqRecord'
    raise ValueError(string)
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
217 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
218 # Other functions |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def extract_information(self, to_extract):
    """Collect several pieces of information from this record.

    Args:
        to_extract: Iterable of category names, each passed to ``self.get``.

    Returns:
        A list with one value per requested name, in the same order.
        Any exception raised by ``self.get`` propagates to the caller.
    """
    return [self.get(info_to_extract) for info_to_extract in to_extract]
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
224 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def test_conservation(self, constraints):
    """Tell whether this record satisfies every supplied constraint.

    Args:
        constraints: Mapping from a category name (looked up with
            ``self.get``) to an iterable of objects exposing
            ``test_constraint(value)``.

    Returns:
        The conjunction of all ``test_constraint`` results; ``True`` when
        the mapping is empty.
    """
    to_conserve = True
    for constrained_info in constraints:
        # The record value is fetched once per category and reused for
        # every constraint attached to that category.
        record_value = self.get(constrained_info)
        for constraint in constraints[constrained_info]:
            to_conserve &= constraint.test_constraint(record_value)
    return to_conserve


# The name starts with "test_" but this is not a unit test: keep test
# runners such as pytest/nose from trying to collect it.
test_conservation.__test__ = False
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
232 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
class Records(object):
    """Records read from a sequence file and the subset passing constraints."""

    def __init__(self, input_filepath, file_format, constraints):
        """Read all records of a file and keep those matching constraints.

        Args:
            input_filepath: Path of the sequence file to read.
            file_format: Format name forwarded to ``next_record``.
            constraints: Constraint mapping forwarded to each record's
                ``test_conservation``.
        """
        self.records = []
        self.conserved_records = []
        with open(input_filepath, 'r') as input_file:
            while True:
                record = next_record(input_file, file_format)
                # A None result from next_record ends the reading loop.
                if record is None:
                    break
                self.records.append(record)
                if record.test_conservation(constraints):
                    # Conserved records are stored as shallow copies.
                    self.conserved_records.append(copy.copy(record))

    # Getters
    def get_records(self):
        """Return a shallow copy of the list of all records read."""
        return copy.copy(self.records)

    def get_record_nb(self):
        """Return the number of records read."""
        return len(self.records)

    def get_conserved_records(self):
        """Return a shallow copy of the list of conserved records."""
        return copy.copy(self.conserved_records)

    def get_conserved_record_nb(self):
        """Return the number of conserved records."""
        return len(self.conserved_records)

    # Other functions
    def save_conserved_records(self, args):
        """Write the conserved records to the output file(s) named in args.

        When ``args.custom_extraction_type`` is the string ``'True'``, one
        tab-separated line per record is written to
        ``args.output_information`` with the fields listed in
        ``args.to_extract``. Otherwise the records are written through
        ``write_information`` to ``args.output_sequence`` (and to
        ``args.output_quality`` when a fastq input is split).

        Args:
            args: Parsed command-line namespace.

        Raises:
            ValueError: If sequences are requested and ``args.format`` is
                neither ``'fasta'`` nor ``'fastq'``.
        """
        if args.custom_extraction_type == 'True':
            # The raw option is wrapped in one leading and one trailing
            # character, which are dropped before splitting on commas.
            to_extract = args.to_extract[1:-1].split(',')
            with open(args.output_information, 'w') as output_information_file:
                # No header row is written, only the data lines.
                for record in self.conserved_records:
                    extracted_info = record.extract_information(to_extract)
                    string = '\t'.join(str(info) for info in extracted_info)
                    output_information_file.write(string + '\n')
            return

        qual_format = None
        if args.format == 'fasta':
            output_file_formats = ['fasta']
        elif args.format == 'fastq':
            if args.split == 'True':
                output_file_formats = ['fasta', 'qual']
                qual_format = args.quality_format
            else:
                output_file_formats = ['fastq']
        else:
            # Fail with a clear message instead of hitting an unbound
            # local further down.
            string = os.path.basename(__file__) + ': '
            string += str(args.format) + ' is not a supported sequence format'
            raise ValueError(string)

        with open(args.output_sequence, 'w') as output_sequence_file:
            output_qual_file = None
            if 'qual' in output_file_formats:
                output_qual_file = open(args.output_quality, 'w')
            try:
                for record in self.conserved_records:
                    write_information(record, output_file_formats,
                                      output_sequence_file, output_qual_file,
                                      qual_format)
            finally:
                # Close the quality file even if writing a record fails.
                if output_qual_file is not None:
                    output_qual_file.close()
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
295 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
class Constraint(object):
    """One filtering rule applied to a single piece of record information."""

    def __init__(self, constraint_type, value, constrained_information):
        """Build a constraint from its textual description.

        Args:
            constraint_type: Key of the module-level ``constraints`` table;
                the associated entry is stored as the comparison callable.
            value: For ``'in'`` and ``'not_in'``, the path of a text file
                holding one reference value per line; for any other type,
                the reference value itself as text.
            constrained_information: Key of the module-level
                ``extractable_information`` table; the associated entry is
                used to convert the textual value(s).

        Raises:
            ValueError: If ``constraint_type`` is not in ``constraints``.
        """
        # "in" works on Python 2 and 3, unlike dict.has_key().
        if constraint_type not in constraints:
            string = os.path.basename(__file__) + ': '
            string += constraint_type + ' is not a correct type of constraint'
            raise ValueError(string)
        self.raw_constraint_type = constraint_type
        self.type = constraints[constraint_type]

        value_format = extractable_information[constrained_information]
        if self.raw_constraint_type in ['in', 'not_in']:
            self.values = []
            with open(value, 'r') as value_file:
                for row in value_file:
                    # Strip only the line terminator: slicing off the last
                    # character would truncate a final line that has no
                    # trailing newline.
                    entry = row.rstrip('\r\n')
                    if not entry:
                        # Blank lines carry no value to compare against.
                        continue
                    self.values.append(value_format(entry))
        else:
            self.values = [value_format(value)]
        # Values are kept sorted before being handed to
        # fast_test_element_in_list.
        self.values.sort()

    def get_raw_constraint_type(self):
        """Return the constraint type name given at construction."""
        return self.raw_constraint_type

    def get_type(self):
        """Return the comparison callable looked up in ``constraints``."""
        return self.type

    def get_values(self):
        """Return the sorted list of converted reference values."""
        return self.values

    def test_constraint(self, similarity_info_value):
        """Tell whether a record value satisfies this constraint.

        Args:
            similarity_info_value: Value taken from a record.

        Returns:
            For ``'in'``/``'not_in'``, the (negated) result of
            ``fast_test_element_in_list`` on the reference values; for any
            other type, the result of the comparison callable applied to
            the value and the single reference value.
        """
        to_conserve = True
        if self.raw_constraint_type == 'in':
            to_conserve &= fast_test_element_in_list(similarity_info_value,
                                                     self.values)
        elif self.raw_constraint_type == 'not_in':
            to_conserve &= (not fast_test_element_in_list(
                similarity_info_value, self.values))
        else:
            to_conserve &= self.type(similarity_info_value, self.values[0])
        return to_conserve
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
337 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
338 ################ |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
339 # Misc methods # |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
340 ################ |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def test_input_filepath(input_filepath, tool, file_format):
    """Check that an input path exists.

    Args:
        input_filepath: Path to check.
        tool: Accepted but not used by this check.
        file_format: Accepted but not used by this check.

    Raises:
        ValueError: If ``input_filepath`` does not exist.
    """
    if not os.path.exists(input_filepath):
        string = os.path.basename(__file__) + ': '
        string += input_filepath + ' does not exist'
        raise ValueError(string)


# The name starts with "test_" but this is not a unit test: keep test
# runners such as pytest/nose from trying to collect it.
test_input_filepath.__test__ = False
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
346 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def format_constraints(constraints):
    """Turn textual constraints into ``Constraint`` objects grouped by field.

    Each constraint is written ``"<information>: <type>: <value>"``.

    Args:
        constraints: Iterable of constraint strings, or ``None``.

    Returns:
        A dict mapping each constrained information name to the list of
        ``Constraint`` objects built for it; empty when ``constraints`` is
        ``None`` or empty.

    Raises:
        ValueError: If a constraint string does not have three
            ``': '``-separated parts.
    """
    formatted_constraints = {}
    if constraints is None:
        return formatted_constraints
    for constr in constraints:
        # Split at most twice so a value that itself contains ': ' is kept
        # whole instead of being silently truncated.
        split_constraint = constr.split(': ', 2)
        if len(split_constraint) != 3:
            string = os.path.basename(__file__) + ': '
            string += constr + ' is not a valid constraint, expected'
            string += ' "information: type: value"'
            raise ValueError(string)
        constrained_information, constraint_type, value = split_constraint
        constraint = Constraint(constraint_type, value,
                                constrained_information)
        formatted_constraints.setdefault(constrained_information, []).append(
            constraint)
    return formatted_constraints
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
358 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
def convert_extract_sequence_file(args):
    """Filter the input sequence file, save the result and write a report.

    Builds the constraints from ``args.constraint``, reads ``args.input``
    into a ``Records`` object, saves the conserved records through
    ``Records.save_conserved_records`` and writes a text report to
    ``args.report``.

    Args:
        args: Parsed command-line namespace.
    """
    constraints = args.constraint
    formatted_constraints = format_constraints(constraints)

    records = Records(args.input, args.format, formatted_constraints)
    records.save_conserved_records(args)

    with open(args.report, 'w') as report_file:
        report_file.write('Information to extract:\n')
        if args.custom_extraction_type == 'True':
            # Same parsing of the raw option as when saving the records.
            for info in args.to_extract[1:-1].split(','):
                report_file.write('\t' + info + '\n')
        else:
            report_file.write('\tsequences\n')

        if constraints is not None:
            report_file.write('Constraints on extraction:\n')
            for constrained_info in formatted_constraints:
                report_file.write('\tInfo to constraint: ' + constrained_info
                                  + '\n')
                for constraint in formatted_constraints[constrained_info]:
                    report_file.write('\t\tType of constraint: ' +
                                      constraint.get_raw_constraint_type()
                                      + '\n')
                    report_file.write('\t\tValues:\n')
                    for value in constraint.get_values():
                        report_file.write('\t\t\t' + str(value) + '\n')

        # NOTE(review): the counts are written whether or not constraints
        # were given -- confirm against the original indentation.
        report_file.write('Number of similarity records: ' +
                          str(records.get_record_nb()) + '\n')
        report_file.write('Number of extracted similarity records: ' +
                          str(records.get_conserved_record_nb()) + '\n')
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
395 |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
396 ######## |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
397 # Main # |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
398 ######## |
01c2b74b3a21
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
bebatut
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and run the conversion.
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', required=True)
    parser.add_argument('--format', required=True)
    # Compared against the string 'True' to choose between writing
    # extracted information and writing sequences.
    parser.add_argument('--custom_extraction_type', required=True)
    parser.add_argument('--to_extract')
    parser.add_argument('--output_information')
    # Compared against the string 'True' for fastq input to write separate
    # sequence and quality files.
    parser.add_argument('--split')
    parser.add_argument('--quality_format')
    parser.add_argument('--output_sequence')
    parser.add_argument('--output_quality')
    # May be repeated; each occurrence is "information: type: value".
    parser.add_argument('--constraint', action='append')
    parser.add_argument('--report', required=True)
    args = parser.parse_args()

    convert_extract_sequence_file(args)