Mercurial > repos > artbio > sequence_format_converter
annotate sequence_format_converter.py @ 1:9ce7ccd468aa draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit dfecfb40f245a3cdb09dd1cfe37be4cb164ad2eb
author | artbio |
---|---|
date | Fri, 16 Feb 2018 04:55:44 -0500 |
parents | a8aacccd79a3 |
children | 772bd67ef26a |
rev | line source |
---|---|
0
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
1 #!/usr/bin/env python |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
2 # |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
3 import argparse |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
4 import logging |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
5 import sys |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
6 from collections import defaultdict |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
7 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
8 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
def Parser(argv=None):
    """Build the command-line interface and return the parsed arguments.

    Args:
        argv: optional list of argument strings to parse. When None (the
            default) argparse falls back to ``sys.argv[1:]``, which is what
            the original zero-argument call did.

    Returns:
        argparse.Namespace with the string attributes ``input``, ``output``
        and ``format``; each one is None when its option is not supplied.
    """
    the_parser = argparse.ArgumentParser()
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal, so no source indentation leaks into the help text.
    the_parser.add_argument(
        '--input', action="store", type=str,
        help="input file, accepted format: fastq, fasta, fasta_weigthed, "
             "tabular")
    the_parser.add_argument(
        '--output', action="store", type=str, help="output converted file")
    the_parser.add_argument(
        '--format', action="store", type=str,
        help="select output format (fasta, fasta_weigthed, tabular)")
    return the_parser.parse_args(argv)
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
22 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
23 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
24 class Sequencing: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
25 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
def __init__(self, input, output, format):
    """Run a whole conversion: detect the input format, read, then write.

    Args:
        input: path of the file to convert.
        output: path of the file to create; it is opened for writing
            before the input is inspected.
        format: name of the requested output format, stored as
            ``self.outputformat`` and passed to ``self.write``.
    """
    # Counter of sequences, starting from 0 for any unseen key.
    self.seqdic = defaultdict(int)
    self.input = input
    out_handle = open(output, 'w')
    self.output = out_handle
    self.outputformat = format
    detected = self.detectformat(input)
    self.inputformat = detected
    # Fill self.seqdic from the input, then dump it in the requested format.
    self.read(input, detected)
    self.write(out_handle, format)
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
34 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
def detectformat(self, input):
    """Guess the format of a sequence file from its first four lines.

    Args:
        input: path of the file to inspect.

    Returns:
        One of 'fasta', 'fastaw', 'fastq' or 'tabular':
        - 'fasta' / 'fastaw' when line 1 starts with '>'; 'fastaw' only if
          every header among the four lines ends with '_<integer>'.
        - 'fastq' when line 1 starts with '@' and line 3 with '+'.
        - 'tabular' when each of the four lines is
          '<sequence><TAB><integer>'.

    Raises:
        SystemExit: when the file has fewer than four lines, when sequence
            lines contain characters other than A, T, G, C or N, or when no
            format matches.
    """
    reference = frozenset('ATGCN')

    def has_unexpected(sequence):
        # True as soon as one character is outside the accepted alphabet.
        return any(base not in reference for base in sequence)

    # The context manager closes the file on every path, including exits.
    with open(input, 'r') as handle:
        raw_lines = [handle.readline() for _ in range(4)]
    # readline() returns '' only at end of file (a blank line is '\n'), so
    # this is the reliable test for "fewer than four lines".
    if '' in raw_lines:
        logging.info("File hasn't at leat four lines !")
        sys.exit("File hasn't at leat four lines !")
    # Strip only a real newline: the last line of a file may have none, and
    # blindly dropping the final character would eat a base or a digit.
    block = [line.rstrip('\n') for line in raw_lines]
    line1, line2, line3, line4 = block

    if line1.startswith('>'):
        # The position of the second header (if any) tells which of the
        # remaining lines carry sequence.
        if line3.startswith('>'):
            logging.info("'>' detected in lines 1 and 3")
            sequence = line2 + line4
        elif line4.startswith('>'):
            logging.info("'>' detected in lines 1 and 4")
            sequence = line2 + line3
        else:
            logging.info("'>' detected in lines 1")
            sequence = line2 + line3 + line4
        if has_unexpected(sequence.upper()):
            logging.info("But other nucleotides that A, T, G, C or N")
            sys.exit('input appears to be Fasta but with '
                     'unexpected nucleotides')
        # Weighted fasta headers end with '_<count>'; a header without an
        # integer suffix means plain fasta.
        try:
            for line in block:
                if line.startswith('>'):
                    int(line.split('_')[-1])
        except ValueError:
            return 'fasta'
        return 'fastaw'

    if line1.startswith('@') and line3.startswith('+'):
        # Unlike the fasta branch, the read is checked as-is (no upper()).
        if has_unexpected(line2):
            logging.info("Looks like fastq input but other nucleotides "
                         "that A, T, G, C or N")
            sys.exit("input appears to be Fastq "
                     "but with unexpected nucleotides")
        return 'fastq'

    for line in block:
        fields = line.split('\t')
        if len(fields) != 2:
            logging.info("No valid format detected")
            sys.exit('No valid format detected')
        try:
            int(fields[-1])
        except ValueError:
            logging.info("No valid format detected")
            sys.exit('No valid format detected')
        if has_unexpected(fields[0]):
            logging.info("No valid format detected")
            sys.exit('No valid format detected')
    return 'tabular'
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
109 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
def read(self, input, format):
    """Open the input file and hand it to the reader matching ``format``.

    Args:
        input: path of the file to read.
        format: one of 'fasta', 'fastaw', 'tabular' or 'fastq'.

    Raises:
        SystemExit: when ``format`` is not one of the four known names.

    Any exception raised by the selected reader is logged and swallowed, so
    reading stays best-effort as before.
    """
    readers = {
        'fasta': 'readfasta',
        'fastaw': 'readfastaw',
        'tabular': 'readtabular',
        'fastq': 'readfastq',
    }
    # The context manager guarantees the handle is closed even when the
    # reader fails or the format is rejected.
    with open(input, 'r') as handle:
        if format not in readers:
            logging.info("no valid format detected")
            sys.exit('No valid format detected')
        try:
            getattr(self, readers[format])(handle)
        except Exception:
            # Same message and level as before; exc_info keeps the cause
            # available when logging is enabled instead of discarding it.
            logging.info("an error occured while reading %s", format,
                         exc_info=True)
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
135 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
136 def readfastaw(self, input): |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
137 for line in input: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
138 if line[0] == ">": |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
139 weigth = int(line[:-1].split("_")[-1]) |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
140 else: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
141 self.seqdic[line[:-1]] += weigth |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
142 input.close() |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
143 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
144 def readfasta(self, input): |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
145 ''' this method is able to read multi-line fasta sequence''' |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
146 for line in input: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
147 if line[0] == ">": |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
148 try: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
149 # to dump the sequence of the previous item |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
150 # try because of first missing stringlist variable |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
151 self.seqdic["".join(stringlist)] += 1 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
152 except NameError: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
153 pass |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
154 stringlist = [] |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
155 else: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
156 try: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
157 stringlist.append(line[:-1]) |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
158 except UnboundLocalError: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
159 # if file went through filter and contains only empty lines |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
160 logging.info("first line is empty.") |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
161 try: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
162 self.seqdic["".join(stringlist)] += 1 # for the last sequence |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
163 except NameError: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
164 logging.info("input file has not fasta sequences.") |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
165 input.close() |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
166 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
167 def readtabular(self, input): |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
168 for line in input: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
169 fields = line[:-1].split('\t') |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
170 self.seqdic[fields[0]] += int(fields[1]) |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
171 input.close() |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
172 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
173 def readfastq(self, input): |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
174 linecount = 0 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
175 for line in input: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
176 linecount += 1 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
177 if linecount % 4 == 2: |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
178 self.seqdic[line[:-1]] += 1 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
179 input.close() |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
180 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
181 def write(self, output, format='fasta'): |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
182 if format == 'fasta': |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
183 headercount = 0 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
184 for seq in sorted(self.seqdic, key=self.seqdic.get, reverse=True): |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
185 for i in range(self.seqdic[seq]): |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
186 headercount += 1 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
187 output.write('>%s\n%s\n' % (headercount, seq)) |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
188 elif format == 'fastaw': |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
189 headercount = 0 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
190 for seq in sorted(self.seqdic, key=self.seqdic.get, reverse=True): |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
191 headercount += 1 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
192 output.write('>%s_%s\n%s\n' % (headercount, |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
193 self.seqdic[seq], seq)) |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
194 elif format == 'tabular': |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
195 for seq in sorted(self.seqdic, key=self.seqdic.get, reverse=True): |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
196 output.write('%s\t%s\n' % (seq, self.seqdic[seq])) |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
197 output.close() |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
198 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
199 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
200 def main(input, output, format): |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
201 Sequencing(input, output, format) |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
202 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
203 |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
204 if __name__ == "__main__": |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
205 args = Parser() |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
206 log = logging.getLogger(__name__) |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
207 logging.basicConfig(stream=sys.stdout, level=logging.INFO) |
a8aacccd79a3
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit d6ef80f9db43eae4f58b33f58b5ef6f8209907db
artbio
parents:
diff
changeset
|
208 main(args.input, args.output, args.format) |