comparison sequence_format_converter.py @ 1:9ce7ccd468aa draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sequence_format_converter commit dfecfb40f245a3cdb09dd1cfe37be4cb164ad2eb
author artbio
date Fri, 16 Feb 2018 04:55:44 -0500
parents a8aacccd79a3
children 772bd67ef26a
comparison
equal deleted inserted replaced
0:a8aacccd79a3 1:9ce7ccd468aa
38 reference = ['A', 'T', 'G', 'C', 'N'] 38 reference = ['A', 'T', 'G', 'C', 'N']
39 format = '' 39 format = ''
40 try: 40 try:
41 for l in range(4): 41 for l in range(4):
42 block.append(input.readline()[:-1]) 42 block.append(input.readline()[:-1])
43 except: 43 except IndexError:
44 logging.info("File hasn't at leat four lines !") 44 logging.info("File hasn't at leat four lines !")
45 sys.exit("File hasn't at leat four lines !") 45 sys.exit("File hasn't at leat four lines !")
46 input.close() 46 input.close()
47 line1, line2, line3, line4 = block[0], block[1], block[2], block[3] 47 line1, line2, line3, line4 = block[0], block[1], block[2], block[3]
48 if line1[0] == '>' and line3[0] == '>': 48 if line1[0] == '>' and line3[0] == '>':
79 try: 79 try:
80 for line in block: 80 for line in block:
81 if line[0] == '>': 81 if line[0] == '>':
82 int(line.split('_')[-1]) 82 int(line.split('_')[-1])
83 return 'fastaw' 83 return 'fastaw'
84 except: 84 except ValueError:
85 return 'fasta' 85 return 'fasta'
86 if line1[0] == '@' and line3[0] == '+': 86 if line1[0] == '@' and line3[0] == '+':
87 nucleotides = set([base for base in line2]) 87 nucleotides = set([base for base in line2])
88 for nucleotide in nucleotides: 88 for nucleotide in nucleotides:
89 if nucleotide not in reference: 89 if nucleotide not in reference:
96 if len(line.split('\t')) != 2: 96 if len(line.split('\t')) != 2:
97 logging.info("No valid format detected") 97 logging.info("No valid format detected")
98 sys.exit('No valid format detected') 98 sys.exit('No valid format detected')
99 try: 99 try:
100 int(line.split('\t')[-1]) 100 int(line.split('\t')[-1])
101 except: 101 except ValueError:
102 logging.info("No valid format detected") 102 logging.info("No valid format detected")
103 sys.exit('No valid format detected') 103 sys.exit('No valid format detected')
104 for nucleotide in line.split('\t')[0]: 104 for nucleotide in line.split('\t')[0]:
105 if nucleotide not in reference: 105 if nucleotide not in reference:
106 logging.info("No valid format detected") 106 logging.info("No valid format detected")
110 def read(self, input, format): 110 def read(self, input, format):
111 input = open(input, 'r') 111 input = open(input, 'r')
112 if format == 'fasta': 112 if format == 'fasta':
113 try: 113 try:
114 self.readfasta(input) 114 self.readfasta(input)
115 except: 115 except Exception:
116 logging.info("an error occured while reading fasta") 116 logging.info("an error occured while reading fasta")
117 elif format == 'fastaw': 117 elif format == 'fastaw':
118 try: 118 try:
119 self.readfastaw(input) 119 self.readfastaw(input)
120 except: 120 except Exception:
121 logging.info("an error occured while reading fastaw") 121 logging.info("an error occured while reading fastaw")
122 elif format == 'tabular': 122 elif format == 'tabular':
123 try: 123 try:
124 self.readtabular(input) 124 self.readtabular(input)
125 except: 125 except Exception:
126 logging.info("an error occured while reading tabular") 126 logging.info("an error occured while reading tabular")
127 elif format == 'fastq': 127 elif format == 'fastq':
128 try: 128 try:
129 self.readfastq(input) 129 self.readfastq(input)
130 except: 130 except Exception:
131 logging.info("an error occured while reading fastq") 131 logging.info("an error occured while reading fastq")
132 else: 132 else:
133 logging.info("no valid format detected") 133 logging.info("no valid format detected")
134 sys.exit('No valid format detected') 134 sys.exit('No valid format detected')
135 135