comparison extract_features.py @ 4:4de31938431b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit 2896dcfd180800d00ea413a59264ef8b11788b8e
author iuc
date Fri, 20 Oct 2017 03:48:57 -0400
parents af307d3285c5
children 09855551d713
comparison
equal deleted inserted replaced
3:f5075dee9d6b 4:4de31938431b
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 import os 3 import argparse
4 import sys 4 import sys
5 import argparse
6 import textwrap 5 import textwrap
6
7 7
8 def main( args ): 8 def main( args ):
9 """ 9 """
10 Extract the protein and coding section from an augustus gff, gtf file 10 Extract the protein and coding section from an augustus gff, gtf file
11 Example file: 11 Example file:
43 if line.startswith('#'): 43 if line.startswith('#'):
44 line = line[2:].strip() 44 line = line[2:].strip()
45 if line.startswith('start gene'): 45 if line.startswith('start gene'):
46 gene_name = line[11:].strip() 46 gene_name = line[11:].strip()
47 47
48 if protein_seq:
49 if line.endswith(']'):
50 protein_seq += line[:-1]
51 po.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( protein_seq, 80 ) ) ) )
52 protein_seq = ''
53 else:
54 protein_seq += line
55
56 if coding_seq:
57 if line.endswith(']'):
58 coding_seq += line[:-1]
59 co.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( coding_seq, 80 ) ) ) )
60 coding_seq = ''
61 else:
62 coding_seq += line
63
48 if args.protein and line.startswith('protein sequence = ['): 64 if args.protein and line.startswith('protein sequence = ['):
49 if line.endswith(']'): 65 if line.endswith(']'):
50 protein_seq = line[20:-1] 66 protein_seq = line[20:-1]
51 po.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( protein_seq, 80 ) ) ) ) 67 po.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( protein_seq, 80 ) ) ) )
52 protein_seq = '' 68 protein_seq = ''
61 coding_seq = '' 77 coding_seq = ''
62 else: 78 else:
63 line = line[19:] 79 line = line[19:]
64 coding_seq = line 80 coding_seq = line
65 81
66 if protein_seq:
67 if line.endswith(']'):
68 protein_seq += line[:-1]
69 po.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( protein_seq, 80 ) ) ) )
70 protein_seq = ''
71 else:
72 protein_seq += line
73
74 if coding_seq:
75 if line.endswith(']'):
76 coding_seq += line[:-1]
77 co.write( '>%s\n%s\n' % (gene_name, '\n'.join( textwrap.wrap( coding_seq, 80 ) ) ) )
78 coding_seq = ''
79 else:
80 coding_seq += line
81 if args.codingseq: 82 if args.codingseq:
82 co.close() 83 co.close()
83 if args.protein: 84 if args.protein:
84 po.close() 85 po.close()
86
85 87
86 if __name__ == '__main__': 88 if __name__ == '__main__':
87 parser = argparse.ArgumentParser() 89 parser = argparse.ArgumentParser()
88 parser.add_argument('-p', '--protein', help='Path to the protein file.') 90 parser.add_argument('-p', '--protein', help='Path to the protein file.')
89 parser.add_argument('-c', '--codingseq', help='Path to the coding file.') 91 parser.add_argument('-c', '--codingseq', help='Path to the coding file.')
90 92
91 args = parser.parse_args() 93 args = parser.parse_args()
92 main( args ) 94 main( args )
93