annotate extract_features.py @ 7:09855551d713 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit bba7f5df059fcbeb06e89cf689e9a04d4f22cb76"
author iuc
date Thu, 15 Jul 2021 17:16:12 +0000
parents 4de31938431b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
af307d3285c5 Uploaded
bgruening
parents:
diff changeset
1 #!/usr/bin/env python
af307d3285c5 Uploaded
bgruening
parents:
diff changeset
2
4
4de31938431b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit 2896dcfd180800d00ea413a59264ef8b11788b8e
iuc
parents: 0
diff changeset
3 import argparse
0
af307d3285c5 Uploaded
bgruening
parents:
diff changeset
4 import sys
af307d3285c5 Uploaded
bgruening
parents:
diff changeset
5 import textwrap
af307d3285c5 Uploaded
bgruening
parents:
diff changeset
6
4
4de31938431b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit 2896dcfd180800d00ea413a59264ef8b11788b8e
iuc
parents: 0
diff changeset
7
7
09855551d713 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit bba7f5df059fcbeb06e89cf689e9a04d4f22cb76"
iuc
parents: 4
diff changeset
def _write_fasta(handle, name, sequence):
    """Write one FASTA record to *handle*, wrapping the sequence at 80 columns.

    Raises ValueError when *name* is None, i.e. when a sequence was found
    before any "# start gene" comment named the gene it belongs to.
    """
    if name is None:
        raise ValueError(
            "Found a sequence before any '# start gene' comment; "
            "cannot name the FASTA record."
        )
    handle.write(">%s\n%s\n" % (name, "\n".join(textwrap.wrap(sequence, 80))))


def main(args):
    """
    Extract the protein and coding sequences from an augustus gff/gtf file.

    The augustus output is read from standard input. Sequences are stored in
    it as comment lines, possibly spread over several lines, and are closed
    by a "]". Each sequence is written as a FASTA record that is named after
    the most recent "# start gene" comment.

    Example input:
        # start gene g1
        HS04636 AUGUSTUS stop_codon 6901 6903 . + 0 Parent=g1.t1
        # protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFY
        # PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL]
        # end gene g1

    Parameters:
        args: parsed command-line arguments. ``args.protein`` and
            ``args.codingseq`` are the output paths for the protein and the
            coding sequences; a false value disables that output.

    Raises:
        ValueError: if a requested sequence appears before any
            "# start gene" comment.

    A sequence whose closing "]" never appears in the input is not written.
    """
    # Imported here so the function does not rely on a new file-level import.
    import contextlib

    gene_marker = "start gene "
    gene_name = None
    # Prefix of the multi-line sequence currently being collected, or None.
    # An explicit marker (rather than the truthiness of the collected text)
    # keeps the collection alive when the opening line holds no residues.
    active = None
    chunks = []

    # ExitStack closes whichever files were opened, even if parsing fails.
    with contextlib.ExitStack() as stack:
        # Maps the comment prefix that opens a sequence to its output file.
        outputs = {}
        if args.protein:
            outputs["protein sequence = ["] = stack.enter_context(
                open(args.protein, "w")
            )
        if args.codingseq:
            outputs["coding sequence = ["] = stack.enter_context(
                open(args.codingseq, "w")
            )

        for line in sys.stdin:
            # protein- and coding-sequence are stored as comments
            if not line.startswith("#"):
                continue
            # Drop the leading "# " and the surrounding whitespace.
            text = line[2:].strip()

            if text.startswith("start gene"):
                gene_name = text[len(gene_marker):].strip()

            if active is not None:
                # Continuation line of a sequence that is still open.
                closed = text.endswith("]")
                chunks.append(text[:-1] if closed else text)
                if closed:
                    _write_fasta(outputs[active], gene_name, "".join(chunks))
                    active, chunks = None, []
                continue

            for prefix, handle in outputs.items():
                if not text.startswith(prefix):
                    continue
                body = text[len(prefix):]
                if body.endswith("]"):
                    # The whole sequence fits on a single line.
                    _write_fasta(handle, gene_name, body[:-1])
                else:
                    active, chunks = prefix, [body]
                break
af307d3285c5 Uploaded
bgruening
parents:
diff changeset
98
4
4de31938431b planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit 2896dcfd180800d00ea413a59264ef8b11788b8e
iuc
parents: 0
diff changeset
99
7
09855551d713 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit bba7f5df059fcbeb06e89cf689e9a04d4f22cb76"
iuc
parents: 4
diff changeset
if __name__ == "__main__":
    # Script entry point: declare the two optional output paths, parse the
    # command line and hand the result to main(), which reads the augustus
    # output from standard input.
    cli = argparse.ArgumentParser()
    option_specs = (
        (("-p", "--protein"), "Path to the protein file."),
        (("-c", "--codingseq"), "Path to the coding file."),
    )
    for flags, description in option_specs:
        cli.add_argument(*flags, help=description)

    main(cli.parse_args())