annotate gff_to_prot.py @ 1:c68753eedf72 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit bd061799ebd49ada1a666d0815720c18291728da"
author iuc
date Wed, 16 Oct 2019 04:32:50 -0400
parents e4f44f327cf1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
2 import csv
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
3 import os
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
4 import sys
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
5
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
6
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
def get_description(line, parent):
    """Return the ``Note`` attribute of a CDS row that belongs to *parent*.

    Args:
        line: One row of a GFF file; a trailing line terminator is allowed.
        parent: The ``ID`` of the gene the CDS must reference through its
            ``Parent`` attribute.

    Returns:
        The value of the row's ``Note`` attribute, or ``'-'`` when the row is
        not a CDS, has fewer than nine columns, names a different parent, or
        carries no ``Note``.
    """
    # The caller hands over raw lines from readlines(); without dropping the
    # terminator the last attribute value would end in "\n" and never match.
    cols = line.rstrip('\r\n').split('\t')

    # Comment lines, blank lines and truncated rows have no attribute column.
    if len(cols) < 9 or cols[2] != "CDS":
        return '-'

    labels = {}
    for pair in cols[8].split(";"):
        # partition() splits on the first '=' only, so values containing '='
        # survive, and empty fragments from a trailing ';' are skipped.
        key, sep, value = pair.partition('=')
        if not sep:
            continue
        labels[key.strip()] = value.strip()

    if labels.get("Parent") == parent:
        return labels.get("Note", '-')
    return '-'
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
17
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
18
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
def convert_to_prot_table(fname, output_name):
    """Write one tab-separated row per ``gene`` feature of a GFF file.

    Each output row holds, in order: the description taken from the row that
    immediately follows the gene (via ``get_description``), start, end,
    strand, ``abs(end - start) // 3``, two ``'-'`` placeholders, the ``Name``
    attribute (empty when absent), the ``locus_tag`` attribute, and a final
    ``'-'`` placeholder.

    Lines starting with ``#`` are skipped, rows with fewer than nine columns
    are reported on stdout and ignored, and every feature type other than
    ``gene`` is skipped.

    Args:
        fname: Path of the GFF file to read.
        output_name: Path of the table to write (overwritten if present).

    Raises:
        KeyError: If a gene row has no ``locus_tag`` attribute.
        ValueError: If a gene row's start or end column is not an integer.
    """
    with open(fname) as gff_file:
        lines = gff_file.readlines()

    # The context manager closes the output even when a malformed row raises.
    with open(output_name, 'w') as output_file:
        writer = csv.writer(output_file, delimiter='\t')
        for i, line in enumerate(lines):
            line = line.strip()
            if line.startswith('#'):
                continue
            cols = line.split('\t')
            if len(cols) < 9:
                print("Ignoring invalid row with entries: {0}".format(cols))
                continue
            if cols[2] != "gene":
                # region, CDS and every other feature type produce no row.
                continue

            start = int(cols[3])
            end = int(cols[4])
            strand = cols[6].strip()
            # Coordinate span divided by three; presumably the length in
            # codons / amino acids -- TODO confirm against the consumer.
            length = abs(end - start) // 3

            labels = {}
            for pair in cols[8].split(";"):
                # Split on the first '=' only and skip empty fragments, so a
                # trailing ';' or an '=' inside a value no longer aborts.
                key, sep, value = pair.partition('=')
                if not sep:
                    continue
                labels[key.strip()] = value.strip()

            locus_tag = labels["locus_tag"]  # error out if not found
            gene = labels.get('Name', '')
            if i + 1 < len(lines):
                # Strip the following row just like the gene row, so the line
                # terminator cannot leak into the last attribute value.
                desc = get_description(lines[i + 1].strip(), labels.get("ID", ""))
            else:
                desc = '-'
            writer.writerow(
                [desc, start, end, strand, length, '-', '-', gene, locus_tag, '-']
            )
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
52
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
53
e4f44f327cf1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: <gff filename> <output filename>.
    # Name the script as it was actually invoked rather than a fixed string.
    usage_string = "Usage: python {0} <gff filename> <output filename>".format(
        os.path.basename(sys.argv[0])
    )

    # Both failures are reported on stderr with a non-zero status so that a
    # calling shell or workflow engine does not mistake them for success.
    if len(sys.argv) < 3:
        sys.stderr.write(usage_string + "\n")
        sys.exit(1)

    file_name = sys.argv[1]
    if not os.path.exists(file_name):
        sys.stderr.write("File not found. Exiting...\n")
        sys.stderr.write(usage_string + "\n")
        sys.exit(1)

    convert_to_prot_table(file_name, sys.argv[2])