Mercurial > repos > iuc > transit_tn5gaps
annotate gff_to_prot.py @ 0:e4f44f327cf1 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
author | iuc |
---|---|
date | Tue, 08 Oct 2019 08:24:46 -0400 |
parents | |
children |
rev | line source |
---|---|
0
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
2 import csv |
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
3 import os |
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
4 import sys |
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
5 |
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
6 |
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
def get_description(line, parent):
    """Return the ``Note`` attribute of a CDS row that belongs to *parent*.

    Args:
        line: One row of a GFF file (tab-separated; a trailing newline is
            tolerated).
        parent: Identifier that the row's ``Parent`` attribute must equal.

    Returns:
        The value of the row's ``Note`` attribute when the row is a ``CDS``
        feature whose ``Parent`` equals *parent*; ``'-'`` in every other case
        (different feature type, different or missing parent, missing note,
        comment/directive line, or a row with fewer than nine columns).
    """
    # Strip first: the caller hands over raw lines, and a trailing newline
    # would otherwise end up inside the last attribute value.
    cols = line.strip().split('\t')

    # Comments, directives such as '###' and truncated rows have no
    # attribute column to inspect.
    if len(cols) < 9 or cols[2] != "CDS":
        return '-'

    labels = {}
    for pair in cols[8].split(";"):
        # partition() splits on the first '=' only, so values that contain
        # '=' stay intact and fragments without '=' (e.g. the empty string
        # left by a trailing ';') are detected instead of raising.
        key, sep, value = pair.partition('=')
        if not sep:
            continue
        labels[key.strip()] = value.strip()

    if "Parent" in labels and labels["Parent"] == parent:
        return labels.get("Note", '-')
    return '-'
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
17 |
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
18 |
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
def _parse_gff_attributes(field):
    """Parse a ``key=value;key=value`` attribute column into a dict."""
    labels = {}
    for pair in field.split(";"):
        # Split on the first '=' only so values containing '=' survive, and
        # skip fragments without '=' (e.g. the empty one after a trailing ';').
        key, sep, value = pair.partition('=')
        if not sep:
            continue
        labels[key.strip()] = value.strip()
    return labels


def convert_to_prot_table(fname, output_name):
    """Write one tab-separated row per ``gene`` feature of a GFF file.

    Lines starting with ``#`` are skipped. Rows with fewer than nine
    tab-separated columns are reported on stdout and ignored. Rows of any
    feature type other than ``gene`` produce no output.

    Each output row holds, in order: description, start, end, strand,
    ``abs(end - start) // 3``, ``'-'``, ``'-'``, the ``Name`` attribute
    (empty string if absent), the ``locus_tag`` attribute, ``'-'``.
    The description is looked up with ``get_description`` on the line that
    immediately follows the gene row, and is ``'-'`` when there is no usable
    following row.

    Args:
        fname: Path of the GFF file to read.
        output_name: Path of the file to write.

    Raises:
        KeyError: A gene row has no ``locus_tag`` attribute.
        ValueError: A gene row has non-integer start/end coordinates.
    """
    with open(fname) as gff_file:
        lines = gff_file.readlines()

    # The context manager guarantees the output handle is closed (and its
    # buffer flushed) even when a malformed row raises part-way through.
    with open(output_name, 'w') as output_file:
        writer = csv.writer(output_file, delimiter='\t')
        for i, line in enumerate(lines):
            line = line.strip()
            if line.startswith('#'):
                continue
            cols = line.split('\t')
            if len(cols) < 9:
                print("Ignoring invalid row with entries: {0}".format(cols))
                continue
            if cols[2] != "gene":
                # region, CDS and every other feature type yield no row.
                continue

            start = int(cols[3])
            end = int(cols[4])
            strand = cols[6].strip()
            # Coordinate span divided by three, i.e. a length in codons
            # (presumably the protein length in amino acids -- confirm
            # against the consumer of this table).
            length = abs(end - start) // 3
            labels = _parse_gff_attributes(cols[8])

            locus_tag = labels["locus_tag"]  # error out if not found
            gene = labels.get('Name', '')

            desc = '-'
            if i + 1 < len(lines):
                next_line = lines[i + 1].strip()
                # Only a full nine-column row can carry a description;
                # comments, directives and short rows are not looked at.
                if len(next_line.split('\t')) >= 9:
                    desc = get_description(next_line, labels.get("ID", ""))

            writer.writerow([desc, start, end, strand, length,
                             '-', '-', gene, locus_tag, '-'])
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
52 |
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
53 |
e4f44f327cf1
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit f63413d629e4de3c69984b3a96ad8ccfe0d47ada"
iuc
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: argv[1] is the GFF file to read and argv[2]
    # is the path of the table to write.
    usage_string = "Usage: python gff_to_prot.py <gff filename> <output filename>"

    if len(sys.argv) < 3:
        # Passing a string to sys.exit() prints it to stderr and exits with
        # status 1, so calling tools can detect the failure.
        sys.exit(usage_string)
    file_name = sys.argv[1]
    if not os.path.exists(file_name):
        sys.exit("File not found. Exiting...\n" + usage_string)
    convert_to_prot_table(file_name, sys.argv[2])