Mercurial > repos > iuc > gff_to_prot
comparison gff_to_prot.py @ 0:99810cf51f2e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/transit/ commit 73c6b2baf9dda26c6809a4f36582f7cbdb161ea1
| author | iuc |
|---|---|
| date | Mon, 22 Apr 2019 14:42:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:99810cf51f2e |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import csv | |
| 3 import os | |
| 4 import sys | |
| 5 | |
| 6 | |
def get_description(line, parent):
    """Return the ``Note`` of a CDS feature line that belongs to ``parent``.

    Args:
        line: One tab-separated GFF line; a trailing line terminator is
            tolerated.
        parent: The value the CDS's ``Parent`` attribute must have (the ID of
            the gene being described).

    Returns:
        The ``Note`` attribute when ``line`` is a CDS row whose ``Parent``
        equals ``parent``. Otherwise ``'-'``: for rows with fewer than nine
        columns, non-CDS rows, a CDS with a different or missing ``Parent``,
        or a CDS without a ``Note``.
    """
    # Callers hand over raw lines from readlines(). Without dropping the line
    # terminator the last attribute value would end in '\n', so a trailing
    # "Parent=..." could never equal ``parent``.
    cols = line.rstrip('\r\n').split('\t')
    if len(cols) < 9 or cols[2] != "CDS":
        return '-'

    labels = {}
    for pair in cols[8].split(";"):
        # partition() splits on the first '=' only, so values containing '='
        # survive; fragments without '=' (e.g. after a trailing ';') are
        # skipped instead of raising.
        key, sep, value = pair.partition('=')
        if not sep:
            continue
        labels[key.strip()] = value.strip()

    if labels.get("Parent") == parent:
        return labels.get("Note", '-')
    return '-'
| 17 | |
| 18 | |
def convert_to_prot_table(fname, output_name):
    """Write one tab-separated prot_table row per ``gene`` feature of a GFF file.

    Each output row has the columns: description, start, end, strand,
    ``abs(end - start) // 3``, '-', '-', gene name (``Name`` attribute, or
    '' when absent), locus tag, '-'.

    The description is looked up only on the line immediately following the
    gene line, via ``get_description``; it is '-' when there is no such line
    or the line has fewer than nine columns.

    Lines starting with '#' are skipped silently. Rows with fewer than nine
    tab-separated columns are reported on stdout and skipped. Features other
    than ``gene`` produce no output.

    Args:
        fname: Path of the GFF file to read.
        output_name: Path of the prot_table file to create or overwrite.

    Raises:
        KeyError: If a gene feature has no ``locus_tag`` attribute.
        ValueError: If a gene's start or end column is not an integer.
    """
    # Context managers guarantee both handles are closed even when a row
    # raises part-way through the conversion.
    with open(fname) as gff_file:
        lines = gff_file.readlines()

    with open(output_name, 'w') as output_file:
        writer = csv.writer(output_file, delimiter='\t')
        for i, raw_line in enumerate(lines):
            line = raw_line.strip()
            if line.startswith('#'):
                continue
            cols = line.split('\t')
            if len(cols) < 9:
                print("Ignoring invalid row with entries: {0}".format(cols))
                continue
            if cols[2] != "gene":
                continue

            start = int(cols[3])
            end = int(cols[4])
            strand = cols[6].strip()
            # One third of the coordinate span, truncated.
            # NOTE(review): presumably the protein length in amino acids
            # expected by the prot_table format -- confirm against TRANSIT.
            length = abs(end - start) // 3

            labels = {}
            for pair in cols[8].split(";"):
                # Split on the first '=' only and skip fragments without one,
                # so a trailing ';' or an '=' inside a value does not raise.
                key, sep, value = pair.partition('=')
                if not sep:
                    continue
                labels[key.strip()] = value.strip()

            locus_tag = labels["locus_tag"]  # error out if not found
            gene = labels.get('Name', '')

            desc = '-'
            if i + 1 < len(lines):
                # Strip the look-ahead line so its last attribute does not
                # carry the line terminator, and only hand over rows that
                # actually have an attributes column.
                next_line = lines[i + 1].strip()
                if len(next_line.split('\t')) >= 9:
                    desc = get_description(next_line, labels.get("ID", ""))

            writer.writerow(
                [desc, start, end, strand, length, '-', '-', gene, locus_tag, '-'])
| 52 | |
| 53 | |
if __name__ == "__main__":
    # Command-line entry point: <gff filename> <output filename>.
    # The usage text names the script as it was actually invoked.
    usage_string = "Usage: python {0} <gff filename> <output filename>".format(
        os.path.basename(sys.argv[0]))

    if len(sys.argv) < 3:
        # sys.exit(<str>) prints the message to stderr and exits with status
        # 1, so calling pipelines can detect the failure (exit 0 hid it).
        sys.exit(usage_string)
    file_name = sys.argv[1]
    # isfile() also rejects directories, which would otherwise fail later
    # inside open() with a less helpful error.
    if not os.path.isfile(file_name):
        sys.exit("File not found. Exiting...\n" + usage_string)
    convert_to_prot_table(file_name, sys.argv[2])
