Mercurial > repos > iss > eurl_vtec_wgs_pt
annotate scripts/ReMatCh/utils/gffParser.py @ 2:65378117a8c0 draft
planemo upload commit cd404de897dc786471b6ea98f9dda612501b2469
| author | iss | 
|---|---|
| date | Thu, 19 Oct 2023 11:19:56 +0000 | 
| parents | c6bab5103a14 | 
| children | 
| rev | line source | 
|---|---|
| 
0
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python3 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
2 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
3 import argparse | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
4 import os | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
5 from Bio import SeqIO | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
6 from Bio.Seq import Seq | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
7 from Bio.SeqRecord import SeqRecord | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
8 import ntpath | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
9 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
10 version = '1.0' | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
11 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
12 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def parse_id(filename):
    """Read the wanted feature IDs from a text file, one ID per line.

    Every line is stripped of surrounding whitespace. Blank lines are skipped
    so that an empty or trailing line does not become an empty-string ID.

    :param filename: path to the text file holding the IDs.
    :return: list of IDs, in file order.
    """
    with open(filename, 'r') as in_handle:
        stripped_lines = (line.strip() for line in in_handle)
        return [gff_id for gff_id in stripped_lines if gff_id]
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
21 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
22 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def retrieve_seq_file(fasta_file, coord_file, extra_seq, filename, output_dir):
    """Extract the regions listed in a coordinates file from a FASTA file.

    Each non-blank line of ``coord_file`` is comma-separated: the first field
    is the contig ID and the last two fields are integer positions. The
    positions, widened by ``extra_seq`` on both sides, are used as Python
    slice bounds on the contig sequence.

    Regions whose widened bounds fall outside the contig are appended to
    ``<output_dir>/<filename>_fails.txt`` as ``contig,start,end``. Every other
    region is written to ``<output_dir>/<filename>.fasta`` with the record ID
    ``<contig>#<start>_<end>`` (widened bounds). A summary is printed at the
    end.

    :param fasta_file: path to the FASTA file holding the contig sequences.
    :param coord_file: path to the comma-separated coordinates file.
    :param extra_seq: number of extra bases to take on each side of a region.
    :param filename: base name (no extension) used for the output files.
    :param output_dir: directory where the output files are written.
    :raises KeyError: if a contig ID from ``coord_file`` is not in the FASTA.
    """
    # Plain 'r' is used on purpose: the legacy 'rU' mode raises ValueError on Python 3.11+.
    with open(fasta_file, 'r') as handle:
        records_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))

    # contig ID -> list of (start, end) tuples requested for that contig
    seq_2_get = {}
    with open(coord_file, 'r') as sequeces2get:
        for line in sequeces2get:
            if not line.strip():
                # a blank (e.g. trailing) line carries no coordinates
                continue
            fields = line.split(',')
            coords = (int(fields[-2]), int(fields[-1]))
            seq_2_get.setdefault(fields[0], []).append(coords)

    with open(output_dir + '/' + filename + '.fasta', 'w') as output_handle:
        fails = 0
        successes = 0
        records = []
        for contig, list_coords in seq_2_get.items():
            contig_seq = records_dict[contig].seq
            for coord in list_coords:
                coord1 = coord[0] - extra_seq
                coord2 = coord[1] + extra_seq
                if coord1 < 0 or coord2 > len(contig_seq):
                    # the widened region does not fit in the contig: log the original coordinates
                    with open(output_dir + '/' + filename + '_fails.txt', 'a') as fail_log:
                        fail_log.write(contig + ',' + str(coord[0]) + ',' + str(coord[1]) + '\n')
                    fails += 1
                else:
                    geneseq = str(contig_seq[coord1:coord2])
                    record = SeqRecord(Seq(geneseq), id=str(str(contig) + '#' + str(coord1) + '_' + str(coord2)),
                                       description='')
                    records.append(record)
                    successes += 1
        SeqIO.write(records, output_handle, "fasta")

    print('Retrived %s features successfully from %s with %s bp as extra'
          ' sequence.' % (str(successes), filename, str(extra_seq)))
    if fails > 0:
        print('%s featrued failed to retrieve. Check %s_fails.txt file.' % (str(fails), filename))
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
66 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
67 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def retrieve_seq(fasta_file, gff_features, extra_seq, filename, output_dir):
    """Extract the sequence of every parsed GFF feature from a FASTA file.

    ``gff_features`` maps a locus ID to ``[contig, start, end, strand]``.
    ``start`` and ``end``, widened by ``extra_seq`` on both sides, are used as
    Python slice bounds on the contig sequence. Features on the ``'-'`` strand
    are reverse-complemented.

    Features whose widened bounds fall outside the contig have their locus ID
    appended to ``<output_dir>/<filename>_fails.txt``. Every other feature is
    written to ``<output_dir>/<filename>.fasta`` with the record ID
    ``<locus>-<contig>#<start>_<end>`` (bounds without the extra sequence).
    A summary is printed at the end.

    :param fasta_file: path to the FASTA file holding the contig sequences.
    :param gff_features: dictionary of locus ID -> [contig, start, end, strand].
    :param extra_seq: number of extra bases to take on each side of a feature.
    :param filename: base name (no extension) used for the output files.
    :param output_dir: directory where the output files are written.
    :raises KeyError: if a feature refers to a contig that is not in the FASTA.
    """
    # parsing the sequence file into a SeqIO dictionary. one contig per entry
    # Plain 'r' is used on purpose: the legacy 'rU' mode raises ValueError on Python 3.11+.
    with open(fasta_file, 'r') as handle:
        records_dict = SeqIO.to_dict(SeqIO.parse(handle, "fasta"))

    with open(output_dir + '/' + filename + '.fasta', 'w') as output_handle:
        fails = 0
        successes = 0
        records = []
        for locus, location in gff_features.items():
            contig_seq = records_dict[location[0]].seq
            coord1 = location[1] - extra_seq
            coord2 = location[2] + extra_seq
            if coord1 < 0 or coord2 > len(contig_seq):
                # the widened feature does not fit in the contig: log its locus ID
                with open(output_dir + '/' + filename + '_fails.txt', 'a') as fail_log:
                    fail_log.write(locus + '\n')
                fails += 1
            else:
                feature_seq = contig_seq[coord1:coord2]
                if location[3] == '-':
                    feature_seq = feature_seq.reverse_complement()
                record = SeqRecord(Seq(str(feature_seq)),
                                   id=str(locus + '-' + str(location[0]) + '#' + str(location[1]) + '_' +
                                          str(location[2])),
                                   description='')
                records.append(record)
                successes += 1
        SeqIO.write(records, output_handle, "fasta")

    print('Retrived %s features successfully from %s with %s bp as extra'
          ' sequence.' % (str(successes), filename, str(extra_seq)))
    if fails > 0:
        print('%s featrued failed to retrieve. Check %s_fails.txt file.' % (str(fails), filename))
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
104 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
105 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def parse_features(temp_gff):
    """Parse the CDS lines of a tab-separated feature file into a dictionary.

    Only lines whose third column contains ``CDS`` are kept. The locus ID is
    the value of the first ``key=value`` pair in the last column. Lines with
    fewer than seven columns (blank lines, stray text) are skipped.

    :param temp_gff: path to the GFF file holding only the feature lines.
    :return: dictionary of locus ID -> [contig, start, end, strand], where
        ``start`` is column 4 minus one and ``end`` is column 5 unchanged.
    :raises IndexError: if the first attribute of a CDS line has no ``=``.
    """
    gff_features = {}

    with open(temp_gff, 'r') as temp_genes:
        for line in temp_genes:
            # Strip the line terminator before splitting: the attributes are the last column,
            # so a lone "ID=x" attribute would otherwise give the locus ID 'x\n'.
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) < 7 or "CDS" not in fields[2]:
                continue
            attributes = fields[-1].split(';')
            locus_id = str(attributes[0].split('=')[1])
            contig = fields[0]
            begining = int(fields[3]) - 1  # shift the start back by one to get the full sequence when slicing
            end = int(fields[4])
            strand = fields[6]
            gff_features[locus_id] = [contig, begining, end, strand]
    return gff_features
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
123 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
124 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def gff_parser(gff_file, extra_seq=0, output_dir='.', keep_temporary_files=False, ids=None, coord_file=None):
    """Retrieve feature (or region) sequences from a GFF file that embeds its contig sequences.

    The GFF is first split into temporary files in ``output_dir``: lines that
    contain a tab go to ``<name>_features.gff`` and the remaining lines go to
    ``<name>_sequence.fasta``; lines starting with ``##`` are dropped.
    ``<name>`` is the base name of ``gff_file`` with ``.gff`` removed.

    * Without ``coord_file``: the feature lines (optionally restricted to the
      IDs listed in ``ids``) are parsed and their sequences retrieved.
    * With ``coord_file``: the feature lines are ignored and the regions
      listed in the coordinates file are retrieved instead.

    :param gff_file: path to the GFF file with both annotations and sequences.
    :param extra_seq: number of extra bases to retrieve on each side.
    :param output_dir: directory where all files are written.
    :param keep_temporary_files: keep the temporary gff/fasta files when True.
    :param ids: optional path to a text file with the wanted IDs, one per line.
    :param coord_file: optional path to a comma-separated coordinates file.
    """
    filename = ntpath.basename(gff_file).replace('.gff', '')

    features_path = output_dir + '/' + filename + '_features.gff'
    sequence_path = output_dir + '/' + filename + '_sequence.fasta'
    fails_path = output_dir + '/' + filename + '_fails.txt'

    # cleaning temp files and fails file if they exist: they are all opened in append mode later on
    for stale_path in (features_path, sequence_path, fails_path):
        if os.path.isfile(stale_path):
            os.remove(stale_path)

    if coord_file is None:
        # a set gives constant-time membership tests for every feature line
        select_ids = set(parse_id(ids)) if ids is not None else None

        # separating the gff into 2 different files: one with the features and another with the contig sequences
        with open(gff_file, 'r') as in_handle, open(features_path, 'a') as temp_genes, \
                open(sequence_path, 'a') as temp_contigs:
            for line in in_handle:
                if line.startswith('##'):
                    continue
                if '\t' not in line:
                    temp_contigs.write(line)
                    continue
                if select_ids is not None:
                    # Strip the line terminator first, otherwise a lone "ID=x" attribute column
                    # yields 'x\n' and never matches the (stripped) selected IDs.
                    items = line.rstrip('\r\n').split('\t')
                    feature_id = items[-1].split(';')[0].split('=')[1]
                    if feature_id not in select_ids:
                        continue
                temp_genes.write(line)

        gff_features = parse_features(features_path)

        retrieve_seq(sequence_path, gff_features, extra_seq, filename, output_dir)

    else:
        # only the embedded sequences are needed when explicit coordinates are given
        with open(gff_file, 'r') as in_handle, open(sequence_path, 'a') as temp_contigs:
            for line in in_handle:
                if not line.startswith('##') and '\t' not in line:
                    temp_contigs.write(line)

        retrieve_seq_file(sequence_path, coord_file, extra_seq, filename, output_dir)

    # removing temp files
    if not keep_temporary_files:
        try:
            os.remove(features_path)
        except OSError:
            # best effort: the features file is never created when a coordinates file is used
            pass
        os.remove(sequence_path)
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
185 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
186 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
def main():
    """Command-line entry point: parse the arguments and run ``gff_parser``.

    Options:
        -i/--input               GFF3 file holding both annotations and sequences (required).
        -x/--extraSeq            Extra sequence to retrieve per feature (int, default 0).
        -k/--keepTemporaryFiles  Keep the temporary gff and fasta files.
        -o/--outputDir           Output directory (default '.'); created when missing.
        -s/--select              txt file with the IDs of interest (mutually exclusive with -f).
        -f/--fromFile            csv file with contig,start,end coordinates (mutually exclusive with -s).

    Every path is converted to an absolute path before being handed to ``gff_parser``.
    """
    parser = argparse.ArgumentParser(prog='gffParser.py', description='GFF3 parser for feature sequence retrieval.',
                                     epilog='by C I Mendes (cimendes@medicina.ulisboa.pt)')
    parser.add_argument('--version', help='Version information', action='version', version='%(prog)s v' + version)

    parser.add_argument('-i', '--input',
                        help='GFF3 file to parse, containing both sequences and annotations (like the one obtained from'
                             ' PROKKA).', type=argparse.FileType('r'), required=True)
    parser.add_argument('-x', '--extraSeq', help='Extra sequence to retrieve per feature in gff.', default=0, type=int,
                        required=False)
    parser.add_argument('-k', '--keepTemporaryFiles', help='Keep temporary gff(without sequence) and fasta files.',
                        action='store_true')
    parser.add_argument('-o', '--outputDir', help='Path to where the output is to be saved.', default='.',
                        required=False)

    parser_optional_selected_regions_exclusive = parser.add_mutually_exclusive_group()
    parser_optional_selected_regions_exclusive.add_argument('-s', '--select',
                                                            help='txt file with the IDs of interest, one per line',
                                                            type=argparse.FileType('r'), required=False)
    parser_optional_selected_regions_exclusive.add_argument('-f', '--fromFile',
                                                            help='Sequence coordinates to be retrieved. Requires contig'
                                                                 ' ID and coords (contig,start,end) in a csv file. One'
                                                                 ' per line.', type=argparse.FileType('r'),
                                                            required=False)

    args = parser.parse_args()

    def _path_of(handle):
        # argparse.FileType already opened the file (which validates that it is readable), but only
        # its path is forwarded to gff_parser; close the handle here so it is not leaked.
        if handle is None:
            return None
        path = os.path.abspath(handle.name)
        handle.close()
        return path

    input_path = _path_of(args.input)
    select_path = _path_of(args.select)
    from_file_path = _path_of(args.fromFile)

    # exist_ok avoids the check-then-create race of a separate isdir() test.
    args.outputDir = os.path.abspath(args.outputDir)
    os.makedirs(args.outputDir, exist_ok=True)

    gff_parser(input_path, args.extraSeq, args.outputDir, args.keepTemporaryFiles, select_path, from_file_path)
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
222 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
223 | 
| 
 
c6bab5103a14
"planemo upload commit 6abf3e299d82d07e6c3cf8642bdea80e96df64c3-dirty"
 
iss 
parents:  
diff
changeset
 | 
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
