comparison make_unique_id.py @ 1:061c3402a977 draft

"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit b661895b5e2dff7235eb32476602c3b93411a42e"
author brinkmanlab
date Mon, 15 Jun 2020 18:48:27 -0400
parents a3a09dd8d09a
children c8bda09480ae
comparison
equal deleted inserted replaced
0:a3a09dd8d09a 1:061c3402a977
27 27
28 format = sys.argv[1] 28 format = sys.argv[1]
29 ids = defaultdict(int) 29 ids = defaultdict(int)
30 30
31 def makeUnique(seq): 31 def makeUnique(seq):
32 count = ids[seq.id] 32 newid = seq.id[:28] # Genbank has a max length for the id and sequence length number, truncate the sequence id if too long
33 ids[seq.id] += 1 33 count = ids[newid]
34 ids[newid] += 1
34 if count: 35 if count:
35 suffix = "_" + str(count) 36 suffix = "_" + str(count)
36 newid = seq.id
37 seqlenlen = len(str(len(seq))) 37 seqlenlen = len(str(len(seq)))
38 if len(newid) + len(suffix) + 1 + seqlenlen > 28: 38 if len(newid) + len(suffix) + 1 + seqlenlen > 28:
39 # Genbank has a max length for the id and sequence length number, truncate the sequence id if too long
40 newid = newid[:27 - seqlenlen - len(suffix)] 39 newid = newid[:27 - seqlenlen - len(suffix)]
41 40
42 print(f"{seq.id}\t{newid}{suffix}") 41 newid += suffix
43 seq.id = newid + suffix
44 seq.name += suffix 42 seq.name += suffix
45 43
44 if seq.id != newid:
45 print(f"{seq.id}\t{newid}")
46
47 seq.id = newid
46 return seq 48 return seq
47 49
48 50
49 paths = iter(sys.argv[2:]) 51 paths = iter(sys.argv[2:])
50 52