comparison make_unique_id.py @ 4:f2656e644641 draft

"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit e3bef6bd24b0d0bf5645a5d1083c1fa886c19e4a"
author brinkmanlab
date Tue, 16 Jun 2020 12:47:07 -0400
parents a2258ce2d58c
children
comparison
equal deleted inserted replaced
3:a2258ce2d58c 4:f2656e644641
27 27
28 format = sys.argv[1] 28 format = sys.argv[1]
29 ids = defaultdict(int) 29 ids = defaultdict(int)
30 30
31 def makeUnique(seq): 31 def makeUnique(seq):
32 seqlenlen = len(str(len(seq))) 32 count = ids[seq.id]
33 newid = seq.id[:26 - seqlenlen] # Genbank has a max length for the id and sequence length number, truncate the sequence id if too long 33 ids[seq.id] += 1
34 count = ids[newid]
35 ids[newid] += 1
36 if count: 34 if count:
35 oldid = seq.id
37 suffix = "_" + str(count) 36 suffix = "_" + str(count)
38 if len(newid) + len(suffix) + 1 + seqlenlen > 26: 37 seq.id += suffix
39 newid = newid[:25 - seqlenlen - len(suffix)] 38 seq.name += suffix
39 print(f"{oldid}\t{seq.id}")
40 40
41 newid += suffix
42 seq.name += suffix
43
44 if seq.id != newid:
45 print(f"{seq.id}\t{newid}")
46
47 seq.id = newid
48 return seq 41 return seq
49 42
50 43
51 paths = iter(sys.argv[2:]) 44 paths = iter(sys.argv[2:])
52 45