Mercurial > repos > brinkmanlab > make_unique_id
comparison make_unique_id.py @ 4:f2656e644641 draft
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit e3bef6bd24b0d0bf5645a5d1083c1fa886c19e4a"
author | brinkmanlab |
---|---|
date | Tue, 16 Jun 2020 12:47:07 -0400 |
parents | a2258ce2d58c |
children |
comparison
equal
deleted
inserted
replaced
3:a2258ce2d58c | 4:f2656e644641 |
---|---|
27 | 27 |
28 format = sys.argv[1] | 28 format = sys.argv[1] |
29 ids = defaultdict(int) | 29 ids = defaultdict(int) |
30 | 30 |
31 def makeUnique(seq): | 31 def makeUnique(seq): |
32 seqlenlen = len(str(len(seq))) | 32 count = ids[seq.id] |
33 newid = seq.id[:26 - seqlenlen] # Genbank has a max length for the id and sequence length number, truncate the sequence id if too long | 33 ids[seq.id] += 1 |
34 count = ids[newid] | |
35 ids[newid] += 1 | |
36 if count: | 34 if count: |
| | 35 oldid = seq.id |
37 suffix = "_" + str(count) | 36 suffix = "_" + str(count) |
38 if len(newid) + len(suffix) + 1 + seqlenlen > 26: | 37 seq.id += suffix |
39 newid = newid[:25 - seqlenlen - len(suffix)] | 38 seq.name += suffix |
| | 39 print(f"{oldid}\t{seq.id}") |
40 | 40 |
41 newid += suffix | |
42 seq.name += suffix | |
43 | |
44 if seq.id != newid: | |
45 print(f"{seq.id}\t{newid}") | |
46 | |
47 seq.id = newid | |
48 return seq | 41 return seq |
49 | 42 |
50 | 43 |
51 paths = iter(sys.argv[2:]) | 44 paths = iter(sys.argv[2:]) |
52 | 45 |