Mercurial > repos > brinkmanlab > make_unique_id
comparison make_unique_id.py @ 1:061c3402a977 draft
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit b661895b5e2dff7235eb32476602c3b93411a42e"
| author | brinkmanlab | 
|---|---|
| date | Mon, 15 Jun 2020 18:48:27 -0400 | 
| parents | a3a09dd8d09a | 
| children | c8bda09480ae | 
   comparison
  equal
  deleted
  inserted
  replaced
| 0:a3a09dd8d09a | 1:061c3402a977 | 
|---|---|
| 27 | 27 | 
| 28 format = sys.argv[1] | 28 format = sys.argv[1] | 
| 29 ids = defaultdict(int) | 29 ids = defaultdict(int) | 
| 30 | 30 | 
| 31 def makeUnique(seq): | 31 def makeUnique(seq): | 
| 32 count = ids[seq.id] | 32 newid = seq.id[:28] # Genbank has a max length for the id and sequence length number, truncate the sequence id if too long | 
| 33 ids[seq.id] += 1 | 33 count = ids[newid] | 
| 34 ids[newid] += 1 | |
| 34 if count: | 35 if count: | 
| 35 suffix = "_" + str(count) | 36 suffix = "_" + str(count) | 
| 36 newid = seq.id | |
| 37 seqlenlen = len(str(len(seq))) | 37 seqlenlen = len(str(len(seq))) | 
| 38 if len(newid) + len(suffix) + 1 + seqlenlen > 28: | 38 if len(newid) + len(suffix) + 1 + seqlenlen > 28: | 
| 39 # Genbank has a max length for the id and sequence length number, truncate the sequence id if too long | |
| 40 newid = newid[:27 - seqlenlen - len(suffix)] | 39 newid = newid[:27 - seqlenlen - len(suffix)] | 
| 41 | 40 | 
| 42 print(f"{seq.id}\t{newid}{suffix}") | 41 newid += suffix | 
| 43 seq.id = newid + suffix | |
| 44 seq.name += suffix | 42 seq.name += suffix | 
| 45 | 43 | 
| 44 if seq.id != newid: | |
| 45 print(f"{seq.id}\t{newid}") | |
| 46 | |
| 47 seq.id = newid | |
| 46 return seq | 48 return seq | 
| 47 | 49 | 
| 48 | 50 | 
| 49 paths = iter(sys.argv[2:]) | 51 paths = iter(sys.argv[2:]) | 
| 50 | 52 | 
