Mercurial > repos > brinkmanlab > make_unique_id
comparison make_unique_id.py @ 3:a2258ce2d58c draft
"planemo upload for repository https://github.com/brinkmanlab/galaxy-tools/tree/master/make_unique_id commit 08b7b36e1eba516e4d7eb97086baf7ec8d177c5d"
| author | brinkmanlab |
|---|---|
| date | Mon, 15 Jun 2020 19:32:32 -0400 |
| parents | c8bda09480ae |
| children | f2656e644641 |
comparison
equal
deleted
inserted
replaced
| 2:c8bda09480ae | 3:a2258ce2d58c |
|---|---|
| 27 | 27 |
| 28 format = sys.argv[1] | 28 format = sys.argv[1] |
| 29 ids = defaultdict(int) | 29 ids = defaultdict(int) |
| 30 | 30 |
| 31 def makeUnique(seq): | 31 def makeUnique(seq): |
| 32 newid = seq.id[:16] # Genbank has a max length for the id and sequence length number, truncate the sequence id if too long | 32 seqlenlen = len(str(len(seq))) |
| | 33 newid = seq.id[:26 - seqlenlen] # Genbank has a max length for the id and sequence length number, truncate the sequence id if too long |
| 33 count = ids[newid] | 34 count = ids[newid] |
| 34 ids[newid] += 1 | 35 ids[newid] += 1 |
| 35 if count: | 36 if count: |
| 36 suffix = "_" + str(count) | 37 suffix = "_" + str(count) |
| 37 seqlenlen = len(str(len(seq))) | 38 if len(newid) + len(suffix) + 1 + seqlenlen > 26: |
| 38 if len(newid) + len(suffix) + 1 + seqlenlen > 16: | 39 newid = newid[:25 - seqlenlen - len(suffix)] |
| 39 newid = newid[:16 - seqlenlen - len(suffix)] | |
| 40 | 40 |
| 41 newid += suffix | 41 newid += suffix |
| 42 seq.name += suffix | 42 seq.name += suffix |
| 43 | 43 |
| 44 if seq.id != newid: | 44 if seq.id != newid: |
| 45 print(f"{seq.id}\t{newid}") | 45 print(f"{seq.id}\t{newid}") |
| 46 | 46 |
| 47 seq.id = newid | 47 seq.id = newid |
| 48 return seq | 48 return seq |
| 49 | 49 |
| | 50 |
| 50 paths = iter(sys.argv[2:]) | 51 paths = iter(sys.argv[2:]) |
| 51 | 52 |
| 52 for input, output in zip(paths, paths): | 53 for input, output in zip(paths, paths): |
| 53 SeqIO.write( | 54 SeqIO.write( |
| 54 map(makeUnique, SeqIO.parse(input, format)), | 55 map(makeUnique, SeqIO.parse(input, format)), |
| 55 output, | 56 output, |
| 56 format | 57 format |
| 57 ) | 58 ) |
| 58 | 59 |
| | 60 |
