Mercurial > repos > yating-l > rename_tracks
comparison rename_tracks.py @ 0:8b19a698d90e draft
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
| author | yating-l |
|---|---|
| date | Mon, 25 Jun 2018 15:01:41 -0400 |
| parents | |
| children | 72ccb2f848a0 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:8b19a698d90e |
|---|---|
| 1 # -*- coding: utf8 -*- | |
| 2 | |
| 3 """ | |
| 4 Rename the custom evidence tracks so that the tracks use the same sequence names as the renamed reference | |
| 5 """ | |
| 6 import sys | |
| 7 import csv | |
| 8 import subprocess | |
| 9 import tempfile | |
| 10 | |
def rename_interval(inputFile, nameDict, renamedFile):
    """Copy an interval file, renaming the sequence in the first column.

    Each line of ``inputFile`` is written to ``renamedFile``. For lines that
    do not start with ``#``, the first whitespace-delimited field is looked
    up in ``nameDict`` and, when present, replaced by the mapped name.
    Comment lines, blank lines and lines whose first field is not in the
    mapping are copied unchanged.

    Args:
        inputFile: path of the file to read (bed, gff3, gtf or bedGraph are
            the formats the callers in this file pass in).
        nameDict: dict mapping old sequence names to new sequence names.
        renamedFile: path of the file to write.
    """
    with open(inputFile, 'r') as source, open(renamedFile, 'w') as renamed:
        # Stream line by line so large tracks are not held in memory.
        for line in source:
            if not line.startswith("#"):
                fields = line.split(None, 1)
                # ``fields`` is empty for blank lines; leave those untouched.
                if fields and fields[0] in nameDict:
                    # count=1 restricts the substitution to the first field,
                    # so the same text appearing later in the line (for
                    # example inside an attribute) is not rewritten.
                    line = line.replace(fields[0], nameDict[fields[0]], 1)
            renamed.write(line)
| 22 | |
def rename_bam(inputFile, nameDict, renamedFile):
    """Write a copy of a BAM file whose header uses the renamed sequences.

    The header is read with ``samtools view -H``, the ``SN:`` field of every
    ``@SQ`` line is replaced when its value is a key of ``nameDict``, and the
    edited header is piped to ``samtools reheader - inputFile`` whose output
    is written to ``renamedFile``. The function returns only after samtools
    has finished.

    Args:
        inputFile: path of the BAM file to read.
        nameDict: dict mapping old sequence names to new sequence names.
        renamedFile: path of the BAM file to write.

    Raises:
        subprocess.CalledProcessError: if either samtools command exits with
            a non-zero status.
    """
    view_cmd = ['samtools', 'view', '-H', inputFile]
    view = subprocess.Popen(view_cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    header_text, _ = view.communicate()
    if view.returncode != 0:
        raise subprocess.CalledProcessError(view.returncode, view_cmd)

    new_header = _rename_sam_header(header_text, nameDict)

    reheader_cmd = ['samtools', 'reheader', '-', inputFile]
    with open(renamedFile, 'wb') as out:
        reheader = subprocess.Popen(reheader_cmd, stdin=subprocess.PIPE,
                                    stdout=out, universal_newlines=True)
        # communicate() feeds the header, closes stdin and waits, so the
        # output file is complete before it is closed and before we return.
        reheader.communicate(new_header)
    if reheader.returncode != 0:
        raise subprocess.CalledProcessError(reheader.returncode, reheader_cmd)


def _rename_sam_header(header_text, nameDict):
    """Return ``header_text`` with the SN field of each @SQ line renamed."""
    renamed_lines = []
    for line in header_text.splitlines():
        if line.startswith('@SQ'):
            fields = line.split('\t')
            for index, field in enumerate(fields):
                # Exact match on the whole name: renaming "chr1" must not
                # touch "chr10", and names are never treated as patterns.
                if field.startswith('SN:') and field[3:] in nameDict:
                    fields[index] = 'SN:' + str(nameDict[field[3:]])
            line = '\t'.join(fields)
        renamed_lines.append(line)
    if not renamed_lines:
        return ''
    return '\n'.join(renamed_lines) + '\n'
| 33 | |
def rename_bigwig(inputFile, renamedReference, nameDict, renamedFile):
    """Write a copy of a bigWig file that uses the renamed sequence names.

    Steps, each run through an external command or ``rename_interval``:
      1. ``bigWigToBedGraph`` converts ``inputFile`` to bedGraph.
      2. ``faSize -detailed -tab`` on ``renamedReference`` produces the
         chromosome sizes file.
      3. ``rename_interval`` renames the first column using ``nameDict``.
      4. ``sort -k1,1 -k2,2n`` sorts the renamed bedGraph.
      5. ``bedGraphToBigWig`` builds ``renamedFile``.

    Args:
        inputFile: path of the bigWig file to read.
        renamedReference: path of the reference FASTA that already uses the
            new sequence names.
        nameDict: dict mapping old sequence names to new sequence names.
        renamedFile: path of the bigWig file to write.

    Raises:
        subprocess.CalledProcessError: if any external command exits with a
            non-zero status.
    """
    import os

    # Byte-wise collation so the sort order does not depend on the locale.
    sort_env = dict(os.environ, LC_ALL='C')

    # The temporary files are only written by child processes (through the
    # inherited descriptor or by name), so no Python-level buffering option
    # is needed. Closing them deletes them.
    bedGraphFile = tempfile.NamedTemporaryFile()
    chrom_sizes = tempfile.NamedTemporaryFile()
    renamed_bedGraphFile = tempfile.NamedTemporaryFile()
    sorted_renamed_bedGraphFile = tempfile.NamedTemporaryFile()
    try:
        subprocess.check_call(['bigWigToBedGraph', inputFile, bedGraphFile.name])
        subprocess.check_call(['faSize', '-detailed', '-tab', renamedReference],
                              stdout=chrom_sizes)
        # Rename first, then sort: renaming can change the relative order of
        # the sequence names, and the sorted order must be that of the names
        # that end up in the file handed to bedGraphToBigWig.
        rename_interval(bedGraphFile.name, nameDict, renamed_bedGraphFile.name)
        subprocess.check_call(['sort', '-k1,1', '-k2,2n', renamed_bedGraphFile.name],
                              stdout=sorted_renamed_bedGraphFile, env=sort_env)
        subprocess.check_call(['bedGraphToBigWig', sorted_renamed_bedGraphFile.name,
                               chrom_sizes.name, renamedFile])
    finally:
        for handle in (bedGraphFile, chrom_sizes, renamed_bedGraphFile,
                       sorted_renamed_bedGraphFile):
            handle.close()
| 45 | |
def getNameDict(nameMapping):
    """Load the old-name to new-name mapping from a CSV file.

    The first column of each row is used as the key and the second column as
    the value; any further columns are ignored. Blank lines are skipped.

    Args:
        nameMapping: path of the CSV file to read.

    Returns:
        dict mapping the first column to the second column.

    Raises:
        IndexError: if a non-blank row has fewer than two columns.
    """
    nameDict = {}
    with open(nameMapping, 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            # csv.reader yields an empty list for a blank line.
            if not row:
                continue
            if len(row) < 2:
                raise IndexError(
                    "%s, line %d: expected at least two columns, got %r"
                    % (nameMapping, reader.line_num, row))
            nameDict[row[0]] = row[1]
    return nameDict
| 53 | |
def main():
    """Command-line entry point.

    Positional arguments read from ``sys.argv``:
      1. input file
      2. CSV file mapping old sequence names to new ones
      3. input format: bed, gff3, gtf, bam or bigwig
      4. output file
      5. renamed reference FASTA (required for bigwig only)

    Exits with an error message when arguments are missing or the format is
    not one of the supported values.
    """
    interval_formats = ("bed", "gff3", "gtf")
    supported_formats = interval_formats + ("bam", "bigwig")

    args = sys.argv[1:]
    if len(args) < 4:
        sys.exit("Usage: %s <input> <name_mapping.csv> <%s> <output> "
                 "[renamed_reference]"
                 % (sys.argv[0], "|".join(supported_formats)))
    inputFile, nameMapping, inputFormat, outputfile = args[:4]

    # Validate before doing any work so a typo in the format does not end
    # in a silent success with no output written.
    if inputFormat not in supported_formats:
        sys.exit("Unsupported input format: %s (expected one of: %s)"
                 % (inputFormat, ", ".join(supported_formats)))
    if inputFormat == "bigwig" and len(args) < 5:
        sys.exit("The bigwig format requires the renamed reference as a "
                 "fifth argument")

    nameDict = getNameDict(nameMapping)
    if inputFormat in interval_formats:
        rename_interval(inputFile, nameDict, outputfile)
    elif inputFormat == "bam":
        rename_bam(inputFile, nameDict, outputfile)
    else:
        renamedReference = args[4]
        rename_bigwig(inputFile, renamedReference, nameDict, outputfile)
| 67 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
