annotate createMissingFiles.py @ 1:7f49f3aa3861 draft

planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 6b8463ba27d0c91b736d579b0891632d4c032402
author iuc
date Wed, 22 Jan 2025 14:10:45 +0000
parents a9a78a68d6c8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
1 import glob
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
2 import os
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
3 import subprocess
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
4
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
# Directory holding the result files this script inspects and completes.
# Paths are built by plain string concatenation, so the trailing slash is
# required.
dirPrefix = "resultDir/"

# Expected segment identifiers mapped to the segment number that is used in
# the amended-consensus file names (resultDir_<segNr>.fa).
expectedSegments = {
    "A_MP": 7,
    "A_NP": 5,
    "A_HA": 4,
    "A_PB1": 2,
    "A_PB2": 1,
    "A_NA": 6,
    "A_PA": 3,
    "A_NS": 8,
}
0
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
8
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
9
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
def renameSubtypeFiles(identifier):
    """Drop the subtype suffix from the result files of one segment.

    Every file in ``dirPrefix`` matching ``A_<identifier>_*.*`` is renamed to
    ``A_<identifier>.<ext>``, where ``<ext>`` is the text after the last dot
    of the original name.

    Args:
        identifier: Segment name without the ``A_`` prefix, e.g. ``"HA"``.

    Note:
        Matches that share an extension are all renamed onto the same target
        name, so they collide.
    """
    # The target stem is the same for every match, so build it once.
    targetStem = dirPrefix + "A_" + identifier
    for path in glob.glob(targetStem + "_*.*"):
        ext = path.rsplit(".", 1)[-1]
        os.rename(path, targetStem + "." + ext)
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
15
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
16
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
def getMissingSegments():
    """Return the expected segments that have no FASTA file in ``dirPrefix``.

    A segment counts as present when ``dirPrefix`` contains a file ending in
    ``.fasta`` whose name up to the first dot equals the segment identifier.

    Returns:
        list: Keys of ``expectedSegments`` without such a file, in the
        dictionary's own order.
    """
    # A set gives constant-time membership tests in the filter below.
    presentSegments = {
        name.split('.')[0]
        for name in os.listdir(dirPrefix)
        if name.endswith(".fasta")
    }
    return [segment for segment in expectedSegments
            if segment not in presentSegments]
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
24
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
25
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
def getBamHeaderFromAnyFile():
    """Return the first header line of an arbitrary BAM file in ``dirPrefix``.

    The header is obtained by running ``samtools view -H`` on the first file
    matched by ``*.bam``; only the first line of that output is returned,
    without a trailing newline.

    Returns:
        str: First line of the samtools header output.

    Raises:
        IndexError: If ``dirPrefix`` contains no ``.bam`` file.
        subprocess.CalledProcessError: If samtools exits with a non-zero
            status.
    """
    bamFiles = glob.glob(dirPrefix + "*.bam")
    if not bamFiles:
        # Same exception type that indexing the empty list would raise, but
        # with a message that says what is missing.
        raise IndexError("no .bam file found in " + dirPrefix)
    samtoolsCmd = ["samtools", "view", "-H", bamFiles[0]]
    result = subprocess.check_output(samtoolsCmd, text=True)
    return result.split('\n')[0]
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
31
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
32
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
def getVcfHeaderFromAnyFile():
    """Build a header for an empty VCF from an existing VCF in ``dirPrefix``.

    The first two lines of the first file matched by ``*.vcf`` are copied
    verbatim (presumably the version and date meta lines) and followed by a
    fixed ``#CHROM ...`` column line without a trailing newline.

    Returns:
        str: The two copied lines plus the column header line.

    Raises:
        IndexError: If ``dirPrefix`` contains no ``.vcf`` file.
    """
    vcfFiles = glob.glob(dirPrefix + "*.vcf")
    if not vcfFiles:
        # Same exception type that indexing the empty list would raise, but
        # with a message that says what is missing.
        raise IndexError("no .vcf file found in " + dirPrefix)
    with open(vcfFiles[0]) as f:
        anyVersionAndDateLines = f.readline() + f.readline()
    emptyHeaderLine = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"
    return anyVersionAndDateLines + emptyHeaderLine
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
38
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
39
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
def writeEmptyBam(identifier, bamHeader):
    """Create ``<dirPrefix><identifier>.bam`` containing only a header.

    The header text is written to a temporary SAM file in the current working
    directory and converted with ``samtools view -H -b``; the temporary file
    is removed afterwards, also when the conversion fails.

    Args:
        identifier: Segment identifier used as the BAM file stem.
        bamHeader: Header text to store in the new BAM file.

    Raises:
        FileExistsError: If the target BAM file already exists.
        subprocess.CalledProcessError: If samtools exits with a non-zero
            status.
    """
    headerSam = "headerSamFile.sam"
    targetBam = dirPrefix + identifier + ".bam"
    with open(headerSam, "w") as f:
        f.write(bamHeader)  # write header to a temporary sam file
    try:
        cmd = ['samtools', 'view', '-H', '-b', headerSam]
        # "x" refuses to overwrite a BAM file that is already there.
        with open(targetBam, "xb") as tB:
            subprocess.check_call(cmd, stdout=tB)
    finally:
        # Never leave the temporary SAM file behind, whatever happened above.
        os.remove(headerSam)
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
48
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
49
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
def writeEmptyFasta(identifier):
    """Create the empty file ``<dirPrefix><identifier>.fasta``.

    Args:
        identifier: Segment identifier used as the FASTA file stem.

    Raises:
        FileExistsError: If the file already exists.
    """
    # Mode "x" creates the file and fails instead of truncating an existing
    # one; nothing is written to it.
    with open(dirPrefix + identifier + ".fasta", 'x'):
        pass
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
52
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
53
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
def writeEmptyVcf(identifier, vcfHeader):
    """Create ``<dirPrefix><identifier>.vcf`` containing only ``vcfHeader``.

    Args:
        identifier: Segment identifier used as the VCF file stem.
        vcfHeader: Text written verbatim as the entire file content.

    Raises:
        FileExistsError: If the file already exists.
    """
    targetVcf = dirPrefix + identifier + ".vcf"
    # Mode "x" fails instead of overwriting an existing VCF file.
    with open(targetVcf, 'x') as f:
        f.write(vcfHeader)
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
57
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
58
1
7f49f3aa3861 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 6b8463ba27d0c91b736d579b0891632d4c032402
iuc
parents: 0
diff changeset
def writeEmptyAmendedFasta(identifier):
    """Create the empty amended-consensus file for one segment.

    The file is named after the segment number stored in
    ``expectedSegments``, not after the identifier itself.

    Args:
        identifier: Key of ``expectedSegments``, e.g. ``"A_HA"``.

    Raises:
        KeyError: If ``identifier`` is not a key of ``expectedSegments``.
        FileExistsError: If the file already exists.
    """
    # irma names these files like: resultDir/amended_consensus/resultDir_<segNr>.fa
    segmentNumber = str(expectedSegments[identifier])
    target = dirPrefix + "amended_consensus/resultDir_" + segmentNumber + ".fa"
    # Mode "x" creates the file and fails instead of truncating an existing one.
    with open(target, 'x'):
        pass
7f49f3aa3861 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 6b8463ba27d0c91b736d579b0891632d4c032402
iuc
parents: 0
diff changeset
62
7f49f3aa3861 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 6b8463ba27d0c91b736d579b0891632d4c032402
iuc
parents: 0
diff changeset
63
0
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
def samtoolsSortAllBam():
    """Sort the BAM file of every expected segment with ``samtools sort``.

    For each key of ``expectedSegments`` the file ``<segment>.bam`` in
    ``dirPrefix`` is renamed to ``<segment>_unsorted.bam``, the output of
    ``samtools sort`` is written back to ``<segment>.bam``, and the unsorted
    copy is deleted.

    Raises:
        FileNotFoundError: If a segment has no BAM file to rename.
        subprocess.CalledProcessError: If samtools exits with a non-zero
            status; the ``_unsorted.bam`` file is then left in place.
    """
    for segment in expectedSegments:
        targetBam = dirPrefix + segment + ".bam"
        unsortedBam = dirPrefix + segment + "_unsorted.bam"
        os.rename(targetBam, unsortedBam)
        cmd = ['samtools', 'sort', unsortedBam]
        # samtools emits the sorted BAM on stdout; BAM is binary data, so the
        # receiving file is opened in binary mode.
        with open(targetBam, "wb") as tB:
            subprocess.check_call(cmd, stdout=tB)
        os.remove(unsortedBam)
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
73
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
74
a9a78a68d6c8 planemo upload for repository https://github.com/aaronKol/tools-iuc/tree/main/tools/irma commit 0ee665c3393af083833fdb9becbe6965d009e16c
iuc
parents:
diff changeset
if __name__ == "__main__":
    # Normalise the subtype-specific HA/NA file names first so the
    # missing-segment check below sees them as plain A_HA / A_NA.
    renameSubtypeFiles("HA")
    renameSubtypeFiles("NA")
    # Headers are read once from existing files and reused for every
    # placeholder that gets written.
    bamHeader = getBamHeaderFromAnyFile()
    vcfHeader = getVcfHeaderFromAnyFile()
    for segment in getMissingSegments():
        writeEmptyBam(segment, bamHeader)
        writeEmptyFasta(segment)
        writeEmptyVcf(segment, vcfHeader)
        writeEmptyAmendedFasta(segment)
    # Runs last, when every expected segment has a BAM file.
    samtoolsSortAllBam()