annotate vcf2hrdetect.py @ 11:5a326a6fa105 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
author artbio
date Fri, 17 Oct 2025 17:21:17 +0000
parents 7dcf61950215
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
1 #!/usr/bin/env python
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
2 import argparse
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
3 import re
6
ad8853ee9909 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 7d69f5c91e7ec5e252e9728fdfb0f7bcc254bf24"
artbio
parents:
diff changeset
4 import sys
ad8853ee9909 "planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy_smoove commit 7d69f5c91e7ec5e252e9728fdfb0f7bcc254bf24"
artbio
parents:
diff changeset
5
11
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
6
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
def create_arg_parser():
    """Build the command-line interface of the converter.

    Returns:
        argparse.ArgumentParser: A parser that expects two positional
            arguments, ``vcf_file`` followed by ``output_file``.
    """
    summary = (
        "Convert a VCF file from lumpy-smoove to a tabular format "
        "compatible with the HRDetect pipeline."
    )
    cli = argparse.ArgumentParser(description=summary)

    # Positionals are registered in order: input first, output second.
    positionals = (
        ('vcf_file', 'Path to the input VCF file.'),
        ('output_file', 'Path to the output tabular file.'),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, help=help_text)
    return cli
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
24
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
25
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
def parse_breakend_alt(alt_field):
    """
    Parses the ALT field for a breakend and returns chromosome and position.

    The mate locus is written as ``]chr:pos]`` or ``[chr:pos[`` inside the
    ALT string. Contig names may themselves contain colons (for example
    ``HLA-A*01:01:01:01``), so the position is taken after the LAST colon
    found between the two brackets.

    Args:
        alt_field (str): The ALT field (column 5) of a VCF line.

    Returns:
        tuple: A tuple containing (chromosome, position), both as strings,
               or (None, None) if parsing fails.
    """
    # The contig part accepts anything except brackets; being greedy, it
    # backtracks to the last ':' that is followed by digits and the closing
    # bracket, which keeps colon-containing contig names intact.
    pattern = (
        r"\](?P<chrom1>[^\[\]]+):(?P<pos1>\d+)\]|"
        r"\[(?P<chrom2>[^\[\]]+):(?P<pos2>\d+)\["
    )
    match = re.search(pattern, alt_field)

    if match is None:
        return None, None

    # Exactly one of the two alternatives matched; the other pair is None.
    chrom = match.group('chrom1') or match.group('chrom2')
    pos = match.group('pos1') or match.group('pos2')
    return chrom, pos
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
51
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
52
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
def process_vcf(vcf_path, output_path):
    """
    Reads a VCF file, converts it, and writes the result to a tabular file.

    The output starts with the header ``chr1 pos1 chr2 pos2 type``
    (tab-separated) followed by one row per retained record:

    * ``SVTYPE=BND`` records take their second locus from the ALT field and
      are labelled ``INV`` when both loci are on the same chromosome,
      ``TRA`` otherwise.
    * All other records take ``END`` from the INFO field as ``pos2`` on the
      same chromosome and keep their ``SVTYPE`` value as the type.

    Header lines, lines with fewer than 8 columns, records without an
    ``SVTYPE`` tag, breakends whose ALT cannot be parsed and non-breakend
    records without an ``END`` tag are skipped.

    Args:
        vcf_path (str): Path to the input VCF file.
        output_path (str): Path to the output tabular file.

    Raises:
        SystemExit: With status 1 if a file cannot be found or another
            I/O error occurs; the reason is printed to stderr.
    """
    header = ["chr1", "pos1", "chr2", "pos2", "type"]

    # INFO keys are anchored on the start of the field or a ';' separator so
    # that 'END=' can never match inside 'CIEND=' (nor 'SVTYPE=' inside a
    # longer key). Compiled once, outside the per-line loop.
    svtype_re = re.compile(r'(?:^|;)SVTYPE=([^;]+)')
    end_re = re.compile(r'(?:^|;)END=([^;]+)')

    try:
        with open(vcf_path, 'r') as infile, \
                open(output_path, 'w') as outfile:
            outfile.write("\t".join(header) + "\n")

            for line in infile:
                if line.startswith('#'):
                    continue

                fields = line.strip().split('\t')
                if len(fields) < 8:
                    continue

                chrom1 = fields[0]
                pos1 = fields[1]
                info = fields[7]

                svtype_match = svtype_re.search(info)
                if not svtype_match:
                    continue  # Skip lines without SVTYPE tag
                svtype = svtype_match.group(1)

                if svtype == "BND":  # Breakend (INV or TRA)
                    chrom2, pos2 = parse_breakend_alt(fields[4])
                    if not (chrom2 and pos2):
                        continue
                    event_type = "INV" if chrom1 == chrom2 else "TRA"
                else:  # Other SV types (DEL, DUP, etc.)
                    end_match = end_re.search(info)
                    if not end_match:
                        continue
                    chrom2 = chrom1
                    pos2 = end_match.group(1)
                    event_type = svtype

                row = [chrom1, pos1, chrom2, pos2, event_type]
                outfile.write("\t".join(row) + "\n")

    except FileNotFoundError as e:
        # Opening the output can also raise this (missing directory), so
        # report the path that actually failed instead of assuming the input.
        missing = e.filename if e.filename is not None else vcf_path
        print(f"Error: File '{missing}' not found.",
              file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        print(f"IO Error: {e}", file=sys.stderr)
        sys.exit(1)
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
109
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
110
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
def main():
    """Parse the command-line arguments and run the VCF conversion."""
    cli_args = create_arg_parser().parse_args()
    process_vcf(cli_args.vcf_file, cli_args.output_file)
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
116
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
117
5a326a6fa105 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/lumpy_smoove commit 8b10e8fc832f8ca7c32479e20d5edbd62088a3aa
artbio
parents: 9
diff changeset
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()