repenrich2: RepEnrich2_setup.py comparison

comparison RepEnrich2_setup.py @ 4:c5bb2f9af708 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 6b3b1194af0de793a1b4892c5973da835f5c0902

author	artbio
date	Sat, 20 Apr 2024 23:23:40 +0000
parents	4905a332a094
children

comparison

equal deleted inserted replaced

-:0efb0ee6a7e9
+:c5bb2f9af708
 #!/usr/bin/env python
 import argparse
 import csv
-import os
 import shlex
 import subprocess
 import sys
 from collections import defaultdict
 from concurrent.futures import ProcessPoolExecutor
 flankingl = args.flankinglength
 annotation_file = args.annotation_file
 genomefasta = args.genomefasta
 cpus = args.cpus
-# check that the programs we need are available
-try:
-subprocess.call(shlex.split("bowtie2 --version"),
-stdout=open(os.devnull, 'wb'),
-stderr=open(os.devnull, 'wb'))
-except OSError:
-print("Error: Bowtie2 not available in the path")
-raise
 def starts_with_numerical(list):
 try:
 if len(list) == 0:
 return False
 return True
 except ValueError:
 return False
-# define a text importer for .out/.txt format of repbase
+# text import function for .out/.txt format of repbase
 def import_text(filename, separator):
     """Read a delimited text file and return its data rows.

     Intended for the .out/.txt repeat annotation format. Every parsed row
     is passed to starts_with_numerical(); only rows it accepts are kept.

     Args:
         filename: path of the text file to read.
         separator: single-character field delimiter given to csv.reader.

     Returns:
         A list of rows, each row being a list of string fields.
     """
     # Raise the csv field size cap so very long fields do not abort parsing.
     csv.field_size_limit(sys.maxsize)
     # Use a context manager so the file handle is closed deterministically;
     # the rows are materialised before the handle goes out of scope.
     with open(filename) as handle:
         reader = csv.reader(handle, delimiter=separator,
                             skipinitialspace=True)
         return [row for row in reader if starts_with_numerical(row)]
 # load genome into dictionary and compute length
 g = SeqIO.to_dict(SeqIO.parse(genomefasta, "fasta"))
 genome = defaultdict(dict)
 for chr in g.keys():
-genome[chr]['sequence'] = g[chr].seq
+genome[chr]['sequence'] = str(g[chr].seq)
 genome[chr]['length'] = len(g[chr].seq)
 # Build a BED file of repeat coordinates for use by RepEnrich region_sorter
 repeat_elements = set()
 rep_coords = defaultdict(list)  # Merged dictionary for coordinates
 # generate spacer for pseudogenomes
 spacer = ''.join(['N' for i in range(gapl)])
 # generate metagenomes and save them to FASTA files for bowtie build
 for repname in rep_coords:
-metagenome = ''
+genomes_list = []
 # iterating coordinate list by block of 3 (chr, start, end)
 block = 3
 for i in range(0, len(rep_coords[repname]) - block + 1, block):
 batch = rep_coords[repname][i:i+block]
 chromosome = batch[0]
 start = max(int(batch[1]) - flankingl, 0)
 end = min(int(batch[2]) + flankingl,
 int(genome[chromosome]['length'])-1) + 1
-metagenome = (
+genomes_list.append(genome[chromosome]['sequence'][start:end])
-f"{metagenome}{spacer}"
+metagenome = spacer.join(genomes_list)
-f"{genome[chromosome]['sequence'][start:end]}"
-)
 # Create Fasta of repeat pseudogenome
 fastafilename = f"{repname}.fa"
 record = SeqRecord(Seq(metagenome), id=repname, name='', description='')
 SeqIO.write(record, fastafilename, "fasta")

Mercurial > repos > artbio > repenrich2

comparison RepEnrich2_setup.py @ 4:c5bb2f9af708 draft