Mercurial > repos > artbio > repenrich2
diff RepEnrich2_setup.py @ 4:c5bb2f9af708 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 6b3b1194af0de793a1b4892c5973da835f5c0902
author | artbio |
---|---|
date | Sat, 20 Apr 2024 23:23:40 +0000 |
parents | 4905a332a094 |
children |
line wrap: on
line diff
--- a/RepEnrich2_setup.py Sat Apr 20 15:45:33 2024 +0000 +++ b/RepEnrich2_setup.py Sat Apr 20 23:23:40 2024 +0000 @@ -1,7 +1,6 @@ #!/usr/bin/env python import argparse import csv -import os import shlex import subprocess import sys @@ -48,15 +47,6 @@ genomefasta = args.genomefasta cpus = args.cpus -# check that the programs we need are available -try: - subprocess.call(shlex.split("bowtie2 --version"), - stdout=open(os.devnull, 'wb'), - stderr=open(os.devnull, 'wb')) -except OSError: - print("Error: Bowtie2 not available in the path") - raise - def starts_with_numerical(list): try: @@ -68,7 +58,7 @@ return False -# define a text importer for .out/.txt format of repbase +# text import function for .out/.txt format of repbase def import_text(filename, separator): csv.field_size_limit(sys.maxsize) file = csv.reader(open(filename), delimiter=separator, @@ -81,7 +71,7 @@ genome = defaultdict(dict) for chr in g.keys(): - genome[chr]['sequence'] = g[chr].seq + genome[chr]['sequence'] = str(g[chr].seq) genome[chr]['length'] = len(g[chr].seq) # Build a bedfile of repeatcoordinates to use by RepEnrich region_sorter @@ -110,7 +100,7 @@ # generate metagenomes and save them to FASTA files for bowtie build for repname in rep_coords: - metagenome = '' + genomes_list = [] # iterating coordinate list by block of 3 (chr, start, end) block = 3 for i in range(0, len(rep_coords[repname]) - block + 1, block): @@ -119,11 +109,8 @@ start = max(int(batch[1]) - flankingl, 0) end = min(int(batch[2]) + flankingl, int(genome[chromosome]['length'])-1) + 1 - metagenome = ( - f"{metagenome}{spacer}" - f"{genome[chromosome]['sequence'][start:end]}" - ) - + genomes_list.append(genome[chromosome]['sequence'][start:end]) + metagenome = spacer.join(genomes_list) # Create Fasta of repeat pseudogenome fastafilename = f"{repname}.fa" record = SeqRecord(Seq(metagenome), id=repname, name='', description='')