Mercurial > repos > artbio > repenrich2

diff RepEnrich2_setup.py @ 4:c5bb2f9af708 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/repenrich2 commit 6b3b1194af0de793a1b4892c5973da835f5c0902
author: artbio
date: Sat, 20 Apr 2024 23:23:40 +0000
parents: 4905a332a094
--- a/RepEnrich2_setup.py	Sat Apr 20 15:45:33 2024 +0000
+++ b/RepEnrich2_setup.py	Sat Apr 20 23:23:40 2024 +0000
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 import argparse
 import csv
-import os
 import shlex
 import subprocess
 import sys
@@ -48,15 +47,6 @@
 genomefasta = args.genomefasta
 cpus = args.cpus
 
-# check that the programs we need are available
-try:
-    subprocess.call(shlex.split("bowtie2 --version"),
-                    stdout=open(os.devnull, 'wb'),
-                    stderr=open(os.devnull, 'wb'))
-except OSError:
-    print("Error: Bowtie2 not available in the path")
-    raise
-
 
 def starts_with_numerical(list):
     try:
@@ -68,7 +58,7 @@
         return False
 
 
-# define a text importer for .out/.txt format of repbase
+# text import function for .out/.txt format of repbase
 def import_text(filename, separator):
     csv.field_size_limit(sys.maxsize)
     file = csv.reader(open(filename), delimiter=separator,
@@ -81,7 +71,7 @@
 genome = defaultdict(dict)
 
 for chr in g.keys():
-    genome[chr]['sequence'] = g[chr].seq
+    genome[chr]['sequence'] = str(g[chr].seq)
     genome[chr]['length'] = len(g[chr].seq)
 
 # Build a bedfile of repeatcoordinates to use by RepEnrich region_sorter
@@ -110,7 +100,7 @@
 
 # generate metagenomes and save them to FASTA files for bowtie build
 for repname in rep_coords:
-    metagenome = ''
+    genomes_list = []
     # iterating coordinate list by block of 3 (chr, start, end)
     block = 3
     for i in range(0, len(rep_coords[repname]) - block + 1, block):
@@ -119,11 +109,8 @@
         start = max(int(batch[1]) - flankingl, 0)
         end = min(int(batch[2]) + flankingl,
                   int(genome[chromosome]['length'])-1) + 1
-        metagenome = (
-            f"{metagenome}{spacer}"
-            f"{genome[chromosome]['sequence'][start:end]}"
-            )
-
+        genomes_list.append(genome[chromosome]['sequence'][start:end])
+    metagenome = spacer.join(genomes_list)
     # Create Fasta of repeat pseudogenome
     fastafilename = f"{repname}.fa"
     record = SeqRecord(Seq(metagenome), id=repname, name='', description='')
author	artbio
date	Sat, 20 Apr 2024 23:23:40 +0000
parents	4905a332a094
children