changeset 96:385dea3c6cb5 draft

planemo upload commit 423a48569c69301fdbf893ac3a649128404dfff5
author rhpvorderman
date Fri, 05 Jan 2024 08:53:22 +0000
parents d63eff357515
children fbc6307dd83b
files CHANGELOG.md CONTROL_NWK377_PB_IGHC_MID1_40nt_2.txz a.out merge_and_filter.r nt_overview.r sequence_overview.py shm_csr.py shm_csr.xml show_time_as_float tests/__pycache__/test_shm_csr.cpython-37-pytest-6.2.5.pyc tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.1.pyc tests/__pycache__/test_shm_csr.cpython-39-pytest-7.4.4.pyc tests/data/.~lock.handleiding activeren pas.docx# tests/data/handleiding activeren pas.docx time_ns wget-log wrapper.sh
diffstat 17 files changed, 46 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/CHANGELOG.md	Mon Mar 27 13:11:53 2023 +0000
+++ b/CHANGELOG.md	Fri Jan 05 08:53:22 2024 +0000
@@ -1,3 +1,8 @@
+version 1.9.0
+-----------------
++ Add a "None" option to the empty region filter, so sequences with missing regions are not filtered out.
++ Fix a bug where tar would not open certain filenames.
+
 version 1.8.1
 -----------------
 + Fix a bug where input files with spaces could not be used.
Binary file CONTROL_NWK377_PB_IGHC_MID1_40nt_2.txz has changed
Binary file a.out has changed
--- a/merge_and_filter.r	Mon Mar 27 13:11:53 2023 +0000
+++ b/merge_and_filter.r	Fri Jan 05 08:53:22 2024 +0000
@@ -183,6 +183,7 @@
 } else if(empty.region.filter == "FR2"){
 	result = result[result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ]
 }
+# If empty.region.filter is "None", no sequences are removed in this step.
 
 print(paste("After removal sequences that are missing a gene region:", nrow(result)))
 filtering.steps = rbind(filtering.steps, c("After removal sequences that are missing a gene region", nrow(result)))
@@ -219,7 +220,7 @@
 	clmns = names(result)
 	if(filter.unique == "remove_vjaa"){
 		result$unique.def = paste(result$VGene, result$JGene, result$CDR3.IMGT.AA)
-	} else if(empty.region.filter == "leader"){
+	} else if(empty.region.filter == "leader" || empty.region.filter == "None"){
 		result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
 	} else if(empty.region.filter == "FR1"){
 		result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq)
--- a/nt_overview.r	Mon Mar 27 13:11:53 2023 +0000
+++ b/nt_overview.r	Fri Jan 05 08:53:22 2024 +0000
@@ -17,7 +17,7 @@
 
 NToverview = merged
 
-if(empty.region.filter == "leader"){
+if(empty.region.filter == "leader" || empty.region.filter == "None"){
 	NToverview$seq = paste(NToverview$FR1.IMGT.seq, NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
 } else if(empty.region.filter == "FR1"){
 	NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq)
--- a/sequence_overview.py	Mon Mar 27 13:11:53 2023 +0000
+++ b/sequence_overview.py	Fri Jan 05 08:53:22 2024 +0000
@@ -101,7 +101,7 @@
     sequence_columns = [
         "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq",
         "FR3.IMGT.seq", "CDR3.IMGT.seq"]
-    if empty_region_filter == "leader":
+    if empty_region_filter == "leader" or empty_region_filter == "None":
         sequence_columns = sequence_columns
     elif empty_region_filter == "FR1":
         sequence_columns = sequence_columns[1:]
--- a/shm_csr.py	Mon Mar 27 13:11:53 2023 +0000
+++ b/shm_csr.py	Fri Jan 05 08:53:22 2024 +0000
@@ -2,13 +2,20 @@
 import logging
 import sys
 import os
+import traceback
 import typing
 from typing import Optional
 
 from collections import defaultdict
 
-REGION_FILTERS = ("leader", "FR1", "CDR1", "FR2", "CDR2")
+REGION_FILTERS = ("leader", "FR1", "CDR1", "FR2", "CDR2", "None")
+
 
+def int_or_zero(value: typing.Any) -> int:  # Return int(value), or 0 when value cannot be converted.
+	try:
+		return int(value)
+	except (ValueError, TypeError):  # ValueError: non-numeric/empty string; TypeError: None or other non-convertible types.
+		return 0
 
 class Mutation(typing.NamedTuple):
 	"""Represent a mutation type as a tuple"""
@@ -177,15 +184,15 @@
 			mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
 			mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
 
-			fr1Length = int(linesplt[fr1LengthIndex])
-			fr2Length = int(linesplt[fr2LengthIndex])
-			fr3Length = int(linesplt[fr3LengthIndex])
-			cdr1Length = int(linesplt[cdr1LengthIndex])
-			cdr2Length = int(linesplt[cdr2LengthIndex])
+			fr1Length = int_or_zero(linesplt[fr1LengthIndex])
+			fr2Length = int_or_zero(linesplt[fr2LengthIndex])
+			fr3Length = int_or_zero(linesplt[fr3LengthIndex])
+			cdr1Length = int_or_zero(linesplt[cdr1LengthIndex])
+			cdr2Length = int_or_zero(linesplt[cdr2LengthIndex])
 			LengthDic[ID] = (fr1Length, cdr1Length, fr2Length, cdr2Length, fr3Length)
 
-			cdr1AALengthDic[ID] = int(linesplt[cdr1AALengthIndex])
-			cdr2AALengthDic[ID] = int(linesplt[cdr2AALengthIndex])
+			cdr1AALengthDic[ID] = int_or_zero(linesplt[cdr1AALengthIndex])
+			cdr2AALengthDic[ID] = int_or_zero(linesplt[cdr2AALengthIndex])
 
 			IDlist += [ID]
 	print("len(mutationdic) =", len(mutationdic))
@@ -222,6 +229,8 @@
 		# We determine the position to start summing below.
 		# This returns 0 for leader, 1 for FR1 etc.
 		length_start_pos = REGION_FILTERS.index(empty_region_filter)
+		if empty_region_filter == "None":
+			length_start_pos = 0
 
 		o.write("Sequence.ID\tnumber_of_mutations\tnumber_of_tandems\tregion_length\texpected_tandems\tlongest_tandem\ttandems\n")
 		for ID in IDlist:
--- a/shm_csr.xml	Mon Mar 27 13:11:53 2023 +0000
+++ b/shm_csr.xml	Fri Jan 05 08:53:22 2024 +0000
@@ -1,4 +1,4 @@
-<tool id="shm_csr" name="SHM &amp; CSR pipeline" version="1.8.1" profile="16.04">
+<tool id="shm_csr" name="SHM &amp; CSR pipeline" version="1.9.0" profile="16.04">
 	<description></description>
 	<requirements>
 		<requirement type="package" version="3.7.1">python</requirement>
@@ -24,9 +24,8 @@
 		<![CDATA[
 		#import os
 		#set $input=os.path.basename($in_file.name)
-		ln -s "$in_file" "$input" &&
 		#if str ( $filter_unique.filter_unique_select ) == "remove":
-			$__tool_directory__/wrapper.sh "$input"
+			$__tool_directory__/wrapper.sh "$in_file"
 			custom $out_file $out_file.files_path
 			"$input" "-" $functionality $unique
 			$naive_output_cond.naive_output $naive_output_ca $naive_output_cg
@@ -60,6 +59,7 @@
 			<option value="FR1" selected="true">FR1: include CDR1,FR2,CDR2,FR3 in filters</option>
 			<option value="CDR1">CDR1: include FR2,CDR2,FR3 in filters</option>
 			<option value="FR2">FR2: include CDR2,FR3 in filters</option>
+			<option value="None">No filter: sequences with missing regions are not filtered.</option>
 		</param>
 		<param name="functionality" type="select" label="Functionality filter" help="" >
 			<option value="productive" selected="true">Productive (Productive and Productive see comment)</option>
Binary file show_time_as_float has changed
Binary file tests/__pycache__/test_shm_csr.cpython-37-pytest-6.2.5.pyc has changed
Binary file tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.1.pyc has changed
Binary file tests/__pycache__/test_shm_csr.cpython-39-pytest-7.4.4.pyc has changed
--- a/tests/data/.~lock.handleiding activeren pas.docx#	Mon Mar 27 13:11:53 2023 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1 +0,0 @@
-Vorderman\, R.H.P. (MOLEPI) ,rhpvorderman,sasc-pc-6,21.02.2023 15:01,file:///home/rhpvorderman/.config/libreoffice/4;
\ No newline at end of file
Binary file tests/data/handleiding activeren pas.docx has changed
Binary file time_ns has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wget-log	Fri Jan 05 08:53:22 2024 +0000
@@ -0,0 +1,15 @@
+--2024-01-03 14:15:48--  https://filesender.surf.nl/download.php?token=e17473dc-a342-4bfc-b1ff-5bc3b5fb8bd8
+Herleiden van filesender.surf.nl (filesender.surf.nl)... 2001:610:188:f001:145:101:124:6, 145.101.124.6
+Verbinding maken met filesender.surf.nl (filesender.surf.nl)|2001:610:188:f001:145:101:124:6|:443... verbonden.
+HTTP-verzoek is verzonden; wachten op antwoord... 302 Found
+Locatie: /?s=exception&exception=eyJtZXNzYWdlIjoiZG93bmxvYWRfbWlzc2luZ19maWxlc19pZHMiLCJ1aWQiOiI2NTk1NWUwNDI2NmFiIiwiZGV0YWlscyI6bnVsbH0= [volgen...]
+--2024-01-03 14:15:48--  https://filesender.surf.nl/?s=exception&exception=eyJtZXNzYWdlIjoiZG93bmxvYWRfbWlzc2luZ19maWxlc19pZHMiLCJ1aWQiOiI2NTk1NWUwNDI2NmFiIiwiZGV0YWlscyI6bnVsbH0=
+Verbinding met [filesender.surf.nl]:443 wordt hergebruikt.
+HTTP-verzoek is verzonden; wachten op antwoord... 200 OK
+Lengte: 5324 (5,2K) [text/html]
+Wordt opgeslagen als: ‘/home/rhpvorderman/Downloads/tmp/test.tar.xz’
+
+
/home/rhpvorderman/Downloads/tmp/test.tar.xz                0%[                                                                                                                                      ]       0  --.-KB/s               
/home/rhpvorderman/Downloads/tmp/test.tar.xz              100%[=====================================================================================================================================>]   5,20K  --.-KB/s    in 0s      
+
+2024-01-03 14:15:48 (414 MB/s) - '‘/home/rhpvorderman/Downloads/tmp/test.tar.xz’' opgeslagen [5324/5324]
+
--- a/wrapper.sh	Mon Mar 27 13:11:53 2023 +0000
+++ b/wrapper.sh	Fri Jan 05 08:53:22 2024 +0000
@@ -23,7 +23,7 @@
 class_filter=${19}
 empty_region_filter=${20}
 fast=${21}
-BASENAME=$(basename "$input")
+BASENAME=$(basename "$title")
 # Cut off .txz or .tgz suffix and also replace spaces with underscores.
 NEW_IMGT_PREFIX="new_IMGT_${BASENAME%.*}"
 NEW_IMGT_PREFIX=${NEW_IMGT_PREFIX// /_}
@@ -31,7 +31,7 @@
 #exec 5> debug_output.txt
 #BASH_XTRACEFD="5"
 ## Busybox date does not support '+%s.%N'. So use a custom program. Can be
-## Compiled with cc -Os show_time_as_float.c -o show_time_as_float
+## Compiled with cc -static -Os show_time_as_float.c -o show_time_as_float
 #PS4='$(${dir}/show_time_as_float) $LINENO: '
 #set -x