# HG changeset patch # User rhpvorderman # Date 1704444802 0 # Node ID 385dea3c6cb509c2e624207f1103e78e7c17e6e8 # Parent d63eff3575151d978fccef0771fa24978bf8d2cc planemo upload commit 423a48569c69301fdbf893ac3a649128404dfff5 diff -r d63eff357515 -r 385dea3c6cb5 CHANGELOG.md --- a/CHANGELOG.md Mon Mar 27 13:11:53 2023 +0000 +++ b/CHANGELOG.md Fri Jan 05 08:53:22 2024 +0000 @@ -1,3 +1,8 @@ +version 1.9.0 +----------------- ++ Add a no filter region filter. ++ Fix a bug where tar would not open certain filenames. + version 1.8.1 ----------------- + Fix a bug where input files with spaces could not be used. diff -r d63eff357515 -r 385dea3c6cb5 CONTROL_NWK377_PB_IGHC_MID1_40nt_2.txz Binary file CONTROL_NWK377_PB_IGHC_MID1_40nt_2.txz has changed diff -r d63eff357515 -r 385dea3c6cb5 a.out Binary file a.out has changed diff -r d63eff357515 -r 385dea3c6cb5 merge_and_filter.r --- a/merge_and_filter.r Mon Mar 27 13:11:53 2023 +0000 +++ b/merge_and_filter.r Fri Jan 05 08:53:22 2024 +0000 @@ -183,6 +183,7 @@ } else if(empty.region.filter == "FR2"){ result = result[result$CDR2.IMGT.seq != "" & result$FR3.IMGT.seq != "", ] } +# If empty region filter is None, nothing happens. print(paste("After removal sequences that are missing a gene region:", nrow(result))) filtering.steps = rbind(filtering.steps, c("After removal sequences that are missing a gene region", nrow(result))) @@ -219,7 +220,7 @@ clmns = names(result) if(filter.unique == "remove_vjaa"){ result$unique.def = paste(result$VGene, result$JGene, result$CDR3.IMGT.AA) - } else if(empty.region.filter == "leader"){ + } else if(empty.region.filter == "leader" || empty.region.filter == "None"){ result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) } else if(empty.region.filter == "FR1"){ result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) diff -r d63eff357515 -r 385dea3c6cb5 nt_overview.r --- a/nt_overview.r Mon Mar 27 13:11:53 2023 +0000 +++ b/nt_overview.r Fri Jan 05 08:53:22 2024 +0000 @@ -17,7 +17,7 @@ NToverview = merged -if(empty.region.filter == "leader"){ +if(empty.region.filter == "leader" || empty.region.filter == "None"){ NToverview$seq = paste(NToverview$FR1.IMGT.seq, NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) } else if(empty.region.filter == "FR1"){ NToverview$seq = paste(NToverview$CDR1.IMGT.seq, NToverview$FR2.IMGT.seq, NToverview$CDR2.IMGT.seq, NToverview$FR3.IMGT.seq) diff -r d63eff357515 -r 385dea3c6cb5 sequence_overview.py --- a/sequence_overview.py Mon Mar 27 13:11:53 2023 +0000 +++ b/sequence_overview.py Fri Jan 05 08:53:22 2024 +0000 @@ -101,7 +101,7 @@ sequence_columns = [ "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq"] - if empty_region_filter == "leader": + if empty_region_filter == "leader" or empty_region_filter == "None": sequence_columns = sequence_columns elif empty_region_filter == "FR1": sequence_columns = sequence_columns[1:] diff -r d63eff357515 -r 385dea3c6cb5 shm_csr.py --- a/shm_csr.py Mon Mar 27 13:11:53 2023 +0000 +++ b/shm_csr.py Fri Jan 05 08:53:22 2024 +0000 @@ -2,13 +2,20 @@ import logging import sys import os +import traceback import typing from typing import Optional from collections import defaultdict -REGION_FILTERS = ("leader", "FR1", "CDR1", "FR2", "CDR2") +REGION_FILTERS = ("leader", "FR1", "CDR1", "FR2", "CDR2", "None") + +def int_or_zero(value: typing.Any): + try: + return int(value) + except ValueError: + return 0 class Mutation(typing.NamedTuple): """Represent a mutation type as a tuple""" @@ -177,15 +184,15 @@ mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] - fr1Length = int(linesplt[fr1LengthIndex]) - fr2Length = int(linesplt[fr2LengthIndex]) - fr3Length = int(linesplt[fr3LengthIndex]) - cdr1Length = int(linesplt[cdr1LengthIndex]) - cdr2Length = int(linesplt[cdr2LengthIndex]) + fr1Length = int_or_zero(linesplt[fr1LengthIndex]) + fr2Length = int_or_zero(linesplt[fr2LengthIndex]) + fr3Length = int_or_zero(linesplt[fr3LengthIndex]) + cdr1Length = int_or_zero(linesplt[cdr1LengthIndex]) + cdr2Length = int_or_zero(linesplt[cdr2LengthIndex]) LengthDic[ID] = (fr1Length, cdr1Length, fr2Length, cdr2Length, fr3Length) - cdr1AALengthDic[ID] = int(linesplt[cdr1AALengthIndex]) - cdr2AALengthDic[ID] = int(linesplt[cdr2AALengthIndex]) + cdr1AALengthDic[ID] = int_or_zero(linesplt[cdr1AALengthIndex]) + cdr2AALengthDic[ID] = int_or_zero(linesplt[cdr2AALengthIndex]) IDlist += [ID] print("len(mutationdic) =", len(mutationdic)) @@ -222,6 +229,8 @@ # We determine the position to start summing below. # This returns 0 for leader, 1 for FR1 etc. length_start_pos = REGION_FILTERS.index(empty_region_filter) + if empty_region_filter == "None": + length_start_pos = 0 o.write("Sequence.ID\tnumber_of_mutations\tnumber_of_tandems\tregion_length\texpected_tandems\tlongest_tandem\ttandems\n") for ID in IDlist: diff -r d63eff357515 -r 385dea3c6cb5 shm_csr.xml --- a/shm_csr.xml Mon Mar 27 13:11:53 2023 +0000 +++ b/shm_csr.xml Fri Jan 05 08:53:22 2024 +0000 @@ -1,4 +1,4 @@ - + python @@ -24,9 +24,8 @@ FR1: include CDR1,FR2,CDR2,FR3 in filters + diff -r d63eff357515 -r 385dea3c6cb5 show_time_as_float Binary file show_time_as_float has changed diff -r d63eff357515 -r 385dea3c6cb5 tests/__pycache__/test_shm_csr.cpython-37-pytest-6.2.5.pyc Binary file tests/__pycache__/test_shm_csr.cpython-37-pytest-6.2.5.pyc has changed diff -r d63eff357515 -r 385dea3c6cb5 tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.1.pyc Binary file tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.1.pyc has changed diff -r d63eff357515 -r 385dea3c6cb5 tests/__pycache__/test_shm_csr.cpython-39-pytest-7.4.4.pyc Binary file tests/__pycache__/test_shm_csr.cpython-39-pytest-7.4.4.pyc has changed diff -r d63eff357515 -r 385dea3c6cb5 tests/data/.~lock.handleiding activeren pas.docx# --- a/tests/data/.~lock.handleiding activeren pas.docx# Mon Mar 27 13:11:53 2023 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -Vorderman\, R.H.P. (MOLEPI) ,rhpvorderman,sasc-pc-6,21.02.2023 15:01,file:///home/rhpvorderman/.config/libreoffice/4; \ No newline at end of file diff -r d63eff357515 -r 385dea3c6cb5 tests/data/handleiding activeren pas.docx Binary file tests/data/handleiding activeren pas.docx has changed diff -r d63eff357515 -r 385dea3c6cb5 time_ns Binary file time_ns has changed diff -r d63eff357515 -r 385dea3c6cb5 wget-log --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/wget-log Fri Jan 05 08:53:22 2024 +0000 @@ -0,0 +1,15 @@ +--2024-01-03 14:15:48-- https://filesender.surf.nl/download.php?token=e17473dc-a342-4bfc-b1ff-5bc3b5fb8bd8 +Herleiden van filesender.surf.nl (filesender.surf.nl)... 2001:610:188:f001:145:101:124:6, 145.101.124.6 +Verbinding maken met filesender.surf.nl (filesender.surf.nl)|2001:610:188:f001:145:101:124:6|:443... verbonden. +HTTP-verzoek is verzonden; wachten op antwoord... 302 Found +Locatie: /?s=exception&exception=eyJtZXNzYWdlIjoiZG93bmxvYWRfbWlzc2luZ19maWxlc19pZHMiLCJ1aWQiOiI2NTk1NWUwNDI2NmFiIiwiZGV0YWlscyI6bnVsbH0= [volgen...] +--2024-01-03 14:15:48-- https://filesender.surf.nl/?s=exception&exception=eyJtZXNzYWdlIjoiZG93bmxvYWRfbWlzc2luZ19maWxlc19pZHMiLCJ1aWQiOiI2NTk1NWUwNDI2NmFiIiwiZGV0YWlscyI6bnVsbH0= +Verbinding met [filesender.surf.nl]:443 wordt hergebruikt. +HTTP-verzoek is verzonden; wachten op antwoord... 200 OK +Lengte: 5324 (5,2K) [text/html] +Wordt opgeslagen als: ‘/home/rhpvorderman/Downloads/tmp/test.tar.xz’ + + /home/rhpvorderman/Downloads/tmp/test.tar.xz 0%[ ] 0 --.-KB/s /home/rhpvorderman/Downloads/tmp/test.tar.xz 100%[=====================================================================================================================================>] 5,20K --.-KB/s in 0s + +2024-01-03 14:15:48 (414 MB/s) - '‘/home/rhpvorderman/Downloads/tmp/test.tar.xz’' opgeslagen [5324/5324] + diff -r d63eff357515 -r 385dea3c6cb5 wrapper.sh --- a/wrapper.sh Mon Mar 27 13:11:53 2023 +0000 +++ b/wrapper.sh Fri Jan 05 08:53:22 2024 +0000 @@ -23,7 +23,7 @@ class_filter=${19} empty_region_filter=${20} fast=${21} -BASENAME=$(basename "$input") +BASENAME=$(basename "$title") # Cut off .txz or .tgz suffix and also replace spaces with underscores. NEW_IMGT_PREFIX="new_IMGT_${BASENAME%.*}" NEW_IMGT_PREFIX=${NEW_IMGT_PREFIX// /_} @@ -31,7 +31,7 @@ #exec 5> debug_output.txt #BASH_XTRACEFD="5" ## Busybox date does not support '+%s.%N'. So use a custom program. Can be -## Compiled with cc -Os show_time_as_float.c -o show_time_as_float +## Compiled with cc -static -Os show_time_as_float.c -o show_time_as_float #PS4='$(${dir}/show_time_as_float) $LINENO: ' #set -x