changeset 6:a73c48890bde draft

Version v0.02.04.5: handle large output files
author pjbriggs
date Tue, 06 Jun 2017 08:54:49 -0400
parents 8159dab5dbdb
children 5e133b7b79a6
files README.rst pal_finder_wrapper.sh pal_finder_wrapper.xml
diffstat 3 files changed, 28 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/README.rst	Tue Apr 12 05:53:41 2016 -0400
+++ b/README.rst	Tue Jun 06 08:54:49 2017 -0400
@@ -60,6 +60,11 @@
 ========== ======================================================================
 Version    Changes
 ---------- ----------------------------------------------------------------------
+
+0.02.04.5  - Update to handle large output files which can sometimes be generated
+             by the ``pal_finder_v0.02.04.pl`` or ``pal_filter.py`` scripts (logs
+             of hundreds of GB have been observed in production): log files
+             longer than 500 lines are now truncated to avoid downstream problems.
 0.02.04.4  - Update to the filter script (``pal_filter.py``) which removes some
              columns from the output assembly file.
 0.02.04.3  - Update to the Illumina filtering script from Graeme Fox (including
--- a/pal_finder_wrapper.sh	Tue Apr 12 05:53:41 2016 -0400
+++ b/pal_finder_wrapper.sh	Tue Jun 06 08:54:49 2017 -0400
@@ -50,6 +50,9 @@
 echo "### $(basename $0) ###"
 echo $*
 #
+# Maximum number of log file lines to report (longer logs are truncated)
+MAX_LINES=500
+#
 # Initialise locations of scripts, data and executables
 #
 # Set these in the environment to override at execution time
@@ -313,8 +316,16 @@
 #
 # Run pal_finder
 echo "### Running pal_finder ###"
-perl $PALFINDER_SCRIPT_DIR/pal_finder_v0.02.04.pl config.txt 2>&1 | tee pal_finder.log
-echo "### pal_finder finised ###"
+perl "$PALFINDER_SCRIPT_DIR/pal_finder_v0.02.04.pl" config.txt 1>pal_finder.log 2>&1
+echo "### pal_finder finished ###"
+# Report the pal_finder log, showing only its last MAX_LINES lines if longer
+# (wc reads stdin so it prints just the count, with no filename to strip)
+echo "### Output from pal_finder ###"
+if [ $(wc -l <pal_finder.log) -gt $MAX_LINES ] ; then
+    echo WARNING output too long, truncated to last $MAX_LINES lines:
+    echo ...
+fi
+tail -n $MAX_LINES pal_finder.log
 #
 # Check that log ends with "Done!!" message
 if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then
@@ -335,7 +346,13 @@
 # Run the filtering & assembly script
 if [ ! -z "$FILTERED_MICROSATS" ] || [ ! -z "$OUTPUT_ASSEMBLY" ] ; then
     echo "### Running filtering & assembly script ###"
-    python $PALFINDER_FILTER -i $fastq_r1 -j $fastq_r2 -p Output/PAL_summary.txt $FILTER_OPTIONS 2>&1
+    python $PALFINDER_FILTER -i $fastq_r1 -j $fastq_r2 -p Output/PAL_summary.txt $FILTER_OPTIONS 1>pal_filter.log 2>&1
+    filter_status=$?  # save now: the log reporting below overwrites $?
+    echo "### Output from pal_filter ###"
+    if [ $(wc -l <pal_filter.log) -gt $MAX_LINES ] ; then  # wc on stdin prints only the count
+	printf '%s\n' "WARNING output too long, truncated to last $MAX_LINES lines:" "..."
+    fi
+    tail -n $MAX_LINES pal_filter.log ; ( exit $filter_status )  # restore the filter's status for the $? check that follows
     if [ $? -ne 0 ] ; then
 	echo ERROR $PALFINDER_FILTER exited with non-zero status >&2
 	exit 1
@@ -368,5 +385,7 @@
 if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then
     /bin/mv config.txt $OUTPUT_CONFIG_FILE
 fi
+#
+echo "### Pal_finder tool completed ###"
 ##
 #
--- a/pal_finder_wrapper.xml	Tue Apr 12 05:53:41 2016 -0400
+++ b/pal_finder_wrapper.xml	Tue Jun 06 08:54:49 2017 -0400
@@ -1,4 +1,4 @@
-<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.4">
+<tool id="microsat_pal_finder" name="pal_finder" version="0.02.04.5">
   <description>Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them</description>
   <requirements>
     <requirement type="package" version="5.16.3">perl</requirement>