changeset 1:8ff0ac66f1a3 draft

v0.0.4; Report FASTQ counts; misc internal changes
author peterjc
date Wed, 13 May 2015 11:08:58 -0400
parents 1d773da0ccf0
children 48e71dfd51b3
files tools/seq_filter_by_mapping/README.rst tools/seq_filter_by_mapping/seq_filter_by_mapping.py tools/seq_filter_by_mapping/seq_filter_by_mapping.xml tools/seq_filter_by_mapping/tool_dependencies.xml
diffstat 4 files changed, 54 insertions(+), 34 deletions(-) [+]
line wrap: on
line diff
--- a/tools/seq_filter_by_mapping/README.rst	Tue Jan 27 08:31:13 2015 -0500
+++ b/tools/seq_filter_by_mapping/README.rst	Wed May 13 11:08:58 2015 -0400
@@ -1,7 +1,7 @@
 Galaxy tool to filter FASTA, FASTQ or SFF sequences by SAM/BAM mapping
 ======================================================================
 
-This tool is copyright 2014 by Peter Cock, The James Hutton Institute
+This tool is copyright 2014-2015 by Peter Cock, The James Hutton Institute
 (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
 See the licence text below.
 
@@ -62,6 +62,10 @@
 ------- ----------------------------------------------------------------------
 v0.0.1  - Initial version.
 v0.0.2  - Fixed some error messages.
+v0.0.3  - Report counts for FASTQ as done for FASTA and SFF files.
+v0.0.4  - Use the ``format_source=...`` tag.
+        - Reorder XML elements (internal change only).
+        - Planemo for Tool Shed upload (``.shed.yml``, internal change only).
 ======= ======================================================================
 
 
@@ -74,22 +78,31 @@
 Much of the code was copied from my older tool:
 https://github.com/peterjc/pico_galaxy/tree/master/tools/seq_filter_by_id
 
-For making the "Galaxy Tool Shed" http://toolshed.g2.bx.psu.edu/ tarball use
-the following command from the Galaxy root folder::
+For pushing a release to the test or main "Galaxy Tool Shed", use the following
+Planemo commands (which require that you have set your Tool Shed access details in
+``~/.planemo.yml`` and that you have access rights on the Tool Shed)::
+
+    $ planemo shed_upload --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/seq_filter_by_mapping/
+    ...
+
+or::
 
-    $ tar -czf seq_filter_by_mapping.tar.gz tools/seq_filter_by_mapping/README.rst tools/seq_filter_by_mapping/seq_filter_by_mapping.py tools/seq_filter_by_mapping/seq_filter_by_mapping.xml tools/seq_filter_by_mapping/tool_dependencies.xml test-data/SRR639755_mito_pairs.fastq.gz test-data/SRR639755_sample_by_coord.sam test-data/SRR639755_sample_strict.fastq test-data/SRR639755_sample_lax.fastq
+    $ planemo shed_upload --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/seq_filter_by_mapping/
+    ...
+
+To just build and check the tarball, use::
 
-Check this worked::
-
-    $ tar -tzf seq_filter_by_mapping.tar.gz
+    $ planemo shed_upload --tar_only ~/repositories/pico_galaxy/tools/seq_filter_by_mapping/
+    ...
+    $ tar -tzf shed_upload.tar.gz
+    test-data/SRR639755_mito_pairs.fastq.gz
+    test-data/SRR639755_sample_by_coord.sam
+    test-data/SRR639755_sample_lax.fastq
+    test-data/SRR639755_sample_strict.fastq
     tools/seq_filter_by_mapping/README.rst
     tools/seq_filter_by_mapping/seq_filter_by_mapping.py
     tools/seq_filter_by_mapping/seq_filter_by_mapping.xml
     tools/seq_filter_by_mapping/tool_dependencies.xml
-    test-data/SRR639755_mito_pairs.fastq.gz
-    test-data/SRR639755_sample_by_coord.sam
-    test-data/SRR639755_sample_strict.fastq
-    test-data/SRR639755_sample_lax.fastq
 
 
 Licence (MIT)
--- a/tools/seq_filter_by_mapping/seq_filter_by_mapping.py	Tue Jan 27 08:31:13 2015 -0500
+++ b/tools/seq_filter_by_mapping/seq_filter_by_mapping.py	Wed May 13 11:08:58 2015 -0400
@@ -64,7 +64,7 @@
 options, args = parser.parse_args()
 
 if options.version:
-    print "v0.0.2"
+    print "v0.0.3"
     sys.exit(0)
 
 in_file = options.input
@@ -282,6 +282,7 @@
 def fastq_filter(in_file, pos_file, neg_file, wanted):
     """FASTQ filter."""
     from Bio.SeqIO.QualityIO import FastqGeneralIterator
+    pos_count = neg_count = 0
     handle = open(in_file, "r")
     if out_positive_file is not None and out_negative_file is not None:
         print "Generating two FASTQ files"
@@ -292,8 +293,10 @@
             # print("%s --> %s" % (title, clean_name(title.split(None, 1)[0])))
             if clean_name(title.split(None, 1)[0]) in ids:
                 positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
+                pos_count += 1
             else:
                 negative_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
+                neg_count += 1
         positive_handle.close()
         negative_handle.close()
     elif out_positive_file is not None:
@@ -302,16 +305,23 @@
         for title, seq, qual in FastqGeneralIterator(handle):
             if clean_name(title.split(None, 1)[0]) in ids:
                 positive_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
+                pos_count += 1
+            else:
+                neg_count += 1
         positive_handle.close()
     elif out_negative_file is not None:
         print "Generating non-matching FASTQ file"
         negative_handle = open(out_negative_file, "w")
         for title, seq, qual in FastqGeneralIterator(handle):
-            if clean_name(title.split(None, 1)[0]) not in ids:
+            if clean_name(title.split(None, 1)[0]) in ids:
+                pos_count += 1
+            else:
                 negative_handle.write("@%s\n%s\n+\n%s\n" % (title, seq, qual))
+                neg_count += 1
         negative_handle.close()
     handle.close()
-    # This does not currently bother to record record counts (faster)
+    return pos_count, neg_count
+
 
 def sff_filter(in_file, pos_file, neg_file, wanted):
     """SFF filter."""
@@ -353,18 +363,15 @@
 
 
 if seq_format.lower()=="sff":
-    # Now write filtered SFF file based on IDs wanted
-    pos_count, neg_count = sff_filter(in_file, out_positive_file, out_negative_file, ids)
-    # At the time of writing, Galaxy doesn't show SFF file read counts,
-    # so it is useful to put them in stdout and thus shown in job info.
-    print "%i with and %i without specified IDs" % (pos_count, neg_count)
+    sequence_filter = sff_filter
 elif seq_format.lower()=="fasta":
-    # Write filtered FASTA file based on IDs from tabular file
-    pos_count, neg_count = fasta_filter(in_file, out_positive_file, out_negative_file, ids)
-    print "%i with and %i without specified IDs" % (pos_count, neg_count)
+    sequence_filter = fasta_filter
 elif seq_format.lower().startswith("fastq"):
-    #Write filtered FASTQ file based on IDs from mapping file
-    fastq_filter(in_file, out_positive_file, out_negative_file, ids)
-    # This does not currently track the counts
+    sequence_filter = fastq_filter
 else:
     sys_exit("Unsupported file type %r" % seq_format)
+
+pos_count, neg_count = sequence_filter(in_file, out_positive_file, out_negative_file, ids)
+print("%i mapped and %i unmapped reads." % (pos_count, neg_count))
+fraction = float(pos_count) * 100.0 / float(pos_count + neg_count or 1)  # "or 1": avoid ZeroDivisionError when the input has no reads
+print("In total %i reads, of which %0.1f%% mapped." % (pos_count + neg_count, fraction))
--- a/tools/seq_filter_by_mapping/seq_filter_by_mapping.xml	Tue Jan 27 08:31:13 2015 -0500
+++ b/tools/seq_filter_by_mapping/seq_filter_by_mapping.xml	Wed May 13 11:08:58 2015 -0400
@@ -1,4 +1,4 @@
-<tool id="seq_filter_by_mapping" name="Filter sequences by mapping" version="0.0.2">
+<tool id="seq_filter_by_mapping" name="Filter sequences by mapping" version="0.0.4">
     <description>from SAM/BAM file</description>
     <requirements>
         <requirement type="package" version="1.64">biopython</requirement>
@@ -6,6 +6,11 @@
         <requirement type="binary">samtools</requirement>
         <requirement type="package" version="0.1.19">samtools</requirement>
     </requirements>
+    <stdio>
+        <!-- Anything other than zero is an error -->
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+    </stdio>
     <version_command interpreter="python">seq_filter_by_mapping.py --version</version_command>
     <command interpreter="python">
 seq_filter_by_mapping.py -i "$input_file" -f "$input_file.ext" -m $pair_mode
@@ -19,11 +24,6 @@
 ## Now loop over all the mapping files
 #for i in $mapping_file#${i} #end for#
     </command>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
     <inputs>
         <param name="input_file" type="data" format="fasta,fastq,sff" label="Sequence file to be filtered" help="FASTA, FASTQ, or SFF format." />
 	<param name="mapping_file" type="data" format="sam,bam" multiple="true" label="SAM/BAM mapping of those sequences" help="SAM or BAM format." />
@@ -47,10 +47,10 @@
         </param>
     </inputs>
     <outputs>
-        <data name="output_pos" format="input" metadata_source="input_file" label="$input_file.name (mapped)">
+        <data name="output_pos" format_source="input_file" metadata_source="input_file" label="$input_file.name (mapped)">
             <filter>output_choice_cond["output_choice"] != "neg"</filter>
         </data>
-        <data name="output_neg" format="input" metadata_source="input_file" label="$input_file.name (unmapped)">
+        <data name="output_neg" format_source="input_file" metadata_source="input_file" label="$input_file.name (unmapped)">
             <filter>output_choice_cond["output_choice"] != "pos"</filter>
         </data>
     </outputs>
--- a/tools/seq_filter_by_mapping/tool_dependencies.xml	Tue Jan 27 08:31:13 2015 -0500
+++ b/tools/seq_filter_by_mapping/tool_dependencies.xml	Wed May 13 11:08:58 2015 -0400
@@ -4,6 +4,6 @@
         <repository changeset_revision="5477a05cc158" name="package_biopython_1_64" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     <package name="samtools" version="0.1.19">
-        <repository changeset_revision="923adc89c666" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="96aab723499f" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>