# HG changeset patch # User lparsons # Date 1412618466 14400 # Node ID 93d58ffe39f1e041ed26eef349c3e1b44c8cf6cf # Parent 2d6671b10919c8b35f0897b55bb5ba8e5b91b641 Updated to version 1.6 diff -r 2d6671b10919 -r 93d58ffe39f1 README --- a/README Mon Nov 26 17:37:26 2012 -0500 +++ b/README Mon Oct 06 14:01:06 2014 -0400 @@ -1,8 +1,8 @@ Galaxy tool definition for cutadapt (http://code.google.com/p/cutadapt/) -Installation - Tool Shed ---------------------------- +Installation - Tool Shed +------------------------ The recommended way to install cutadapt as a tool in Galaxy is to the use the Galaxy Tool Shed (http://wiki.galaxyproject.org/Tool%20Shed). @@ -14,29 +14,42 @@ Installation - Manual --------------------- -1 - Install the cutadapt package and make sure it is in path for Galaxy -2 - Copy cutadapt.xml to $GALAXY_HOME/tools/cutadapt -3 - Add the tool to the $GALAXY_HOME/tool_conf.xml tool-registry file +1. Install the cutadapt package and make sure it is in path for Galaxy + +2. Copy cutadapt.xml to $GALAXY_HOME/tools/cutadapt + +3. Add the tool to the $GALAXY_HOME/tool_conf.xml tool-registry file -Optional steps to setup and run Galaxy functional tests + **Optional steps to setup and run Galaxy functional tests** + +4. Copy test-data/* to $GALAXY_HOME/test-data/ -4 - Copy test-data/* to $GALAXY_HOME/test-data/ -5 - Set GALAXY_TEST_TOOL_CONF environment variable to a tool_conf.xml file that +5. Set GALAXY_TEST_TOOL_CONF environment variable to a tool_conf.xml file that contains the tools you want to test. (e.g. 'tool_conf.xml') -6 - $GALAXY_HOME/run_functional_tests.sh -id cutadapt + +6. 
$GALAXY_HOME/run_functional_tests.sh -id cutadapt See the Galaxy Wiki for more information: http://wiki.g2.bx.psu.edu/ Configuration of Adapters ------------------------- -A list of predefined adapters may be specified in the fastx_clipper_sequences.txt +A list of predefined adapters may be specified in the cutadapt_adapters.txt file which resides in the tool-data directory underneath the Galaxy root. A sample file is provided. -Limitations ------------ +Limitations of the Galaxy wrapper +--------------------------------- + +Reading adapters from a fasta file is not supported +Colorspace data support is not implemented +Only one "Strip suffix" is supported -Colorspace data support is not implemented -Name adapters support is not implemented + +Galaxy Wrapper Development +-------------------------- + +Author: Lance Parsons + +Repository: [https://bitbucket.org/lance_parsons/cutadapt\_galaxy\_wrapper](https://bitbucket.org/lance_parsons/cutadapt_galaxy_wrapper) diff -r 2d6671b10919 -r 93d58ffe39f1 cutadapt.xml --- a/cutadapt.xml Mon Nov 26 17:37:26 2012 -0500 +++ b/cutadapt.xml Mon Oct 06 14:01:06 2014 -0400 @@ -1,67 +1,106 @@ - - Remove adapter sequences from Fastq/Fasta - - cutadapt - + + Remove adapter sequences from Fastq/Fasta + + cutadapt + cutadapt --version - cutadapt - #if $input.extension.startswith( "fastq"): - --format=fastq + cutadapt + #if $input.extension.startswith( "fastq"): + --format=fastq #if $input.extension == "fastqillumina": --quality-base=64 - #end if + #end if #if $input.extension == "fastqsolexa": --quality-base=64 - #end if - #else - --format=$input.extension - #end if - #for $a in $adapters - --adapter='${a.adapter_source.adapter}' - #end for - #for $aa in $anywhere_adapters - --anywhere='${aa.anywhere_adapter_source.anywhere_adapter}' - #end for - #for $fa in $front_adapters - --front='${fa.front_adapter_source.front_adapter}' - #end for - --error-rate=$error_rate - --times=$count - --overlap=$overlap + #end if + #else + 
--format=$input.extension + #end if + #for $a in $adapters + #if $a.adapter_source.adapter_source_list == 'prebuilt': + --adapter="${a.adapter_source.adapter.fields.name}"='${a.adapter_source.adapter}' + #else if str($a.adapter_source.adapter_name) != "": + --adapter='${a.adapter_source.adapter_name}'='${a.adapter_source.adapter}' + #else + --adapter='${a.adapter_source.adapter}' + #end if + #end for + #for $aa in $anywhere_adapters + #if $aa.anywhere_adapter_source.anywhere_adapter_source_list == 'prebuilt': + --anywhere="${aa.anywhere_adapter_source.anywhere_adapter.fields.name}"='${aa.anywhere_adapter_source.anywhere_adapter}' + #else if str($aa.anywhere_adapter_source.anywhere_adapter_name) != "": + --anywhere='${aa.anywhere_adapter_source.anywhere_adapter_name}'='${aa.anywhere_adapter_source.anywhere_adapter}' + #else + --anywhere='${aa.anywhere_adapter_source.anywhere_adapter}' + #end if + #end for + #for $fa in $front_adapters + #if $fa.front_adapter_source.front_adapter_source_list == 'prebuilt': + --front="${fa.front_adapter_source.front_adapter.fields.name}"='${fa.front_adapter_source.front_adapter}' + #else if str($fa.front_adapter_source.front_adapter_name) != "": + --front='${fa.front_adapter_source.front_adapter_name}'='${fa.front_adapter_source.front_adapter}' + #else + --front='${fa.front_adapter_source.front_adapter}' + #end if + #end for + --error-rate=$error_rate + --times=$count + --overlap=$overlap + $no_indels $match_read_wildcards - $no_match_adapters_wildcards #if str( $output_filtering_options.output_filtering) == "filter": - $output_filtering_options.discard - #if str($output_filtering_options.min) != '0': - --minimum-length=$output_filtering_options.min - #end if - #if str($output_filtering_options.max) != '0': - --maximum-length=$output_filtering_options.max - #end if + $output_filtering_options.discard + $output_filtering_options.discard_untrimmed + $output_filtering_options.no_trim + $output_filtering_options.mask_adapter + #if 
str($output_filtering_options.min) != '0': + --minimum-length=$output_filtering_options.min + #end if + #if str($output_filtering_options.max) != '0': + --maximum-length=$output_filtering_options.max + #end if + #end if + + --output='$output' + + #if $paired_end.paired_end_boolean: + --paired-output='$paired_output' #end if - --output='$output' - #if str( $output_params.output_type ) == "additional": - #if $output_params.rest_file: - --rest-file=$rest_output - #end if - #if $output_params.wildcard_file: - --wildcard-file=$wild_output - #end if - #if $output_params.too_short_file: - --too-short-output=$too_short_output - #end if - #if $output_params.untrimmed_file: - --untrimmed-output=$untrimmed_output - #end if - #end if + #if str( $output_params.output_type ) == "additional": + #if $output_params.rest_file: + --rest-file=$rest_output + #end if + #if $output_params.wildcard_file: + --wildcard-file=$wild_output + #end if + #if $output_params.too_short_file: + --too-short-output=$too_short_output + #end if + #if $output_params.too_long_file: + --too-long-output=$too_long_output + #end if + #if $output_params.untrimmed_file: + --untrimmed-output=$untrimmed_output + #if $paired_end.paired_end_boolean: + --untrimmed-paired-output=$untrimmed_paired_output + #end if + #end if + #if $output_params.info_file: + --info-file=$info_file + #end if - #if str( $read_modification_params.read_modification) == "modify": - #if str($read_modification_params.quality_cutoff) != '0': - --quality-cutoff=$read_modification_params.quality_cutoff - #end if + #end if + + #if str( $read_modification_params.read_modification) == "modify": + #if str($read_modification_params.quality_cutoff) != '0': + --quality-cutoff=$read_modification_params.quality_cutoff + #end if + #if str($read_modification_params.cut) != '0': + --cut=$read_modification_params.cut + #end if #if $read_modification_params.prefix != '': --prefix="$read_modification_params.prefix" #end if @@ -74,82 +113,95 @@ 
$read_modification_params.zero_cap #end if - '$input' - > $report - - - + '$input' + + #if $paired_end.paired_end_boolean: + '$input2' + #end if - - - - - - - - - - + > $report + + + + + + + + + - - - - - - - - - - + + + + + + - - - - - - + + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + - - - - - - + + + + + + + + + + + + + + - - - - - - - - - - - - - + + + + + + - - - + + + + + + + + + + + + + + - - + + + + + @@ -158,25 +210,30 @@ - - - + + + + + + - - - - - - - - - - - - - + + + + + + + + + + + + + + + @@ -185,84 +242,108 @@ + + - + - - - - - (output_params['output_type'] == "additional") - (output_params['rest_file'] is True) - - - (output_params['output_type'] == "additional") - (output_params['wild_file'] is True) - - - (output_params['output_type'] == "additional") - (output_params['too_short_file'] is True) - - - (output_params['output_type'] == "additional") - (output_params['untrimmed_file'] is True) - - + + + + + (paired_end['paired_end_boolean'] is True) + + + (output_params['output_type'] == "additional") + (output_params['rest_file'] is True) + + + (output_params['output_type'] == "additional") + (output_params['wildcard_file'] is True) + + + (output_params['output_type'] == "additional") + (output_params['too_short_file'] is True) + + + (output_params['output_type'] == "additional") + (output_params['too_long_file'] is True) + + + (output_params['output_type'] == "additional") + (output_params['untrimmed_file'] is True) + + + (paired_end['paired_end_boolean'] is True) + (output_params['output_type'] == "additional") + (output_params['untrimmed_file'] is True) + + + (output_params['output_type'] == "additional") + (output_params['info_file'] is True) + + + + + + + + - - - - - - - - - - - - - - + + + + + + + + + + + + + + - + - + Summary ------- This tool removes adapter sequences from DNA high-throughput @@ -270,7 +351,9 @@ machine is longer than the molecule that is sequenced, such as in microRNA data. 
-The tool is based on the opensource cutadapt_ tool. +The tool is based on the opensource `cutadapt +<http://code.google.com/p/cutadapt/>`_ tool. See the `complete cutadapt +documentation <https://cutadapt.readthedocs.org/en/latest/index.html>`_ for additional details. ----- @@ -288,14 +371,14 @@ your adapter sequence is ``ADAPTER`` (specified via 3' Adapters parameter). If you have these input sequences:: - MYSEQUENCEADAPTER - MYSEQUENCEADAP - MYSEQUENCEADAPTERSOMETHINGELSE + MYSEQUENCEADAPTER + MYSEQUENCEADAP + MYSEQUENCEADAPTERSOMETHINGELSE All of them will be trimmed to ``MYSEQUENCE``. If the sequence starts with an adapter, like this:: - ADAPTERSOMETHING + ADAPTERSOMETHING It will be empty after trimming. @@ -308,7 +391,7 @@ --------------------- If you specify a 5' (Front) adapter, the adapter may overlap the beginning of the read or -occur anywhere whithin it. If it appears withing the read, the sequence that precedes it +occur anywhere within it. If it appears within the read, the sequence that precedes it will also be trimmed in addition to the adapter. For example when the adapter sequence is ``ADAPTER``:: @@ -331,7 +414,7 @@ The assumption is enforced by the alignment algorithm, which only finds the adapter when its starting position is within the read. In other words, the 5' base of the adapter must appear within the read. The adapter and all bases following -it are remved. +it are removed. If, on the other hand, your adapter can also be ligated to the 5' end (on purpose or by accident), you should tell cutadapt so by using the Anywhere Adapter @@ -360,7 +443,30 @@ but trim the third to an empty sequence and trim the fourth not at all. +Format of the info file +----------------------- +The info file contains information about the found adapters. The output is a tab-separated text file. Each line corresponds to one read of the input file. The fields are: + +1. Read name +2. Number of errors +3. 0-based start coordinate of the adapter match +4. 
0-based end coordinate of the adapter match +5. Sequence of the read to the left of the adapter match (can be empty) +6. Sequence of the read that was matched to the adapter +7. Sequence of the read to the right of the adapter match (can be empty) +8. Name of the found adapter. + +The concatenation of the fields 5-7 yields the full read sequence. In column 8, adapters without a name are numbered starting from 1. + +If no adapter was found, the format is as follows: + +1. Read name +2. The value -1 +3. The read sequence + +When parsing that file, be aware that additional columns may be added in the future. Note also that some fields can be empty, resulting in consecutive tabs within a line. Also, in the current version, when the *Match times* option is set to a value other than 1 (the default value), multiple lines are written to the info file for each read. + .. _cutadapt: http://code.google.com/p/cutadapt/ - + diff -r 2d6671b10919 -r 93d58ffe39f1 cutadapt_adapters.txt.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cutadapt_adapters.txt.sample Mon Oct 06 14:01:06 2014 -0400 @@ -0,0 +1,14 @@ +# +# Adapter/Linker sequences for FASTX-Clipper tool. +# Also used by cutadapt tool +# +# Format: +# Adapter Sequence Descriptive name +# +# Example: +# AAATTTGATAAGATA Our-Adapter +# +# Some adapters can be found here: +# http://seqanswers.com/forums/showthread.php?t=198 + +TGTAGGCC Dummy-Adapter (do not use me) diff -r 2d6671b10919 -r 93d58ffe39f1 fastx_clipper_sequences.txt.sample --- a/fastx_clipper_sequences.txt.sample Mon Nov 26 17:37:26 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -# -# Adapter/Linker sequences for FASTX-Clipper tool. 
-# Also used by cutadapt tool -# -# Format: -# Adapter Sequence Descriptive name -# -# Example: -# AAATTTGATAAGATA Our-Adapter -# -# Some adapters can be found here: -# http://seqanswers.com/forums/showthread.php?t=198 - -TGTAGGCC Dummy-Adapter (don't use me) diff -r 2d6671b10919 -r 93d58ffe39f1 tool_dependencies.xml --- a/tool_dependencies.xml Mon Nov 26 17:37:26 2012 -0500 +++ b/tool_dependencies.xml Mon Oct 06 14:01:06 2014 -0400 @@ -1,17 +1,6 @@ - - - - - - http://pypi.python.org/packages/source/c/cutadapt/cutadapt-1.1.tar.gz - python setup.py install --home $INSTALL_DIR --install-scripts $INSTALL_DIR/bin - - $INSTALL_DIR/lib/python - $INSTALL_DIR/bin - - - - - - - + + + + + +