Mercurial > repos > bebatut > convert_extract_sequence_file
diff convert_extract_sequence_file.xml @ 0:01c2b74b3a21 draft
planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
author | bebatut |
---|---|
date | Tue, 26 Apr 2016 08:18:18 -0400 |
parents | |
children | 158642ce204f |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_extract_sequence_file.xml Tue Apr 26 08:18:18 2016 -0400
@@ -0,0 +1,274 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+    Galaxy tool wrapper for convert_extract_sequence_file.py.
+
+    Depending on the selected parameters, the command below requests either a
+    text file of per-sequence information, a sequence file in the input format,
+    or (FastQ input only) a Fasta file plus a separate quality file. A report
+    file is requested on every run.
+-->
+<tool id="convert_extract_sequence_file" name="Convert/ Extract information" version="1.0.0">
+
+    <description>from a sequence file, with possible constraints</description>
+
+    <macros>
+        <!-- Checkbox list of the pieces of information that can be extracted. -->
+        <xml name="extraction_option">
+            <param name="to_extract" type="select" display="checkboxes" multiple="true"
+                   label="Information to extract" help="">
+                <option value="id">Identifier</option>
+                <option value="length">Length</option>
+                <validator type="no_options" message="Select at least one information to extract"/>
+            </param>
+        </xml>
+
+        <!-- Yes/No switch shared by the Fasta and FastQ branches of the form. -->
+        <xml name="extraction_test">
+            <param name="specific_extraction" type="select" label="Extract specific information?"
+                   help="If no is selected, a sequence file is generated. If yes, a text file containing the wanted information is generated">
+                <option value="True">Yes</option>
+                <option value="False" selected="true">No</option>
+            </param>
+        </xml>
+    </macros>
+
+    <requirements>
+    </requirements>
+
+    <stdio>
+    </stdio>
+
+    <version_command>
+    </version_command>
+
+    <!--
+        Builds the script invocation. NOTE(review): the braces around the
+        to_extract value are not Cheetah placeholder syntax, so they appear to
+        reach the script literally; kept unchanged, confirm the script expects them.
+    -->
+    <command><![CDATA[
+        python '$__tool_directory__/convert_extract_sequence_file.py'
+            --input '$sequence_file_format.sequence_file'
+
+            --custom_extraction_type $sequence_file_format.extraction.specific_extraction
+
+            #if str($sequence_file_format.extraction.specific_extraction) == "True":
+                --to_extract "{$sequence_file_format.extraction.to_extract}"
+                --output_information '$information_file'
+            #else if str($sequence_file_format.format) == "fastq":
+                --split $sequence_file_format.extraction.split.split_test
+                ## Compare against the literal text: the select's values are the strings
+                ## "True"/"False", and a non-empty string is truthy on its own.
+                #if str($sequence_file_format.extraction.split.split_test) == "True":
+                    --quality_format $sequence_file_format.extraction.split.quality_format
+                    --output_sequence '$fasta_sequence_file_from_fastq'
+                    --output_quality '$quality_file'
+                #else:
+                    --output_sequence '$fastq_sequence_file'
+                #end if
+            #else:
+                --output_sequence '$fasta_sequence_file'
+            #end if
+
+            #if str($constraints.constrained_extraction) == "True":
+                #for $constrain in $constraints.constraint_definition
+                    #set $info_to_constrain = str($constrain.constrained_information.info_to_constrain)
+                    #if $info_to_constrain == "id":
+                        --constraint "$info_to_constrain:
+                            ${constrain.constrained_information.constraint_type.type}:
+                            ${constrain.constrained_information.constraint_type.value}"
+                    #else:
+                        #for $sub_constrain in $constrain.constrained_information.constraint_definition
+                            --constraint "$info_to_constrain:
+                                ${sub_constrain.type}:
+                                ${sub_constrain.value}"
+                        #end for
+                    #end if
+                #end for
+            #end if
+
+            --report '$report_filepath'
+            --format $sequence_file_format.format
+    ]]>
+    </command>
+
+    <inputs>
+        <!-- Input file; the available extraction options depend on its format. -->
+        <conditional name="sequence_file_format">
+            <param name="format" type="select" display="radio"
+                   label="Format of the sequence file" help="">
+                <option value="fasta">Fasta</option>
+                <option value="fastq">FastQ</option>
+            </param>
+            <when value="fastq">
+                <param name="sequence_file" type="data" format="fastq"
+                       label="Sequence file" help=""/>
+                <conditional name="extraction">
+                    <expand macro="extraction_test"/>
+
+                    <when value="True">
+                        <expand macro="extraction_option"/>
+                    </when>
+                    <when value="False">
+                        <conditional name="split">
+                            <param name="split_test" type="select"
+                                   label="Split file into sequence and quality files?"
+                                   help="If yes is selected, a fasta and a quality file are generated. If no, a fastq file is generated">
+                                <option value="True" selected="true">Yes</option>
+                                <option value="False">No</option>
+                            </param>
+
+                            <when value="True">
+                                <param name="quality_format" type="select" display="radio"
+                                       label="Coding of quality scores?" help="">
+                                    <option value="sanger" selected="true">Sanger (Phred+33)</option>
+                                    <option value="solexa">Solexa (Solexa+64)</option>
+                                    <option value="illumina_1_3">Illumina 1.3+ (Phred+64)</option>
+                                    <option value="illumina_1_5">Illumina 1.5+ (Phred+64)</option>
+                                    <option value="illumina_1_8">Illumina 1.8+ (Phred+33)</option>
+                                </param>
+                            </when>
+                            <when value="False"/>
+                        </conditional>
+                    </when>
+                </conditional>
+            </when>
+
+            <when value="fasta">
+                <param name="sequence_file" type="data" format="fasta"
+                       label="Sequence file" help=""/>
+                <conditional name="extraction">
+                    <expand macro="extraction_test"/>
+
+                    <when value="True">
+                        <expand macro="extraction_option"/>
+                    </when>
+                    <when value="False"/>
+                </conditional>
+            </when>
+        </conditional>
+
+        <!-- Optional constraints; each one becomes a constraint argument of the command. -->
+        <conditional name="constraints">
+            <param name="constrained_extraction" type="select" label="Constrain extraction?" help="">
+                <option value="True">Yes</option>
+                <option value="False" selected="true">No</option>
+            </param>
+
+            <when value="True">
+                <repeat name="constraint_definition" title="Constraints on sequences" min="1">
+                    <conditional name="constrained_information">
+                        <param name="info_to_constrain" type="select" label="Information to constrain" help="">
+                            <option value="id">Identifier</option>
+                            <option value="length">Length</option>
+                        </param>
+                        <when value="id">
+                            <conditional name="constraint_type">
+                                <param name="type" type="select" display="radio" label="Type of constraint" help="">
+                                    <option value="equal">Equal a value</option>
+                                    <option value="in">In a list</option>
+                                    <option value="not_in">Not in a list</option>
+                                </param>
+                                <when value="equal">
+                                    <!-- Validators only take effect as children of the param they check. -->
+                                    <param name="value" type="text" size="200" label="Equal to" help="">
+                                        <validator type="empty_field" message="Give a value"/>
+                                    </param>
+                                </when>
+                                <!-- Data params are required unless marked optional, so no validator is needed. -->
+                                <when value="in">
+                                    <param format="txt" name="value" type="data" label="List of constraint" help="Text file with a value per line and nothing else"/>
+                                </when>
+                                <when value="not_in">
+                                    <param format="txt" name="value" type="data" label="List of constraint" help="Text file with a value per line and nothing else"/>
+                                </when>
+                            </conditional>
+                        </when>
+                        <when value="length">
+                            <repeat name="constraint_definition" title="Constraint on sequence length" min="1">
+                                <param name="type" type="select" label="Type of constraint" help="">
+                                    <option value="equal">Equal to</option>
+                                    <option value="lower">Lower than</option>
+                                    <option value="strictly_lower">Strictly lower than</option>
+                                    <option value="greater">Greater than</option>
+                                    <option value="strictly_greater">Strictly greater than</option>
+                                </param>
+                                <param name="value" type="integer" min="0" max="3000" value="100" label="Value" help=""/>
+                            </repeat>
+                        </when>
+                    </conditional>
+                </repeat>
+            </when>
+            <when value="False"/>
+        </conditional>
+    </inputs>
+
+    <!-- Each filtered output is kept only for the parameter combination whose command branch names it. -->
+    <outputs>
+        <data format="txt" name="information_file"
+              label="${tool.name} on ${on_string}: Information">
+            <filter>sequence_file_format['extraction']['specific_extraction'] == "True"</filter>
+        </data>
+
+        <data format="fasta" name="fasta_sequence_file"
+              label="${tool.name} on ${on_string}: Extracted sequences">
+            <filter>sequence_file_format['format'] == 'fasta' and sequence_file_format['extraction']['specific_extraction'] == "False"</filter>
+        </data>
+
+        <data format="fastq" name="fastq_sequence_file"
+              label="${tool.name} on ${on_string}: Extracted sequences">
+            <filter>sequence_file_format['format'] == 'fastq' and sequence_file_format['extraction']['specific_extraction'] == "False" and sequence_file_format['extraction']['split']['split_test'] == "False"</filter>
+        </data>
+
+        <data format="qual" name="quality_file"
+              label="${tool.name} on ${on_string}: Extracted quality">
+            <filter>sequence_file_format['format'] == 'fastq' and sequence_file_format['extraction']['specific_extraction'] == "False" and sequence_file_format['extraction']['split']['split_test'] == "True"</filter>
+        </data>
+
+        <data format="fasta" name="fasta_sequence_file_from_fastq"
+              label="${tool.name} on ${on_string}: Extracted sequences">
+            <filter>sequence_file_format['format'] == 'fastq' and sequence_file_format['extraction']['specific_extraction'] == "False" and sequence_file_format['extraction']['split']['split_test'] == "True"</filter>
+        </data>
+
+        <data format="txt" name="report_filepath"
+              label="${tool.name} on ${on_string}: Report"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="format" value="fasta"/>
+            <param name="sequence_file" value="input_sequence_file.fasta"/>
+            <param name="specific_extraction" value="True"/>
+            <param name="to_extract" value="length"/>
+            <param name="constrained_extraction" value="False"/>
+            <output name="information_file" file="information_lenght_fasta_output.txt"/>
+            <output name="report_filepath" file="report_length_fasta_output.txt"/>
+        </test>
+        <test>
+            <param name="format" value="fastq"/>
+            <param name="sequence_file" value="input_sequence_file.fastq"/>
+            <param name="specific_extraction" value="False"/>
+            <param name="split_test" value="True"/>
+            <param name="quality_format" value="illumina_1_3"/>
+            <param name="constrained_extraction" value="False"/>
+            <output name="quality_file" file="extracted_quality_illumina_1_3_fastq_output.qual"/>
+            <output name="fasta_sequence_file_from_fastq" file="extracted_sequences_illumina_1_3_fastq_output.fasta"/>
+            <output name="report_filepath" file="report_illumina_1_3_fastq_output.txt"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+
+**What it does**
+
+This tool extracts information (sequences, identifiers, lengths, ...) from a sequence file, or converts a FastQ file to a Fasta file.
+
+Constraints can be added to the extraction/conversion. For example, only sequences longer than 30 bp, or only sequences whose identifier is in a given list, can be extracted.
+
+The input is a sequence file in fasta or fastq format. The tool generates different outputs given the chosen parameters.
+]]>
+    </help>
+
+    <citations>
+    </citations>
+</tool>