Mercurial > repos > devteam > fastq_paired_end_deinterlacer
changeset 1:462abc5618ba draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_paired_end_deinterlacer commit f2582539542b33240234e8ea6093e25d0aee9b6a
author | devteam |
---|---|
date | Sat, 30 Sep 2017 14:58:47 -0400 |
parents | f0949bc49926 |
children | b7ce72b00e62 |
files | fastq_paired_end_deinterlacer.py fastq_paired_end_deinterlacer.xml tool_dependencies.xml |
diffstat | 3 files changed, 38 insertions(+), 106 deletions(-) [+] |
line wrap: on
line diff
--- a/fastq_paired_end_deinterlacer.py Mon Jan 27 09:27:16 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,66 +0,0 @@ -#Florent Angly -import sys -from galaxy_utils.sequence.fastq import fastqReader, fastqWriter, fastqNamedReader, fastqJoiner - -def main(): - input_filename = sys.argv[1] - input_type = sys.argv[2] or 'sanger' - mate1_filename = sys.argv[3] - mate2_filename = sys.argv[4] - single1_filename = sys.argv[5] - single2_filename = sys.argv[6] - - type = input_type - input = fastqNamedReader( open( input_filename, 'rb' ), format = type ) - mate1_out = fastqWriter( open( mate1_filename, 'wb' ), format = type ) - mate2_out = fastqWriter( open( mate2_filename, 'wb' ), format = type ) - single1_out = fastqWriter( open( single1_filename, 'wb' ), format = type ) - single2_out = fastqWriter( open( single2_filename, 'wb' ), format = type ) - joiner = fastqJoiner( type ) - - i = None - skip_count = 0 - found = {} - for i, read in enumerate( fastqReader( open( input_filename, 'rb' ), format = type ) ): - - if read.identifier in found: - del found[read.identifier] - continue - - mate1 = input.get( read.identifier ) - - mate2 = input.get( joiner.get_paired_identifier( mate1 ) ) - - if mate2: - # This is a mate pair - found[mate2.identifier] = None - if joiner.is_first_mate( mate1 ): - mate1_out.write( mate1 ) - mate2_out.write( mate2 ) - else: - mate1_out.write( mate2 ) - mate2_out.write( mate1 ) - else: - # This is a single - skip_count += 1 - if joiner.is_first_mate( mate1 ): - single1_out.write( mate1 ) - else: - single2_out.write( mate1 ) - - if i is None: - print "Your input file contained no valid FASTQ sequences." - else: - if skip_count: - print 'There were %i reads with no mate.' % skip_count - print 'De-interlaced %s pairs of sequences.' % ( (i - skip_count + 1)/2 ) - - input.close() - mate1_out.close() - mate2_out.close() - single1_out.close() - single2_out.close() - - -if __name__ == "__main__": - main()
--- a/fastq_paired_end_deinterlacer.xml Mon Jan 27 09:27:16 2014 -0500 +++ b/fastq_paired_end_deinterlacer.xml Sat Sep 30 14:58:47 2017 -0400 @@ -1,35 +1,37 @@ -<tool id="fastq_paired_end_deinterlacer" name="FASTQ de-interlacer" version="1.1"> - <description>on paired end reads</description> - <requirements> - <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement> - </requirements> - <command interpreter="python">fastq_paired_end_deinterlacer.py '$input_file' '${input_file.extension[len( 'fastq' ):]}' '$output1_pairs_file' '$output2_pairs_file' '$output1_singles_file' '$output2_singles_file'</command> - <inputs> - <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ reads" /> - </inputs> - <outputs> - <data name="output1_pairs_file" format="input" label="FASTQ de-interlacer left mates from data ${input_file.hid}" /> - <data name="output2_pairs_file" format="input" label="FASTQ de-interlacer right mates from data ${input_file.hid}"/> - <data name="output1_singles_file" format="input" label="FASTQ de-interlacer left singles from data ${input_file.hid}"/> - <data name="output2_singles_file" format="input" label="FASTQ de-interlacer right singles from data ${input_file.hid}"/> - </outputs> - <tests> - <test> - <param name="input_file" value="paired_end_merged.fastqsanger" ftype="fastqsanger" /> - <output name="output1_pairs_file" file="paired_end_1.fastqsanger" /> - <output name="output2_pairs_file" file="paired_end_2.fastqsanger" /> - <output name="output1_singles_file" file="paired_end_1_singles.fastqsanger" /> - <output name="output2_singles_file" file="paired_end_2_singles.fastqsanger" /> - </test> - <test> - <param name="input_file" value="paired_end_merged_errors.fastqsanger" ftype="fastqsanger" /> - <output name="output1_pairs_file" file="paired_end_1_cleaned.fastqsanger" /> - <output name="output2_pairs_file" file="paired_end_2_cleaned.fastqsanger" /> - <output name="output1_singles_file" file="paired_end_1_cleaned_singles.fastqsanger" /> - <output name="output2_singles_file" file="paired_end_2_cleaned_singles.fastqsanger" /> - </test> - </tests> - <help> +<tool id="fastq_paired_end_deinterlacer" name="FASTQ de-interlacer" version="1.1.1"> + <description>on paired end reads</description> + <requirements> + <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement> + </requirements> + <command><![CDATA[ +gx-fastq-paired-end-deinterlacer '$input_file' '${input_file.extension[len('fastq'):]}' '$output1_pairs_file' '$output2_pairs_file' '$output1_singles_file' '$output2_singles_file' + ]]></command> + <inputs> + <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="FASTQ reads" /> + </inputs> + <outputs> + <data name="output1_pairs_file" format_source="input_file" label="FASTQ de-interlacer left mates from data ${input_file.hid}" /> + <data name="output2_pairs_file" format_source="input_file" label="FASTQ de-interlacer right mates from data ${input_file.hid}"/> + <data name="output1_singles_file" format_source="input_file" label="FASTQ de-interlacer left singles from data ${input_file.hid}"/> + <data name="output2_singles_file" format_source="input_file" label="FASTQ de-interlacer right singles from data ${input_file.hid}"/> + </outputs> + <tests> + <test> + <param name="input_file" value="paired_end_merged.fastqsanger" ftype="fastqsanger" /> + <output name="output1_pairs_file" file="paired_end_1.fastqsanger" ftype="fastqsanger" /> + <output name="output2_pairs_file" file="paired_end_2.fastqsanger" ftype="fastqsanger" /> + <output name="output1_singles_file" file="paired_end_1_singles.fastqsanger" ftype="fastqsanger" /> + <output name="output2_singles_file" file="paired_end_2_singles.fastqsanger" ftype="fastqsanger" /> + </test> + <test> + <param name="input_file" value="paired_end_merged_errors.fastqsanger" ftype="fastqsanger" /> + <output name="output1_pairs_file" file="paired_end_1_cleaned.fastqsanger" ftype="fastqsanger" /> + <output name="output2_pairs_file" file="paired_end_2_cleaned.fastqsanger" ftype="fastqsanger" /> + <output name="output1_singles_file" file="paired_end_1_cleaned_singles.fastqsanger" ftype="fastqsanger" /> + <output name="output2_singles_file" file="paired_end_2_cleaned_singles.fastqsanger" ftype="fastqsanger" /> + </test> + </tests> + <help><![CDATA[ **What it does** De-interlaces a single fastq dataset representing paired-end run into two fastq datasets containing only the first or second mate read. Reads without mate are saved in separate output files. @@ -68,6 +70,8 @@ CGCCATTCCGAATCGTAGTTGTCGGCGTCTTCCAGTGCGGCAAGGCATCGT +1539:931/2 WNUUZ\P^`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB - - </help> + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btq281</citation> + </citations> </tool>
--- a/tool_dependencies.xml Mon Jan 27 09:27:16 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="galaxy_sequence_utils" version="1.0.0"> - <repository changeset_revision="0643676ad5f7" name="package_galaxy_utils_1_0" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>