Mercurial > repos > iuc > trycycler_partition
view trycycler_partition.xml @ 0:8fcec9049d68 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/trycycler commit 9d7c4277b0f96aacd466f2d497e08edcca3fa238"
| author | iuc |
|---|---|
| date | Thu, 11 Feb 2021 19:23:50 +0000 |
| parents | |
| children | 4688ae3b49b5 |
line wrap: on
line source
<!--
    Galaxy wrapper for `trycycler partition`.

    Takes a set of per-cluster FASTA datasets plus one long-read FASTQ dataset,
    stages each cluster as selected_cluster/cluster_<id>/2_all_seqs.fasta, runs
    `trycycler partition` over all staged cluster directories, and collects each
    cluster's 4_reads.fastq into the `partitions` output collection as
    partition_<id>.fastq.

    Tool version, requirements, EDAM terms, citations and the @PIPELINE@ help
    token are provided by macros.xml.
-->
<tool id='trycycler_partition' name='Trycycler partition' version='@TOOL_VERSION@' profile='21.01'>
    <description>assign the reads to the clusters</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro='edam_ontology'/>
    <expand macro='requirements'/>
    <version_command>trycycler --version</version_command>
    <command detect_errors='exit_code'><![CDATA[
        #import re
        ## Every shell statement must end with '&&': Galaxy joins the rendered
        ## lines into a single command line, so a missing '&&' would turn the
        ## following command into arguments of the previous one.
        mkdir -p 'partitions' &&

        ## Stage each cluster FASTA where trycycler expects it and remember the
        ## derived cluster id so the outputs can be collected afterwards.
        #set $cluster_ids = []
        #for $i in $input_cluster
            ## Cluster id = text after the last '_' of the element identifier,
            ## without a trailing '.fasta' (6 characters). The suffix is removed
            ## explicitly because str.strip('.fasta') strips *characters*, not
            ## a suffix.
            #set $cluster_id = str($i.element_identifier).split('_')[-1]
            #if $cluster_id.endswith('.fasta')
                #set $cluster_id = $cluster_id[:-6]
            #end if
            ## The identifier is user-controlled and ends up in shell paths:
            ## keep only word characters and '-'.
            #set $cluster_id = re.sub('[^\w\-]', '_', $cluster_id)
            #silent $cluster_ids.append($cluster_id)
            mkdir -p 'selected_cluster/cluster_${cluster_id}' &&
            ln -s '${i}' 'selected_cluster/cluster_${cluster_id}/2_all_seqs.fasta' &&
        #end for

        ## The glob is intentionally left outside the quotes so the shell
        ## expands it to every staged cluster directory.
        trycycler partition
            --cluster_dir 'selected_cluster'/cluster_*
            --reads '$reads'
            --min_aligned_len $min_aligned_len
            --min_read_cov $min_read_cov
            --threads \${GALAXY_SLOTS:-2} &&

        ## Collect the per-cluster reads into the directory scanned by
        ## discover_datasets.
        #for $cluster_id in $cluster_ids
            mv 'selected_cluster/cluster_${cluster_id}/4_reads.fastq' 'partitions/partition_${cluster_id}.fastq' &&
        #end for

        ## Terminates the trailing '&&' left by the loop above.
        echo 'bye!'
    ]]></command>
    <inputs>
        <param name='input_cluster'
               type='data'
               format='fasta'
               multiple='true'
               label='Cluster datasets'
               help='Clustered contigs (multiple FASTA files)'/>
        <param name='reads'
               type='data'
               format='fastq,fastq.gz'
               label='Long-read datasets'
               help='Long reads (FASTQ format) used to generate the assemblies'/>
        <param argument='--min_aligned_len'
               type='integer'
               min='500'
               max='3500'
               value='1000'
               label='Min bases aligned'
               help='Reads with less than this many bases aligned (default = 1000) will be ignored.'/>
        <param argument='--min_read_cov'
               type='integer'
               min='0'
               max='100'
               value='90'
               label='Min read length covered by alignments'
               help='Reads with less than this percentage of their length covered by alignments (default = 90.0) will be ignored.'/>
    </inputs>
    <outputs>
        <!-- One element per cluster; designation and extension are taken from the file name. -->
        <collection name='partitions' type='list' label='${tool.name} on ${on_string}'>
            <discover_datasets pattern='__designation_and_ext__' format='fastq' directory='partitions'/>
        </collection>
    </outputs>
    <tests>
        <!-- Default thresholds. -->
        <test>
            <param name='input_cluster' value='reconciled_cluster_01.fasta'/>
            <param name='reads' value='reads.fastq.gz'/>
            <param name='min_aligned_len' value='1000'/>
            <param name='min_read_cov' value='90'/>
            <output_collection name='partitions' type='list' count='1'>
                <element name='partition_01' file='partition_01.fastq' ftype='fastq'/>
            </output_collection>
        </test>
        <!-- Stricter thresholds. -->
        <test>
            <param name='input_cluster' value='reconciled_cluster_01.fasta'/>
            <param name='reads' value='reads.fastq.gz'/>
            <param name='min_aligned_len' value='1200'/>
            <param name='min_read_cov' value='95'/>
            <output_collection name='partitions' type='list' count='1'>
                <element name='partition_01' file='partition_02.fastq' ftype='fastq'/>
            </output_collection>
        </test>
        <!-- Shorter minimum alignment, intermediate coverage. -->
        <test>
            <param name='input_cluster' value='reconciled_cluster_01.fasta'/>
            <param name='reads' value='reads.fastq.gz'/>
            <param name='min_aligned_len' value='900'/>
            <param name='min_read_cov' value='93'/>
            <output_collection name='partitions' type='list' count='1'>
                <element name='partition_01' file='partition_03.fastq' ftype='fastq'/>
            </output_collection>
        </test>
        <!--
            NOTE(review): same parameters as the first test but a different
            expected file; confirm this is intentional and that
            partition_04.fastq is not a stale duplicate of partition_01.fastq.
        -->
        <test>
            <param name='input_cluster' value='reconciled_cluster_01.fasta'/>
            <param name='reads' value='reads.fastq.gz'/>
            <param name='min_aligned_len' value='1000'/>
            <param name='min_read_cov' value='90'/>
            <output_collection name='partitions' type='list' count='1'>
                <element name='partition_01' file='partition_04.fastq' ftype='fastq'/>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**Purpose**

The *Trycycler partition* splits the reads between the different clusters, i.e. each read will be assigned to whichever cluster it best aligns and saved into a file for that cluster. This step is run once for your entire genome (i.e. not on a per-cluster basis).

----

.. class:: infomark

**Input**

This tool requires as input the set of clusters considered valuable, and the long-read dataset used previously.

----

.. class:: infomark

**Output**

After **Trycycler partition** completes, it will generate a file per cluster, each of which contains its share of the total reads.

----

.. class:: infomark

@PIPELINE@
    ]]></help>
    <expand macro='citations'/>
</tool>
