Mercurial > repos > iuc > crossmap_bed
diff crossmap_bed.xml @ 2:79f9e32b380b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crossmap commit e365f2835488e73b870c73502c24ff23d28b76a5
author | iuc |
---|---|
date | Fri, 20 Oct 2017 02:48:24 -0400 |
parents | bc72094f7ce9 |
children | b49e453e6f97 |
line wrap: on
line diff
--- a/crossmap_bed.xml Tue Sep 26 06:05:09 2017 -0400 +++ b/crossmap_bed.xml Fri Oct 20 02:48:24 2017 -0400 @@ -7,84 +7,96 @@ <expand macro="stdio"/> <expand macro="version_command"/> - <command><![CDATA[ - #set $input_file = str($seq_source.input) - - CrossMap.py bed - '${chain_source.input_chain}' +<!-- +1. CrossMap bed x.chain in.bed > out.bed +stdout/out.bed: valid and invalid combined - '${input_file}' +2. CrossMap bed x.chain in.bed out.bed +out.bed: valid only +out.bed.unmap: invalid only +--> + <command><![CDATA[ +CrossMap.py bed +'${chain_source.input_chain}' - #if str($include_fails) == "True" - > - #end if +'${input}' - '${output}' +#if $merge_unmapped_entries: + > '${output_combined}' +#else: + '${output_valid}' + && mv '${output_valid}.unmap' '${output_failed}' +#end if + ]]></command> <inputs> - <conditional name="seq_source"> - <expand macro="source" /> + <param name="input" type="data" format="bed" label="BED file" + help="BED format file must have at least 3 columns (chrom, start, end) and no more than 12 columns"/> - <when value="cached"> - <param format="bed" name="input" type="data" label="BED file" - help="BED format file must have at least 3 columns (chrom, start, end) and no more than 12 columns."> - <validator type="unspecified_build"/> - <!-- Gives error in tests - <validator type="dataset_metadata_in_file" filename="liftOver.loc" metadata_name="dbkey" metadata_column="0" message="LiftOver mapping (chain file) is not available for the specified build."/> - --> - </param> - </when> - <when value="history"> - <param type="data" format="bed" name="input" label="BED file" - help="BED format file must have at least 3 columns (chrom, start, end) and no more than 12 columns."/> - </when> - </conditional> <expand macro="chain" /> - <param name="include_fails" type="boolean" truevalue="True" checked="false" falsevalue="False" label="Include failed liftovers" - help="If a coordinate can not be lift over, do you want to include it in the output (it is 
still being marked 'fail')"/> + <param name="merge_unmapped_entries" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Merge failed and converted entries into single file"/> </inputs> <outputs> - <data format="bed" name="output" label="${tool.name} on ${on_string}" /> - <data format="text" name="output2" label="${tool.name} (bedgraph) on ${on_string}" /> + <data name="output_valid" format="bed" label="${tool.name} (valid only) on ${on_string}"> + <filter>merge_unmapped_entries is False</filter> + </data> + <data name="output_failed" format="bed" label="${tool.name} (failed only) on ${on_string}"> + <filter>merge_unmapped_entries is False</filter> + </data> + + <data name="output_combined" format="bed" label="${tool.name} on ${on_string}"> + <filter>merge_unmapped_entries is True</filter> + </data> </outputs> <tests> - <!-- BED --> - <test> + <test><!-- this test only contains perfect entries that do get liftOvered (separate output) --> + <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/> <param name="index_source" value="history"/> - <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/> <param name="input_chain" value="aToB.over.chain" ftype="csv"/> - <param name="include_fails" value="False"/> + <param name="merge_unmapped_entries" value="false" /> - <output name="output" file="test_bed_01_output_a__only-matches.bed"/> + <output name="output_valid" file="test_bed_01_output_a__only-matches.bed"/> + <output name="output_failed" file="test_bed_01_output_a__only_fails.bed"/> </test> - <test> + <test><!-- this test only contains perfect entries that do get liftOvered (merged output) --> + <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/> <param name="index_source" value="history"/> - <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/> <param name="input_chain" value="aToB.over.chain" ftype="txt"/> - <param name="include_fails" value="True"/> + <param name="merge_unmapped_entries" 
value="true" /> + + <output name="output_combined" file="test_bed_01_output_a__all.bed"/> + </test> - <output name="output" file="test_bed_01_output_a__all.bed"/> - </test> - <test> + <test><!-- this test only contains imperfect entries that do get liftOvered (separate output) --> + <param name="input" value="test_bed_02_input_a.bed" ftype="bed"/> <param name="index_source" value="history"/> - <param name="input" value="test_bed_02_input_a.bed" ftype="bed"/> <param name="input_chain" value="aToB.over.chain" ftype="txt"/> - <param name="include_fails" value="False"/> + <param name="merge_unmapped_entries" value="false" /> - <output name="output" file="test_bed_02_output_a__only-matches.bed"/> + <output name="output_valid" file="test_bed_02_output_a__only-matches.bed"/> + <output name="output_failed" file="test_bed_02_output_a__only_fails.bed"/> </test> - <test> - <param name="index_source" value="history"/> + <test><!-- this test only contains imperfect entries that do get liftOvered (merged output) --> <param name="input" value="test_bed_02_input_a.bed" ftype="bed"/> + <param name="index_source" value="history"/> <param name="input_chain" value="aToB.over.chain" ftype="txt"/> - <param name="include_fails" value="True"/> + <param name="merge_unmapped_entries" value="true" /> + + <output name="output_combined" file="test_bed_02_output_a__all.bed"/> + </test> - <output name="output" file="test_bed_02_output_a__all.bed"/> + <test><!-- clone of first test: tests cached reference chain file --> + <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/> + <param name="index_source" value="cached"/> + <param name="merge_unmapped_entries" value="false" /> + + <output name="output_valid" file="test_bed_01_output_a__only-matches.bed"/> + <output name="output_failed" file="test_bed_01_output_a__only_fails.bed"/> </test> </tests> <help><![CDATA[ @@ -96,36 +108,35 @@ BED format file must have at least 3 columns (chrom, start, end) and no more than 12 columns. 
BED format: http://genome.ucsc.edu/FAQ/FAQformat.html#format1 - A BED (Browser Extensible Data) file is a tab-delimited text file - describing genome regions or gene annotations. It is the standard file - format used by UCSC. It consists of one line per feature, each containing - 3-12 columns. CrossMap converts BED files with fewer than 12 columns to a - different assembly by updating the chromosome and genome coordinates only; - all other columns remain unchanged. Regions from the old assembly mapping to - multiple locations in the new assembly will be split. For 12-column BED - files, all columns will be updated accordingly except the 4th column (name - of bed line), 5th column (score value) and 9th column (RGB value describing - the display color). 12-column BED files usually define multiple blocks (e.g. - exons); if any of the exons fails to map to a new assembly, the whole BED - line is skipped. +A BED (Browser Extensible Data) file is a tab-delimited text file +describing genome regions or gene annotations. It is the standard file +format used by UCSC. It consists of one line per feature, each containing +3-12 columns. CrossMap converts BED files with fewer than 12 columns to a +different assembly by updating the chromosome and genome coordinates only; +all other columns remain unchanged. Regions from the old assembly mapping to +multiple locations in the new assembly will be split. For 12-column BED +files, all columns will be updated accordingly except the 4th column (name +of bed line), 5th column (score value) and 9th column (RGB value describing +the display color). 12-column BED files usually define multiple blocks (e.g. +exons); if any of the exons fails to map to a new assembly, the whole BED +line is skipped. - NOTE: +Notes: - 1. For BED-like formats mentioned above, CrossMap only updates “chrom (1st - column)”, “start (2nd column) ”, “end (3rd column) ” and “strand” (if - any). All other columns will keep AS-IS. - 2. 
Lines starting with ‘#’, ‘browser’, ‘track’ will be skipped. - 3. Lines with fewer than 3 columns will be skipped. - 4. The 2nd and 3rd columns must be integers; otherwise the line is skipped. - 5. “+” strand is assumed if no strand information was found. - 6. For standard BED format (12 columns), if any of the defined exon blocks - cannot be uniquely mapped to the target assembly, the whole entry will be - skipped. - 7. If an input region cannot be consecutively mapped to the target assembly, it will be split. - 8. *.unmap file contains regions that cannot be unambiguously converted. +1. For BED-like formats mentioned above, CrossMap only updates “chrom (1st + column)”, “start (2nd column) ”, “end (3rd column) ” and “strand” (if + any). All other columns will keep AS-IS. +2. Lines starting with ‘#’, ‘browser’, ‘track’ will be skipped. +3. Lines with fewer than 3 columns will be skipped. +4. The 2nd and 3rd columns must be integers; otherwise the line is skipped. +5. “+” strand is assumed if no strand information was found. +6. For standard BED format (12 columns), if any of the defined exon blocks + cannot be uniquely mapped to the target assembly, the whole entry will be + skipped. +7. If an input region cannot be consecutively mapped to the target assembly, it will be split. +8. \*.unmap file contains regions that cannot be unambiguously converted. -Please see `the manual <http://crossmap.sourceforge.net/#convert-bed-format-files>`__ for more details. -]]></help> + ]]></help> <citations> <citation type="doi">10.1093/bioinformatics/btt730</citation>