diff crossmap_bed.xml @ 2:79f9e32b380b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crossmap commit e365f2835488e73b870c73502c24ff23d28b76a5
author iuc
date Fri, 20 Oct 2017 02:48:24 -0400
parents bc72094f7ce9
children b49e453e6f97
line wrap: on
line diff
--- a/crossmap_bed.xml	Tue Sep 26 06:05:09 2017 -0400
+++ b/crossmap_bed.xml	Fri Oct 20 02:48:24 2017 -0400
@@ -7,84 +7,96 @@
     <expand macro="stdio"/>
     <expand macro="version_command"/>
 
-    <command><![CDATA[
-        #set $input_file = str($seq_source.input)
-
-        CrossMap.py bed
-            '${chain_source.input_chain}'
+<!--
+1. CrossMap bed x.chain in.bed > out.bed
+stdout/out.bed: valid and invalid combined
 
-            '${input_file}'
+2. CrossMap bed x.chain in.bed out.bed
+out.bed: valid only
+out.bed.unmap: invalid only
+-->
+    <command><![CDATA[
+CrossMap.py bed
+'${chain_source.input_chain}'
 
-            #if str($include_fails) == "True"
-            >
-            #end if
+'${input}'
 
-            '${output}'
+#if $merge_unmapped_entries:
+    > '${output_combined}'
+#else:
+    '${output_valid}'
+    && mv '${output_valid}.unmap' '${output_failed}'
+#end if
+
     ]]></command>
 
 
     <inputs>
-       <conditional name="seq_source">
-            <expand macro="source" />
+        <param name="input" type="data" format="bed" label="BED file"
+               help="BED format file must have at least 3 columns (chrom, start, end) and no more than 12 columns"/>
 
-            <when value="cached">
-                <param format="bed" name="input" type="data" label="BED file"
-                       help="BED format file must have at least 3 columns (chrom, start, end) and no more than 12 columns.">
-                    <validator type="unspecified_build"/>
-                    <!-- Gives error in tests
-                    <validator type="dataset_metadata_in_file" filename="liftOver.loc" metadata_name="dbkey" metadata_column="0" message="LiftOver mapping (chain file) is not available for the specified build."/>
-                    -->
-                </param>
-            </when>
-            <when value="history">
-                <param type="data" format="bed" name="input" label="BED file"
-                       help="BED format file must have at least 3 columns (chrom, start, end) and no more than 12 columns."/>
-            </when>
-        </conditional>
         <expand macro="chain" />
 
-        <param name="include_fails" type="boolean" truevalue="True" checked="false" falsevalue="False" label="Include failed liftovers"
-               help="If a coordinate can not be lift over, do you want to include it in the output (it is still being marked 'fail')"/>
+        <param name="merge_unmapped_entries" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Merge failed and converted entries into single file"/>
     </inputs>
 
     <outputs>
-        <data format="bed" name="output" label="${tool.name} on ${on_string}" />
-        <data format="text" name="output2" label="${tool.name} (bedgraph) on ${on_string}" />
+        <data name="output_valid" format="bed" label="${tool.name} (valid only) on ${on_string}">
+            <filter>merge_unmapped_entries is False</filter>
+        </data>
+        <data name="output_failed" format="bed" label="${tool.name} (failed only) on ${on_string}">
+            <filter>merge_unmapped_entries is False</filter>
+        </data>
+
+        <data name="output_combined" format="bed" label="${tool.name} on ${on_string}">
+            <filter>merge_unmapped_entries is True</filter>
+        </data>
     </outputs>
 
     <tests>
-    <!-- BED -->
-        <test>
+        <test><!-- this test only contains perfect entries that do get liftOvered (separate output) -->
+            <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/>
             <param name="index_source" value="history"/>
-            <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/>
             <param name="input_chain" value="aToB.over.chain" ftype="csv"/>
-            <param name="include_fails" value="False"/>
+            <param name="merge_unmapped_entries" value="false" />
 
-            <output name="output" file="test_bed_01_output_a__only-matches.bed"/>
+            <output name="output_valid" file="test_bed_01_output_a__only-matches.bed"/>
+            <output name="output_failed" file="test_bed_01_output_a__only_fails.bed"/>
         </test>
-        <test>
+        <test><!-- this test only contains perfect entries that do get liftOvered (merged output) -->
+            <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/>
             <param name="index_source" value="history"/>
-            <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/>
             <param name="input_chain" value="aToB.over.chain" ftype="txt"/>
-            <param name="include_fails" value="True"/>
+            <param name="merge_unmapped_entries" value="true" />
+
+            <output name="output_combined" file="test_bed_01_output_a__all.bed"/>
+        </test>
 
-            <output name="output" file="test_bed_01_output_a__all.bed"/>
-        </test>
-        <test>
+        <test><!-- this test only contains imperfect entries that do get liftOvered (separate output) -->
+            <param name="input" value="test_bed_02_input_a.bed" ftype="bed"/>
             <param name="index_source" value="history"/>
-            <param name="input" value="test_bed_02_input_a.bed" ftype="bed"/>
             <param name="input_chain" value="aToB.over.chain" ftype="txt"/>
-            <param name="include_fails" value="False"/>
+            <param name="merge_unmapped_entries" value="false" />
 
-            <output name="output" file="test_bed_02_output_a__only-matches.bed"/>
+            <output name="output_valid" file="test_bed_02_output_a__only-matches.bed"/>
+            <output name="output_failed" file="test_bed_02_output_a__only_fails.bed"/>
         </test>
-        <test>
-            <param name="index_source" value="history"/>
+        <test><!-- this test only contains imperfect entries that do get liftOvered (merged output) -->
             <param name="input" value="test_bed_02_input_a.bed" ftype="bed"/>
+            <param name="index_source" value="history"/>
             <param name="input_chain" value="aToB.over.chain" ftype="txt"/>
-            <param name="include_fails" value="True"/>
+            <param name="merge_unmapped_entries" value="true" />
+
+            <output name="output_combined" file="test_bed_02_output_a__all.bed"/>
+        </test>
 
-            <output name="output" file="test_bed_02_output_a__all.bed"/>
+        <test><!-- clone of first test: tests cached reference chain file -->
+            <param name="input" value="test_bed_01_input_a.bed" ftype="bed"/>
+            <param name="index_source" value="cached"/>
+            <param name="merge_unmapped_entries" value="false" />
+
+            <output name="output_valid" file="test_bed_01_output_a__only-matches.bed"/>
+            <output name="output_failed" file="test_bed_01_output_a__only_fails.bed"/>
         </test>
     </tests>
     <help><![CDATA[
@@ -96,36 +108,35 @@
 BED format file must have at least 3 columns (chrom, start, end) and no more than 12 columns.
 BED format: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
 
-    A BED (Browser Extensible Data) file is a tab-delimited text file
-    describing genome regions or gene annotations. It is the standard file
-    format used by UCSC. It consists of one line per feature, each containing
-    3-12 columns. CrossMap converts BED files with less than 12 columns to a
-    different assembly by updating the chromosome and genome coordinates only;
-    all other columns remain unchanged. Regions from old assembly mapping to
-    multiple locations to the new assembly will be split. For 12-columns BED
-    files, all columns will be updated accordingly except the 4th column (name
-    of bed line), 5th column (score value) and 9th column (RGB value describing
-    the display color). 12-column BED files usually define multiple blocks (eg.
-    exon); if any of the exons fails to map to a new assembly, the whole BED
-    line is skipped.
+A BED (Browser Extensible Data) file is a tab-delimited text file
+describing genome regions or gene annotations. It is the standard file
+format used by UCSC. It consists of one line per feature, each containing
+3-12 columns. CrossMap converts BED files with fewer than 12 columns to a
+different assembly by updating the chromosome and genome coordinates only;
+all other columns remain unchanged. Regions from the old assembly that map to
+multiple locations in the new assembly will be split. For 12-column BED
+files, all columns will be updated accordingly except the 4th column (name
+of bed line), 5th column (score value) and 9th column (RGB value describing
+the display color). 12-column BED files usually define multiple blocks (e.g.
+exons); if any of the exons fails to map to a new assembly, the whole BED
+line is skipped.
 
-    NOTE:
+Notes:
 
-    1. For BED-like formats mentioned above, CrossMap only updates “chrom (1st
-       column)”, “start (2nd column) ”, “end (3rd column) ” and “strand” (if
-       any). All other columns will keep AS-IS.
-    2. Lines starting with ‘#’, ‘browser’, ‘track’ will be skipped.
-    3. Lines will less than 3 columns will be skipped.
-    4. 2nd-column and 3-column must be integer, otherwise skipped.
-    5. “+” strand is assumed if no strand information was found.
-    6. For standard BED format (12 columns). If any of the defined exon blocks
-       cannot be uniquely mapped to target assembly, the whole entry will be
-       skipped.
-    7. If input region cannot be consecutively mapped target assembly, it will be split.
-    8. *.unmap file contains regions that cannot be unambiguously converted.
+1. For BED-like formats mentioned above, CrossMap only updates “chrom (1st
+   column)”, “start (2nd column)”, “end (3rd column)” and “strand” (if
+   any). All other columns are kept AS-IS.
+2. Lines starting with ‘#’, ‘browser’, ‘track’ will be skipped.
+3. Lines with fewer than 3 columns will be skipped.
+4. The 2nd and 3rd columns must be integers; otherwise the line is skipped.
+5. “+” strand is assumed if no strand information was found.
+6. For standard BED format (12 columns): if any of the defined exon blocks
+   cannot be uniquely mapped to the target assembly, the whole entry will be
+   skipped.
+7. If an input region cannot be consecutively mapped to the target assembly, it will be split.
+8. \*.unmap file contains regions that cannot be unambiguously converted.
 
-Please see `the manual <http://crossmap.sourceforge.net/#convert-bed-format-files>`__ for more details.
-]]></help>
+    ]]></help>
 
     <citations>
         <citation type="doi">10.1093/bioinformatics/btt730</citation>