Mercurial > repos > earlhaminst > hcluster_sg_parser
changeset 0:dbc49bd1a3e9 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
| author | earlhaminst | 
|---|---|
| date | Mon, 12 Dec 2016 07:12:23 -0500 | 
| parents | |
| children | 17aa68582a05 | 
| files | hcluster_sg_parser.pl hcluster_sg_parser.xml test-data/0_output.txt test-data/1_output.txt test-data/2_output.txt test-data/3_output.txt test-data/hcluster_sg.tabular | 
| diffstat | 7 files changed, 104 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
#!/usr/bin/perl
#
# hcluster_sg_parser.pl (added by this changeset as b/hcluster_sg_parser.pl)
#
# A simple perl parser to convert hcluster_sg 3-column output into lists of
# ids in separate files.
#
# Usage: hcluster_sg_parser.pl <file>
#
# Each input line is split on tabs; column 1 is used as the cluster id and
# column 3 as a comma-separated list of member ids (column 2 is not read).
# For every line, the member list is written, with commas turned into
# newlines, to "<cluster_id>_output.txt" in the current working directory.
# A later line with the same cluster id overwrites the earlier file.
#
# Dies if the input file is missing/unreadable, if a cluster id contains a
# path separator, or if an output file cannot be written. Blank lines are
# ignored; lines with fewer than 3 columns are skipped with a warning.
use strict;
use warnings;

die "Usage: $0 <file>\n" unless @ARGV;

my $file1 = $ARGV[0];
open my $fh1, '<', $file1
    or die "Could not open file '$file1' for reading: $!";

while (my $line = <$fh1>) {
    # Strip the line terminator; also tolerates CRLF input, which plain
    # chomp would leave as a stray "\r" in the id list.
    $line =~ s/[\r\n]+\z//;

    # Blank lines carry no cluster.
    next unless $line =~ /\S/;

    # LIMIT of -1 keeps trailing empty fields, so an empty member column
    # yields '' rather than undef.
    my @row = split /\t/, $line, -1;
    if (@row < 3 || $row[0] eq '') {
        warn "Skipping malformed line $. of '$file1': "
           . "expected cluster id and 3 tab-separated columns\n";
        next;
    }

    my ($cluster_id, $id_list) = @row[0, 2];

    # The cluster id becomes part of a file name; refuse anything that
    # could direct the write outside the current working directory.
    if ($cluster_id =~ m{[/\\\0]}) {
        die "Invalid cluster id '$cluster_id' on line $. of '$file1': "
          . "path separators are not allowed\n";
    }

    # Change commas to newlines
    $id_list =~ tr/,/\n/;

    my $outfile = $cluster_id . "_output.txt";
    open(my $fh, '>', $outfile)
        or die "Could not open file '$outfile' for writing: $!";
    print {$fh} $id_list
        or die "Could not write to file '$outfile': $!";
    # Buffered write errors only surface at close, so check it.
    close $fh
        or die "Could not close file '$outfile': $!";
}
close $fh1;
<!--
    hcluster_sg_parser.xml (added by this changeset as b/hcluster_sg_parser.xml)

    Tool wrapper that runs hcluster_sg_parser.pl on one tabular input and
    collects every "<designation>_output.txt" file the script writes into a
    list collection.
-->
<tool id="hcluster_sg_parser" name="hcluster_sg_parser" version="0.1.1">
    <description>Converts hcluster_sg 3-column output into lists of ids</description>
    <command>
<![CDATA[
## Both paths are single-quoted so that spaces or shell metacharacters in
## them cannot split or alter the command line.
perl '$__tool_directory__/hcluster_sg_parser.pl'
'$inputFile'
]]>
    </command>
    <inputs>
        <param name="inputFile" type="data" format="tabular" label="hcluster output file in 3-column format" help="3-columns format: cluster_id cluster-size cluster-members" />
    </inputs>
    <outputs>
        <collection name="ids_lists" type="list" label="${tool.name} on ${on_string}">
            <!-- The named group must be written with &lt; and &gt;: a raw
                 "<" is not allowed inside an XML attribute value. -->
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_output\.txt" ext="txt" />
        </collection>
    </outputs>
    <tests>
        <test>
            <param name="inputFile" ftype="tabular" value="hcluster_sg.tabular" />
            <output_collection name="ids_lists" type="list">
                <element name="0" file="0_output.txt" ftype="txt" />
                <element name="1" file="1_output.txt" ftype="txt" />
                <element name="2" file="2_output.txt" ftype="txt" />
                <element name="3" file="3_output.txt" ftype="txt" />
            </output_collection>
        </test>
    </tests>
    <help>
<![CDATA[
Simple wrapper for hcluster_sg output parser.
]]>
    </help>
    <citations>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/0_output.txt Mon Dec 12 07:12:23 2016 -0500 @@ -0,0 +1,20 @@ +90 +52 +76 +48 +88 +78 +31 +46 +4 +29 +6 +60 +80 +37 +33 +64 +66 +62 +42 +57
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1_output.txt Mon Dec 12 07:12:23 2016 -0500 @@ -0,0 +1,10 @@ +70 +21 +72 +84 +26 +86 +14 +10 +19 +53
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2_output.txt Mon Dec 12 07:12:23 2016 -0500 @@ -0,0 +1,5 @@ +74 +68 +2 +24 +58
