annotate split_file_on_column.xml @ 0:ad6a989924ac draft

Uploaded
author bgruening
date Tue, 17 Dec 2013 15:24:50 -0500
parents
children 0ba6d09a71d6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
1 <tool id="tp_split_on_column" name="Split file" force_history_refresh="True" version="0.1.1">
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
2 <description>according to the values of a column</description>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
3 <requirements>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
4 <requirement type="package" version="4.1.0">gnu_awk</requirement>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
5 </requirements>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
6 <command>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
7 awk '{print > ("primary_${outfile.id}_" \$$column "_visible_${infile.ext}") }' "$infile";
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
8 echo 'Created' `ls -1 --hide="*_stdout" --hide="*_stderr" | wc -l` 'files:' > $outfile;
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
9 ls -1 --hide="*_stdout" --hide="*_stderr" >> $outfile;
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
10 </command>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
11
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
12 <inputs>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
13 <param format="txt" name="infile" type="data" label="File to select" />
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
14 <param name="column" label="on column" type="data_column" data_ref="infile" accept_default="true" />
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
15 </inputs>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
16
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
17 <outputs>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
18 <data format="input" name="outfile" metadata_source="infile" label="${tool.name} on ${on_string}"/>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
19 </outputs>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
20 <tests>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
21 <test>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
22 </test>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
23 </tests>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
24 <help>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
25
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
26 **What it does**
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
27
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
28 This tool splits a file into different smaller files using a specific column.
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
29 It will work like the group tool, but every group is saved to its own file.
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
30
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
31 -----
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
32
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
33 **Example**
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
34
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
35 Splitting on column 4 from this::
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
36
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
37 chr7 56632 56652 cluster 1
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
38 chr7 56736 56756 cluster 1
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
39 chr7 56761 56781 cluster 2
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
40 chr7 56772 56792 cluster 2
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
41 chr7 56775 56795 cluster 2
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
42
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
43 will produce 2 files with different clusters::
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
44
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
45 chr7 56632 56652 cluster 1
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
46 chr7 56736 56756 cluster 1
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
47
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
48
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
49 chr7 56761 56781 cluster 2
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
50 chr7 56772 56792 cluster 2
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
51 chr7 56775 56795 cluster 2
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
52
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
53
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
54 </help>
ad6a989924ac Uploaded
bgruening
parents:
diff changeset
55 </tool>