0
|
1 <tool id="sampline" name="sample">
|
|
2 <description>records from a file</description>
|
|
3 <command interpreter="python">sampline.py --input='$input' --output='$out_file1' --nSample=$nSample --recSize=$recSize --nSkip=$nSkip $replacement</command> <!-- dataset paths are single-quoted so the shell passes each one as a single argument; the integer params and the boolean flag are substituted as-is -->
|
|
4 <inputs> <!-- each param below is substituted by name into the command template (e.g. $input, $nSample) -->
|
|
5 <param name="input" format="txt" type="data" label="Original file"/> <!-- dataset to sample from -->
|
|
6 <param name="nSample" size="10" type="integer" value="100" label="Number of records to sample"/> <!-- defaults to 100 -->
|
|
7 <param name="recSize" size="10" type="integer" value="1" label="Number of lines per record"/> <!-- defaults to 1 line per record -->
|
|
8 <param name="nSkip" size="10" type="integer" value="0" label="Number of top lines to output directly (without sampling)"/> <!-- defaults to 0 -->
|
|
9 <param name="replacement" label="Sampling with replacement" type="boolean" truevalue="--replacement" falsevalue="" checked="False"/> <!-- expands to the truevalue flag when checked and to an empty string otherwise; unchecked by default -->
|
|
10 </inputs>
|
|
11 <outputs> <!-- the tool declares a single output dataset -->
|
|
12 <data format="input" name="out_file1" /> <!-- referenced as $out_file1 in the command template. NOTE(review): format="input" presumably makes the output take the input dataset's datatype; confirm against the Galaxy tool schema -->
|
|
13 </outputs>
|
|
14 <tests> <!-- one functional test: 100 one-line records requested from test.map, no top lines skipped, replacement flag left empty; the result is compared with testmap.sampled -->
|
|
15 <test>
|
|
16 <output name="out_file1" file="testmap.sampled"/>
|
|
17 <param name="input" value="test.map" ftype="txt"/> <!-- datatype extension written in lowercase so it matches format="txt" declared on the input param -->
|
|
18 <param name="nSample" value="100"/>
|
|
19 <param name="recSize" value="1" />
|
|
20 <param name="nSkip" value="0" />
|
|
21 <param name="replacement" value=""/>
|
|
22 </test>
|
|
23 </tests>
|
|
24 <help>
|
|
25
|
|
26 **What it does**
|
|
27
|
|
28 This tool selects random records from a file. Each record is defined by a fixed number of lines.
|
|
29
|
|
30 - When over-sampling, the --replacement option is enforced automatically.
|
|
31
|
|
32 -----
|
|
33
|
|
34 **Example 1: sampling from a BED file**
|
|
35
|
|
36 parameters::
|
|
37
|
|
38 1 line per record, sampling 5 lines, without replacement, output line 1 (track name) directly
|
|
39
|
|
40 Input::
|
|
41
|
|
42 track name=test.bed
|
|
43 chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 -
|
|
44 chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 -
|
|
45 chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 +
|
|
46 chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 +
|
|
47 chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 -
|
|
48 chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 -
|
|
49 chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 +
|
|
50 chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 +
|
|
51 chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 +
|
|
52 chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 -
|
|
53
|
|
54 Output::
|
|
55
|
|
56 track name=test.bed
|
|
57 chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 -
|
|
58 chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 +
|
|
59 chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 -
|
|
60 chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 +
|
|
61 chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 +
|
|
62
|
|
63 **Example 2: sampling reads from a fastq file**
|
|
64
|
|
65 parameters::
|
|
66
|
|
67 4 lines per record, sampling 3 records, without replacement
|
|
68
|
|
69 Input::
|
|
70
|
|
71 @SRR066787.2496 WICMT-SOLEXA:8:1:28:2047 length=36
|
|
72 NNANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
|
|
73 +SRR066787.2496 WICMT-SOLEXA:8:1:28:2047 length=36
|
|
74 !!%!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
75 @SRR066787.2497 WICMT-SOLEXA:8:1:28:463 length=36
|
|
76 GTGATTAAGAAGAGACTGGCATCACTAAGGTGACAT
|
|
77 +SRR066787.2497 WICMT-SOLEXA:8:1:28:463 length=36
|
|
78 @A=BBCBBAA@:@:@@@:,?AB:B?BB=*2:@=?AA
|
|
79 @SRR066787.2498 WICMT-SOLEXA:8:1:28:704 length=36
|
|
80 GAACCCAATTTTCAAAGAAGTGTGACTGCTTGTTTC
|
|
81 +SRR066787.2498 WICMT-SOLEXA:8:1:28:704 length=36
|
|
82 =?BAABBACCCCAA9>>A=>A?A;;@A>ABBABBB:
|
|
83 @SRR066787.2499 WICMT-SOLEXA:8:1:28:997 length=36
|
|
84 CGACTTCAGGCTCTCGCTAGCCTTCGCTTGACTGAC
|
|
85 +SRR066787.2499 WICMT-SOLEXA:8:1:28:997 length=36
|
|
86 BCCBCCB?A1ACAC>;@CCAAABB?8=BA>@?B?@:
|
|
87 @SRR066787.2500 WICMT-SOLEXA:8:1:28:582 length=36
|
|
88 TCTCTCTCTTTCTCTCTCTCTCTCTCTCTCTCTCTC
|
|
89 +SRR066787.2500 WICMT-SOLEXA:8:1:28:582 length=36
|
|
90 ?.?.=9C8CCC:BACBCBC?CCC@CBBBCBBACAC8
|
|
91
|
|
92 Output::
|
|
93
|
|
94 @SRR066787.2497 WICMT-SOLEXA:8:1:28:463 length=36
|
|
95 GTGATTAAGAAGAGACTGGCATCACTAAGGTGACAT
|
|
96 +SRR066787.2497 WICMT-SOLEXA:8:1:28:463 length=36
|
|
97 @A=BBCBBAA@:@:@@@:,?AB:B?BB=*2:@=?AA
|
|
98 @SRR066787.2499 WICMT-SOLEXA:8:1:28:997 length=36
|
|
99 CGACTTCAGGCTCTCGCTAGCCTTCGCTTGACTGAC
|
|
100 +SRR066787.2499 WICMT-SOLEXA:8:1:28:997 length=36
|
|
101 BCCBCCB?A1ACAC>;@CCAAABB?8=BA>@?B?@:
|
|
102 @SRR066787.2500 WICMT-SOLEXA:8:1:28:582 length=36
|
|
103 TCTCTCTCTTTCTCTCTCTCTCTCTCTCTCTCTCTC
|
|
104 +SRR066787.2500 WICMT-SOLEXA:8:1:28:582 length=36
|
|
105 ?.?.=9C8CCC:BACBCBC?CCC@CBBBCBBACAC8
|
|
106
|
|
107 </help>
|
|
108 </tool>
|