annotate test-data.sh @ 3:52a911972c02 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit 8533fe71d1d50f09348da2dc34941724407a1ffe"
author iuc
date Tue, 14 Jul 2020 07:40:31 -0400
parents 2a90d2fd3336
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
1 #!/usr/bin/env bash
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
2
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
3 # install conda
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
4 if type conda > /dev/null; then
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
5 true
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
6 else
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
7 tmp=$(mktemp -d)
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
8 wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
9 bash Miniconda3-latest-Linux-x86_64.sh -b -p "$tmp/miniconda"
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
10 source "$tmp/miniconda/bin/activate"
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
11 fi
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
12
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
13 eval "$(conda shell.bash hook)"
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
14
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
15 # install conda env
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
16 if grep -Fq __bioconductor-dada2@1.14 <<< $(conda env list); then
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
17 true
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
18 else
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
19 conda create -y --quiet --override-channels --channel conda-forge --channel bioconda --channel defaults --name __bioconductor-dada2@1.14 bioconductor-dada2=1.14
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
20 fi
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
21
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
22 conda activate __bioconductor-dada2@1.14
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
23
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
24 # create test data
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
25 cd test-data/
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
26
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
27 # download Mothur SOP data from zenodo (GTN), same as
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
28 # http://www.mothur.org/w/images/d/d6/MiSeqSOPData.zip but with stable links
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
29 # (the file names need to be fixed, hence the explicit -O targets)
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
30 wget -nc -O F3D0_S188_L001_R1_001.fastq https://zenodo.org/record/800651/files/F3D0_R1.fastq?download=1
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
31 wget -nc -O F3D0_S188_L001_R2_001.fastq https://zenodo.org/record/800651/files/F3D0_R2.fastq?download=1
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
32 wget -nc -O F3D141_S207_L001_R1_001.fastq https://zenodo.org/record/800651/files/F3D141_R1.fastq?download=1
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
33 wget -nc -O F3D141_S207_L001_R2_001.fastq https://zenodo.org/record/800651/files/F3D141_R2.fastq?download=1
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
34
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
35 # keep only the first 3000 lines of each fastq file (~ 10% of the data, for speed) and gzip the result
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
36 for i in *fastq
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
37 do
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
38 head -n 3000 "$i" | gzip -c > "$i.gz"
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
39 done
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
40 rm *fastq
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
41
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
42 # download databases from https://zenodo.org/record/158955
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
43 # as mentioned in https://benjjneb.github.io/dada2/training.html
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
44 wget -nc -O reference.fa.gz https://zenodo.org/record/158955/files/rdp_train_set_14.fa.gz?download=1
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
45 wget -nc -O reference_species.fa.gz https://zenodo.org/record/158955/files/rdp_species_assignment_14.fa.gz?download=1
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
46
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
47 # keep only the first 1000 lines (~ 5%) of each reference file (for speed)
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
48 zcat reference.fa.gz | head -n 1000 | gzip -c > t && mv t reference.fa.gz
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
49 zcat reference_species.fa.gz | head -n 1000 | gzip -c > t && mv t reference_species.fa.gz
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
50
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
51
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
52 # generate outputs
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
53 Rscript gentest.R
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
54
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
55 conda deactivate
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
56
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
57 # # remove files only needed for test generation
2a90d2fd3336 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f2a33fe115fef9d711112b53136cf7619f1b19be"
iuc
parents:
diff changeset
58 # rm learnErrors_F3D0_R2.pdf dada_F3D0_R2.Rdata