Mercurial > repos > iuc > kma
annotate test-data/generate_test_data.sh @ 2:98099e4da1fd draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 812bb0a31f030096bf1b0836f64e76b820c8e2c2
author | iuc |
---|---|
date | Fri, 14 Jun 2024 21:15:44 +0000 |
parents | 2595c27071c2 |
children |
rev | line source |
---|---|
0
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/bash |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
2 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
3 # E. coli locus b0842 (b0842.fasta.gz) downloaded from Enterobase E. coli cgMLST scheme |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
4 # requires: gunzip, wget, kma, bwa, samtools, bedtools |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
5 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
6 gunzip b0842.fasta.gz |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
7 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
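8 # Take the first 5 alleles (the first 10 lines, assuming each FASTA record is one header line plus one sequence line) to reduce the size of the test data |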
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
9 mkdir ecoli_cgMLST |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
10 head -n 10 b0842.fasta > ecoli_cgMLST/ecoli_b0842_1to5.fasta |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
11 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
12 kma index -k 8 -i ecoli_cgMLST/ecoli_b0842_1to5.fasta -o ecoli_cgMLST/ecoli_b0842_1to5 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
13 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
14 wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR884/ERR884056/ERR884056_1.fastq.gz |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
15 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
16 # Use bwa to map reads to reduced E. coli locus b0842 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
17 # and extract only mapped reads (to reduce size of test dataset) |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
18 bwa index ecoli_cgMLST/ecoli_b0842_1to5.fasta |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
19 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
20 bwa mem ecoli_cgMLST/ecoli_b0842_1to5.fasta ERR884056_1.fastq.gz -o ERR884056_1_ecoli_b0842_1to5.sam |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
21 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
22 samtools view ERR884056_1_ecoli_b0842_1to5.sam -bo ERR884056_1_ecoli_b0842_1to5.bam |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
23 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
24 # Select mapped reads only (-F 4 drops reads that have the "unmapped" flag set) |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
25 samtools view -b -F 4 ERR884056_1_ecoli_b0842_1to5.bam > ERR884056_1_ecoli_b0842_1to5.mapped.bam |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
26 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
27 samtools sort -n ERR884056_1_ecoli_b0842_1to5.mapped.bam -o ERR884056_1_ecoli_b0842_1to5.mapped.sort.bam |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
28 |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
29 bedtools bamtofastq -i ERR884056_1_ecoli_b0842_1to5.mapped.sort.bam -fq ERR884056_ecoli_b0842.mapped_R1.fastq |
2595c27071c2
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kma commit 43bbde4f8f8671284b2acb21dfd2657de4ba967f"
iuc
parents:
diff
changeset
|
30 |