Mercurial > repos > iuc > homer_gtf_to_annotations
annotate test-data/generate_tests.sh @ 0:8ebb6520eef4 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
author | iuc |
---|---|
date | Sun, 08 Aug 2021 11:04:17 +0000 |
parents | |
children | 396cf2c12173 |
rev | line source |
---|---|
0
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
#!/usr/bin/env bash
## Generate input data:
# test-data/small.gtf is built from the Ensembl release-102 mouse GTF: its
# first 5 lines, followed by every record on chromosome "2" whose column 5 is
# > 74667792 and whose column 4 is < 74748393, with "chr" prepended to the line.
# test-data/small_simplified.gtf is small.gtf minus the IDs listed below.
# Both files are assembled in a private scratch directory and only moved into
# test-data/ once every step succeeded, so a failed download can never leave
# behind a truncated file that the existence check would later accept.
if [ ! -e test-data/small_simplified.gtf ]; then
  gtf_url=http://ftp.ensembl.org/pub/release-102/gtf/mus_musculus/Mus_musculus.GRCm38.102.gtf.gz
  # annotatePeaks.pl gives different results every time with the full
  # small.gtf, so these transcripts/genes are removed to simplify the GTF.
  excluded_ids=(
    ENSMUST00000152027
    ENSMUST00000156342
    ENSMUST00000139005
    ENSMUST00000144544
    ENSMUST00000111982
    ENSMUST00000140666
    ENSMUST00000190553
    ENSMUST00000132326
    ENSMUST00000047830
    ENSMUST00000047904
    ENSMUST00000111980
    ENSMUSG00000065500
    ENSMUSG00000100642
  )
  scratch_dir=$(mktemp -d) || {
    echo "error: could not create a temporary directory" >&2
    exit 1
  }
  gtf_gz="${scratch_dir}/Mus_musculus.GRCm38.102.gtf.gz"
  # gzip -t validates the archive up front: the exit status of the
  # 'gzip -dc | head' pipeline cannot be trusted because head exits early.
  # The IDs are plain strings, so a fixed-string match (-F) is equivalent to
  # the regex alternation and does not require PCRE support in grep.
  if wget "$gtf_url" -O "$gtf_gz" &&
    gzip -t "$gtf_gz" &&
    gzip -dc "$gtf_gz" | head -n 5 > "${scratch_dir}/small.gtf" &&
    gzip -dc "$gtf_gz" |
      awk -v start=74667792 -v end=74748393 \
        '$1 == "2" && $5 > start && $4 < end{print "chr"$0}' \
        >> "${scratch_dir}/small.gtf" &&
    grep -v -F -f <(printf '%s\n' "${excluded_ids[@]}") \
      "${scratch_dir}/small.gtf" > "${scratch_dir}/small_simplified.gtf" &&
    mv -- "${scratch_dir}/small.gtf" test-data/small.gtf &&
    mv -- "${scratch_dir}/small_simplified.gtf" test-data/small_simplified.gtf; then
    rm -rf -- "$scratch_dir"
  else
    rm -rf -- "$scratch_dir"
    echo "error: could not generate test-data/small.gtf and test-data/small_simplified.gtf" >&2
    exit 1
  fi
fi
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
# Download the CTCF peaks BED file unless it is already present.
if [ ! -e test-data/CTCF_peaks.bed ]; then
  if ! wget https://raw.githubusercontent.com/lldelisle/scriptsForWilleminEtAl2021/main/CTCF/E12_Limbs_Wt_CTCF_colored.bed -O test-data/CTCF_peaks.bed; then
    # wget -O creates/truncates the target even when the download fails;
    # remove it so the existence check above retries on the next run.
    rm -f -- test-data/CTCF_peaks.bed
    echo "error: could not download test-data/CTCF_peaks.bed" >&2
    exit 1
  fi
fi
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
# Derive CTCF_peaks_shifted.bed from CTCF_peaks.bed: keep the lines containing
# "chr2", rename column 1 to "mm10_dna" and subtract 73740000 from columns 2
# and 3.
# NOTE(review): 73740000 is the start of the chr2:73740000-75787000 window in
# the UCSC URL given for chr2_subset.fa in this script, so the shifted
# coordinates are presumably relative to that subset -- confirm.
if [ ! -e test-data/CTCF_peaks_shifted.bed ]; then
  # awk reads the file directly (no cat/grep pipeline) so that a missing or
  # unreadable input is reported as a failure instead of silently producing an
  # empty output that would block regeneration on later runs.
  if ! awk -v OFS="\t" '/chr2/ {$1="mm10_dna"; $2-=73740000; $3-=73740000; print}' \
    test-data/CTCF_peaks.bed > test-data/CTCF_peaks_shifted.bed; then
    rm -f -- test-data/CTCF_peaks_shifted.bed
    echo "error: could not generate test-data/CTCF_peaks_shifted.bed" >&2
    exit 1
  fi
fi
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
16 # chr2_subset.fa was downloaded from UCSC |
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
17 # https://genome.ucsc.edu/cgi-bin/hgc?hgsid=1136019667_XgAJOvV4a3CY4ibCu6RrUcvGxLNo&g=htcGetDna2&table=&i=mixed&o=56694975&l=56694975&r=56714605&getDnaPos=chr2%3A73740000-75787000&db=mm10&hgSeq.cdsExon=1&hgSeq.padding5=0&hgSeq.padding3=0&hgSeq.casing=upper&hgSeq.maskRepeats=on&boolshad.hgSeq.maskRepeats=0&hgSeq.repMasking=lower&boolshad.hgSeq.revComp=0&submit=get+DNA |
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
18 |
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
# Activate the conda environment of the tool by sourcing the output of
# 'planemo conda_env'. The output is captured first: with a bare
# '. <(planemo ...)' a planemo failure only sources an empty stream and the
# rest of the script would run against whatever happens to be on PATH.
if ! conda_env_script=$(planemo conda_env homer_gtf_to_annotation.xml); then
  echo "error: 'planemo conda_env homer_gtf_to_annotation.xml' failed" >&2
  exit 1
fi
# Sourced (not executed) so the environment changes apply to this shell.
. <(printf '%s\n' "$conda_env_script")
# Show which 'homer' is picked up, as a sanity check of the activation.
command -v homer ||
  echo "warning: no 'homer' found on PATH after activating the environment" >&2
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
## homer_gtf_to_annotation

#######################################
# Run parseGTF.pl on a GTF, then assignGenomeAnnotation on its output.
# The intermediate parseGTF.pl output goes to a private temporary file (no
# shared, predictable /tmp/annotations*.txt), and assignGenomeAnnotation is
# skipped when parseGTF.pl fails or produces nothing.
# Arguments:
#   $1 - input GTF
#   $2 - annotation file to write (value of -prioritize)
#   $3 - file receiving assignGenomeAnnotation's stdout, or "-" to leave it
#        on this script's stdout
#   $4... - extra options forwarded to parseGTF.pl after "ann"
# Outputs:  diagnostics on stderr
# Returns:  0 on success, non-zero if either command failed
#######################################
run_gtf_to_annotations() {
  local gtf=$1 out_annotations=$2 out_stats=$3
  shift 3
  local parsed status=0
  parsed=$(mktemp) || {
    echo "error: could not create a temporary file" >&2
    return 1
  }
  if ! parseGTF.pl "$gtf" ann "$@" > "$parsed" || [ ! -s "$parsed" ]; then
    echo "error: parseGTF.pl failed or produced no output for ${gtf}" >&2
    rm -f -- "$parsed"
    return 1
  fi
  # The parsed file is passed twice, exactly as in the original commands.
  if [ "$out_stats" = "-" ]; then
    assignGenomeAnnotation "$parsed" "$parsed" -prioritize "$out_annotations" ||
      status=$?
  else
    assignGenomeAnnotation "$parsed" "$parsed" -prioritize "$out_annotations" \
      > "$out_stats" || status=$?
  fi
  if [ "$status" -ne 0 ]; then
    echo "error: assignGenomeAnnotation failed for ${gtf} (exit ${status})" >&2
  fi
  rm -f -- "$parsed"
  return "$status"
}

# A failing run is reported on stderr by the helper; the script then carries
# on with the remaining outputs, as it always did.

## First test
run_gtf_to_annotations test-data/small.gtf \
  test-data/annotations_default.txt test-data/annotations_default_stats.txt \
  -features exon start_codon stop_codon
## Second test
run_gtf_to_annotations test-data/small.gtf \
  test-data/annotations_exon_start.txt test-data/annotations_exon_start_stats.txt \
  -features exon start_codon
## Third test
run_gtf_to_annotations test-data/small.gtf \
  test-data/annotations_small_TSSTTS.txt test-data/annotations_small_TSSTTS_stats.txt \
  -features exon start_codon stop_codon \
  -annTSSstartOffset -50 -annTSSendOffset 50 \
  -annTTSstartOffset -50 -annTTSendOffset 50

## For annotatePeaks.pl
# The statistics are not saved for this one ("-" keeps them on stdout).
run_gtf_to_annotations test-data/small_simplified.gtf \
  test-data/annotations_default_simplified.txt - \
  -features exon start_codon stop_codon
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
35 |
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
## homer_annotatePeaks
# One annotatePeaks.pl run per output file; every run writes its stdout to a
# file under test-data/.

## First test
# CTCF peaks, genome argument "none", simplified GTF plus the matching
# annotation file.
annotatePeaks.pl test-data/CTCF_peaks.bed none \
  -gtf test-data/small_simplified.gtf \
  -ann test-data/annotations_default_simplified.txt \
  > test-data/CTCF_peaks_first.txt

## Second test
# Same peaks, annotation file only (no GTF).
annotatePeaks.pl test-data/CTCF_peaks.bed none \
  -ann test-data/annotations_default.txt \
  > test-data/CTCF_peaks_second.txt

## Third test
# Same peaks, simplified GTF only (no annotation file).
annotatePeaks.pl test-data/CTCF_peaks.bed none \
  -gtf test-data/small_simplified.gtf \
  > test-data/CTCF_peaks_third.txt

## Fourth test
# phiX peaks against the phiX174 fasta, with -CpG.
annotatePeaks.pl test-data/fake_phix_peaks.bed test-data/phiX174.fasta \
  -CpG \
  > test-data/phiXcpg.txt

## Fifth test
# phiX peaks with genome argument "none" and no further option.
annotatePeaks.pl test-data/fake_phix_peaks.bed none \
  > test-data/phiX_nothing.txt
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
47 |
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
## findMotifsGenome

#######################################
# Run findMotifsGenome.pl and install its output directory as DEST_DIR.
# A plain 'mv WORK_DIR DEST_DIR' moves WORK_DIR *inside* DEST_DIR when the
# destination already exists (i.e. on every re-run), so the previous
# destination is removed first -- but only once the new results exist.
# Arguments:
#   $1 - peaks BED file
#   $2 - genome (fasta file)
#   $3 - output directory name handed to findMotifsGenome.pl
#   $4 - final location of that directory
#   $5... - extra options forwarded to findMotifsGenome.pl
# Outputs:  diagnostics on stderr
# Returns:  0 on success, non-zero otherwise (DEST_DIR is then left untouched)
#######################################
run_find_motifs() {
  local peaks=$1 genome=$2 work_dir=$3 dest_dir=$4
  shift 4
  if ! findMotifsGenome.pl "$peaks" "$genome" "$work_dir" "$@" ||
    [ ! -d "$work_dir" ]; then
    echo "error: findMotifsGenome.pl did not produce ${work_dir}" >&2
    return 1
  fi
  # ${dest_dir:?} aborts instead of running 'rm -rf' on an empty path.
  rm -rf -- "${dest_dir:?}" && mv -- "$work_dir" "$dest_dir"
}

# ! Genome preparsing is giving different results...
run_find_motifs test-data/fake_phix_peaks.bed test-data/phiX174.fasta \
  fake_phix_peaks_bed_motif test-data/motif_test1
# Thus has_text had to be used for the other outputs, and the commands that
# produced them are kept here for reference only:
# run_find_motifs test-data/CTCF_peaks_shifted.bed test-data/chr2_subset.fa CTCF_peaks_shifted_bed_motif test-data/motif_test2
# run_find_motifs test-data/CTCF_peaks_shifted.bed test-data/chr2_subset.fa CTCF_peaks_shifted_bed_motif test-data/motif_test3 -mask
# run_find_motifs test-data/CTCF_peaks_shifted.bed test-data/chr2_subset.fa CTCF_peaks_shifted_bed_motif test-data/motif_test4 -mset plants -nomotif
8ebb6520eef4
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/homer commit e49d856e0181edc6120220a1b819cba2466a4289"
iuc
parents:
diff
changeset
|
59 |