annotate commandline_sample_STR-FM_shortread_profiling @ 8:808bd6c3ac71 draft

Uploaded
author arkarachai-fungtammasan
date Mon, 24 Aug 2015 13:53:50 -0400
parents d5ed5c2e25c3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
1 ## This is a sample PBS script for profiling STRs from short reads using STR-FM version 2.0.0 (April 20, 2015)
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
2 ##
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
3 ##requirement
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
4 ##1 fastq input in sangerfq Phred scale --> ${INPUT}.fastq
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
5 ##2 index of mapping program (bwa, bowtie, etc)
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
6 ##3 location of all STRs in the reference genome (use the PBS script named "sampleSTR_reference_profiling.txt") --> /path/to/STR/in/reference/genome.TR (you can make 4 separate TR files for the 4 types of STRs)
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
7 ##4 reference genome in FASTA and in 2bit file --> /path/to/2bit/ref.2bit (use utility from UCSC genome browser to create 2bit file version of reference genome)
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
8 ##5 local Galaxy (available from the Galaxy website for Mac and Unix computers)
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
9 ##6 STR error rates (can be downloaded from https://usegalaxy.org/u/guru%40psu.edu/h/error-rates-files) --> errorrate.bymajorallele
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
10 ##
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
11 echo " "
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
12 echo " "
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
13 echo "Job started on $(hostname) at $(date)" ## log the host and time at which the job began
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
14 ref=/path/to/reference/sequence/and/bwa/index/ref.fa
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
15 export PYTHONPATH=/path/to/galaxy-dist/lib/
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
16 galaxydir=/path/to/galaxy-dist/tools
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
17 cd /working/directory/ || exit 1 ## abort if the working directory is unavailable; every later step writes its output relative to it
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
18 echo " "
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
19 echo " detect STR in short read" ## See detail in microsatellite.xml on https://github.com/Arkarachai/STR-FM
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
20 python microsatellite.py ${INPUT}.fastq --fastq --period=1 --partialmotifs --minlength=5 --prefix=20 --suffix=20 --hamming=0 --multipleruns >${INPUT}.mono.out
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
21 python microsatellite.py ${INPUT}.fastq --fastq --period=2 --partialmotifs --minlength=6 --prefix=20 --suffix=20 --hamming=0 --multipleruns >${INPUT}.di.out
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
22 python microsatellite.py ${INPUT}.fastq --fastq --period=3 --partialmotifs --minlength=9 --prefix=20 --suffix=20 --hamming=0 --multipleruns >${INPUT}.tri.out
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
23 python microsatellite.py ${INPUT}.fastq --fastq --period=4 --partialmotifs --minlength=12 --prefix=20 --suffix=20 --hamming=0 --multipleruns >${INPUT}.tetra.out
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
24
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
25 echo "change read name at " ## See detail in space2underscore_readname.xml on https://github.com/Arkarachai/STR-FM
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
26 python changespacetounderscore_readname.py ${INPUT}.mono.out ${INPUT}.mono.new 6
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
27 python changespacetounderscore_readname.py ${INPUT}.di.out ${INPUT}.di.new 6
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
28 python changespacetounderscore_readname.py ${INPUT}.tri.out ${INPUT}.tri.new 6
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
29 python changespacetounderscore_readname.py ${INPUT}.tetra.out ${INPUT}.tetra.new 6
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
30
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
31 echo "start fetch flanking at $(date)" ## See detail in fetchflank.xml on https://github.com/Arkarachai/STR-FM
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
32 python pair_fetch_DNA_ff.py ${INPUT}.mono.new ${INPUT}.mono_ff_L.txt ${INPUT}.mono_ff_R.txt 20 20
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
33 python pair_fetch_DNA_ff.py ${INPUT}.di.new ${INPUT}.di_ff_L.txt ${INPUT}.di_ff_R.txt 20 20
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
34 python pair_fetch_DNA_ff.py ${INPUT}.tri.new ${INPUT}.tri_ff_L.txt ${INPUT}.tri_ff_R.txt 20 20
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
35 python pair_fetch_DNA_ff.py ${INPUT}.tetra.new ${INPUT}.tetra_ff_L.txt ${INPUT}.tetra_ff_R.txt 20 20
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
36
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
37 echo "BWA uniquely mapped no indel no deletion "
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
38 bwa aln -n 0 -o 0 ${ref} ${INPUT}.mono_ff_L.txt > ${INPUT}.mono_ff_L.sai
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
39 bwa aln -n 0 -o 0 ${ref} ${INPUT}.mono_ff_R.txt > ${INPUT}.mono_ff_R.sai
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
40 bwa sampe ${ref} ${INPUT}.mono_ff_L.sai ${INPUT}.mono_ff_R.sai ${INPUT}.mono_ff_L.txt ${INPUT}.mono_ff_R.txt > ${INPUT}.mono.sam
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
41 samtools view -Sb -F 12 -q 1 ${INPUT}.mono.sam > ${INPUT}.mono.n.all.bam
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
42 bwa aln -n 0 -o 0 ${ref} ${INPUT}.di_ff_L.txt > ${INPUT}.di_ff_L.sai
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
43 bwa aln -n 0 -o 0 ${ref} ${INPUT}.di_ff_R.txt > ${INPUT}.di_ff_R.sai
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
44 bwa sampe ${ref} ${INPUT}.di_ff_L.sai ${INPUT}.di_ff_R.sai ${INPUT}.di_ff_L.txt ${INPUT}.di_ff_R.txt > ${INPUT}.di.sam
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
45 samtools view -Sb -F 12 -q 1 ${INPUT}.di.sam > ${INPUT}.di.n.all.bam
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
46 bwa aln -n 0 -o 0 ${ref} ${INPUT}.tri_ff_L.txt > ${INPUT}.tri_ff_L.sai
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
47 bwa aln -n 0 -o 0 ${ref} ${INPUT}.tri_ff_R.txt > ${INPUT}.tri_ff_R.sai
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
48 bwa sampe ${ref} ${INPUT}.tri_ff_L.sai ${INPUT}.tri_ff_R.sai ${INPUT}.tri_ff_L.txt ${INPUT}.tri_ff_R.txt > ${INPUT}.tri.sam
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
49 samtools view -Sb -F 12 -q 1 ${INPUT}.tri.sam > ${INPUT}.tri.n.all.bam
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
50 bwa aln -n 0 -o 0 ${ref} ${INPUT}.tetra_ff_L.txt > ${INPUT}.tetra_ff_L.sai
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
51 bwa aln -n 0 -o 0 ${ref} ${INPUT}.tetra_ff_R.txt > ${INPUT}.tetra_ff_R.sai
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
52 bwa sampe ${ref} ${INPUT}.tetra_ff_L.sai ${INPUT}.tetra_ff_R.sai ${INPUT}.tetra_ff_L.txt ${INPUT}.tetra_ff_R.txt > ${INPUT}.tetra.sam
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
53 samtools view -Sb -F 12 -q 1 ${INPUT}.tetra.sam > ${INPUT}.tetra.n.all.bam
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
54
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
55 echo "sort result by read name"
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
56 samtools sort -n ${INPUT}.mono.n.all.bam ${INPUT}.mono.n.sorted.all
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
57 samtools sort -n ${INPUT}.di.n.all.bam ${INPUT}.di.n.sorted.all
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
58 samtools sort -n ${INPUT}.tri.n.all.bam ${INPUT}.tri.n.sorted.all
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
59 samtools sort -n ${INPUT}.tetra.n.all.bam ${INPUT}.tetra.n.sorted.all
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
60 samtools view -h -o ${INPUT}.mono.n.sorted.all.sam ${INPUT}.mono.n.sorted.all.bam
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
61 samtools view -h -o ${INPUT}.di.n.sorted.all.sam ${INPUT}.di.n.sorted.all.bam
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
62 samtools view -h -o ${INPUT}.tri.n.sorted.all.sam ${INPUT}.tri.n.sorted.all.bam
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
63 samtools view -h -o ${INPUT}.tetra.n.sorted.all.sam ${INPUT}.tetra.n.sorted.all.bam
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
64
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
65 echo "merge faux paired end reads" ## See detail in PEsortedSAM2readprofile.xml on https://github.com/Arkarachai/STR-FM
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
66 python PEsortedSAM2readprofile.py ${INPUT}.mono.n.sorted.all.sam /path/to/2bit/ref.2bit 100 250 ${INPUT}.mono.RF
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
67 python PEsortedSAM2readprofile.py ${INPUT}.di.n.sorted.all.sam /path/to/2bit/ref.2bit 100 250 ${INPUT}.di.RF ## each STR type gets its own .RF file; the read-name join reads ${INPUT}.di.RF
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
68 python PEsortedSAM2readprofile.py ${INPUT}.tri.n.sorted.all.sam /path/to/2bit/ref.2bit 100 250 ${INPUT}.tri.RF ## the read-name join reads ${INPUT}.tri.RF
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
69 python PEsortedSAM2readprofile.py ${INPUT}.tetra.n.sorted.all.sam /path/to/2bit/ref.2bit 100 250 ${INPUT}.tetra.RF ## the read-name join reads ${INPUT}.tetra.RF
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
70
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
71 echo "join mapped coordinate with STR length using read name"
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
72 python ${galaxydir}/filters/join.py ${INPUT}.mono.new ${INPUT}.mono.RF 6 1 ${INPUT}.mono.RF.j "" "" --index_depth=3 --buffer=50000000 --fill_options_file='None'
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
73 python ${galaxydir}/filters/join.py ${INPUT}.di.new ${INPUT}.di.RF 6 1 ${INPUT}.di.RF.j "" "" --index_depth=3 --buffer=50000000 --fill_options_file='None' ## per-type output; gops_join reads ${INPUT}.di.RF.j
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
74 python ${galaxydir}/filters/join.py ${INPUT}.tri.new ${INPUT}.tri.RF 6 1 ${INPUT}.tri.RF.j "" "" --index_depth=3 --buffer=50000000 --fill_options_file='None' ## per-type output; gops_join reads ${INPUT}.tri.RF.j
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
75 python ${galaxydir}/filters/join.py ${INPUT}.tetra.new ${INPUT}.tetra.RF 6 1 ${INPUT}.tetra.RF.j "" "" --index_depth=3 --buffer=50000000 --fill_options_file='None' ## per-type output; gops_join reads ${INPUT}.tetra.RF.j
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
76
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
77 echo "join mapped coordinate and STR length with STR location in genome"
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
78 python ${galaxydir}/new_operations/gops_join.py /path/to/STR/in/reference/genome.TR ${INPUT}.mono.RF.j ${INPUT}.mono.gop -1 1,2,3,0 -2 10,13,14,0 -m 1 -f
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
79 python ${galaxydir}/new_operations/gops_join.py /path/to/STR/in/reference/genome.TR ${INPUT}.di.RF.j ${INPUT}.di.gop -1 1,2,3,0 -2 10,13,14,0 -m 1 -f
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
80 python ${galaxydir}/new_operations/gops_join.py /path/to/STR/in/reference/genome.TR ${INPUT}.tri.RF.j ${INPUT}.tri.gop -1 1,2,3,0 -2 10,13,14,0 -m 1 -f
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
81 python ${galaxydir}/new_operations/gops_join.py /path/to/STR/in/reference/genome.TR ${INPUT}.tetra.RF.j ${INPUT}.tetra.gop -1 1,2,3,0 -2 10,13,14,0 -m 1 -f
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
82
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
83 echo "remove incompatible motif (remove incorrect mapped reads given that there is no STR motif difference from reference genome)" ## See detail in microsatcompat.xml on https://github.com/Arkarachai/STR-FM
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
84 python microsatcompat.py ${INPUT}.mono.gop 4 10 > ${INPUT}.mono.fulltable1
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
85 python microsatcompat.py ${INPUT}.di.gop 4 10 > ${INPUT}.di.fulltable1
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
86 python microsatcompat.py ${INPUT}.tri.gop 4 10 > ${INPUT}.tri.fulltable1
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
87 python microsatcompat.py ${INPUT}.tetra.gop 4 10 > ${INPUT}.tetra.fulltable1
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
88
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
89 echo "remove shifting flanking location (remove cases that come from STR interruption or flanking bases are misread as STRs)"
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
90 cat ${INPUT}.mono.fulltable1 | awk '($19==$2) && ($20==$3) {print $0}' > ${INPUT}.mono.fulltable2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
91 cat ${INPUT}.di.fulltable1 | awk '($19==$2) && ($20==$3) {print $0}' > ${INPUT}.di.fulltable2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
92 cat ${INPUT}.tri.fulltable1 | awk '($19==$2) && ($20==$3) {print $0}' > ${INPUT}.tri.fulltable2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
93 cat ${INPUT}.tetra.fulltable1 | awk '($19==$2) && ($20==$3) {print $0}' > ${INPUT}.tetra.fulltable2
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
94
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
95 echo "keep only column that are necessary for profiling"
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
96 cat ${INPUT}.mono.fulltable2| cut -f 1,2,3,4,5,7 | sort -k 1n,1 -k 2n,2 -k 3n,3 > ${INPUT}.mono.cuttmp0
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
97 cat ${INPUT}.di.fulltable2| cut -f 1,2,3,4,5,7 | sort -k 1n,1 -k 2n,2 -k 3n,3 > ${INPUT}.di.cuttmp0
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
98 cat ${INPUT}.tri.fulltable2| cut -f 1,2,3,4,5,7 | sort -k 1n,1 -k 2n,2 -k 3n,3 > ${INPUT}.tri.cuttmp0
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
99 cat ${INPUT}.tetra.fulltable2| cut -f 1,2,3,4,5,7 | sort -k 1n,1 -k 2n,2 -k 3n,3 > ${INPUT}.tetra.cuttmp0
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
100
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
101 echo "If you multiple analysis by splitting initial fastq, you should merge (cat) all results from the same sample after this step"
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
102
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
103 echo "create genomic coordinate column and group by that column"
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
104 perl ${galaxydir}/filters/fixedValueColumn.pl ${INPUT}.mono.cuttmp0 ${INPUT}.mono.cuttmp1 "_" "no"
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
105 python ${galaxydir}/filters/mergeCols.py ${INPUT}.mono.cuttmp1 ${INPUT}.mono.cuttmp2 1 7 2 7 3
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
106 python ${galaxydir}/stats/grouping.py ${INPUT}.mono.cuttmp3 ${INPUT}.mono.cuttmp2 8 0 'cat 6 0' 'cat_uniq 4 0'
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
107 perl ${galaxydir}/filters/fixedValueColumn.pl ${INPUT}.di.cuttmp0 ${INPUT}.di.cuttmp1 "_" "no"
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
108 python ${galaxydir}/filters/mergeCols.py ${INPUT}.di.cuttmp1 ${INPUT}.di.cuttmp2 1 7 2 7 3
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
109 python ${galaxydir}/stats/grouping.py ${INPUT}.di.cuttmp3 ${INPUT}.di.cuttmp2 8 0 'cat 6 0' 'cat_uniq 4 0'
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
110 perl ${galaxydir}/filters/fixedValueColumn.pl ${INPUT}.tri.cuttmp0 ${INPUT}.tri.cuttmp1 "_" "no"
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
111 python ${galaxydir}/filters/mergeCols.py ${INPUT}.tri.cuttmp1 ${INPUT}.tri.cuttmp2 1 7 2 7 3
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
112 python ${galaxydir}/stats/grouping.py ${INPUT}.tri.cuttmp3 ${INPUT}.tri.cuttmp2 8 0 'cat 6 0' 'cat_uniq 4 0'
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
113 perl ${galaxydir}/filters/fixedValueColumn.pl ${INPUT}.tetra.cuttmp0 ${INPUT}.tetra.cuttmp1 "_" "no"
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
114 python ${galaxydir}/filters/mergeCols.py ${INPUT}.tetra.cuttmp1 ${INPUT}.tetra.cuttmp2 1 7 2 7 3
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
115 python ${galaxydir}/stats/grouping.py ${INPUT}.tetra.cuttmp3 ${INPUT}.tetra.cuttmp2 8 0 'cat 6 0' 'cat_uniq 4 0'
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
116
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
117 echo "you may filter for minimum sequencing depth here"
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
118
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
119 echo "genotyping using error correction model" ## See detail in GenotypingSTR.xml on https://github.com/Arkarachai/STR-FM
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
120 cat ${INPUT}.mono.cuttmp3 ${INPUT}.di.cuttmp3 ${INPUT}.tri.cuttmp3 ${INPUT}.tetra.cuttmp3 > ${INPUT}.step5 ## merge the per-type grouped tables (cuttmp3, named first in each grouping.py call) into one genotyping input. NOTE(review): assumes grouping.py takes its output file as the first argument - confirm
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
121 python GenotypeTRcorrection.py ${INPUT}.step5 errorrate.bymajorallele ${INPUT}.step5.result 0.5
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
122 ## final output is ${INPUT}.step5.result
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
123
d5ed5c2e25c3 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
124 echo "Job end on $(hostname) at $(date)" ## log the host and time at which the job finished