#!/bin/bash
# Converts MACS peak caller xls output to narrowPeak output.
#
# Command Usage: macs2npk.sh INPUTFILE OUTPUTFILE
#
#   INPUTFILE   MACS "*_peaks.xls" file (must exist)
#   OUTPUTFILE  path of the narrowPeak file to write (overwritten if present)
#
# Exit status: 1 on bad usage or missing input file; otherwise the status of
# the conversion pipeline.
#
# XLS format (input columns, as expected by this script):
#   chr start stop length summit tags -10log10(pvalue) fold_enrichment %FDR
#
# narrowPeak format (output columns):
#   chr start stop name score strand signalValue -log10(pValue) -log10(qValue) summit
#
# Processing steps:
#   1. Remove comment lines (starting with '#') and empty lines.
#   2. Remove the header row.
#   3. Sort by column 7 (the p-value score), numerically, descending.
#   4. Rearrange columns; the row number after sorting becomes the peak name.
#   5. Adjust start coordinates: starts below 1 are set to 1, then 1 is
#      subtracted from every start.

# Both arguments are required: the output path ($2) is used unconditionally
# further down, so fail early with the usage text instead of at the redirect.
if [[ "$#" -lt 2 ]]; then
  echo "$(basename "$0")" 1>&2
  echo "Converts MACS peak caller xls output file to narrowPeak format" 1>&2
  echo "USAGE:" 1>&2
  echo "$(basename "$0") <MACSXlsFile> <outputFile>" 1>&2
  exit 1
fi

MACSFILE=$1
if [[ ! -e "${MACSFILE}" ]]; then
  echo "MACS xls file ${MACSFILE} does not exist" 1>&2
  exit 1
fi

# Legacy behaviour (disabled): derive the output path from an output directory.
# ODIR=$(dirname ${MACSFILE})
# [[ $# -gt 1 ]] && ODIR=$2
# if [[ ! -d ${ODIR} ]]
# then
#     echo "Output directory ${ODIR} does not exist" 1>&2
#     exit 1
# fi
# OFILE="${ODIR}/$(echo $(basename ${MACSFILE} '_peaks.xls')).regionPeak.gz"
OFILE="${2}"

# Check if header has FDR column. The header is the first line that is neither
# a comment nor empty. (Neither sed pattern needs extended-regex syntax.)
header=$(sed -e '/^#/d' -e '/^$/d' "${MACSFILE}" | head -n 1)
hasFDR=0
if [[ "${header}" == *FDR* ]]; then
  hasFDR=1
fi

# Single conversion pipeline. The only difference between inputs with and
# without an FDR column is the 9th output field:
#   with FDR:    -log10((FDR + 1e-30) / 100), printed with %f; the 1e-30 offset
#                keeps log() away from an exact zero
#   without FDR: the literal -1
# `sed 1d` must stay a separate stage: it drops the header, i.e. the first line
# that survives the comment/blank filter, not the first line of the file.
sed -e '/^#/d' -e '/^$/d' "${MACSFILE}" \
  | sed 1d \
  | sort -k7nr,7nr \
  | awk -v hasFDR="${hasFDR}" '
      $2 < 1 { $2 = 1 }
      {
        if (hasFDR == 1) {
          qval = sprintf("%f", -log(($9 + 1e-30) / 100) / log(10))
        } else {
          qval = "-1"
        }
        printf "%s\t%d\t%d\t%d\t%s\t.\t%s\t%s\t%s\t%d\n", \
          $1, $2 - 1, $3, NR, $6, $8, $7 / 10, qval, $5
      }' > "${OFILE}"