comparison macs2npk.sh @ 3:72be1ab49f77 draft

Uploaded
author modencode-dcc
date Fri, 18 Jan 2013 11:18:11 -0500
parents
children
comparison
equal deleted inserted replaced
2:8f4f3945dea7 3:72be1ab49f77
#!/bin/bash
# Converts MACS peak caller xls output to narrowPeak output.
#
# Command Usage: macs2npk.sh INPUTFILE OUTPUTFILE
#   INPUTFILE   MACS xls peaks file
#   OUTPUTFILE  file the narrowPeak records are written to (overwritten)
#
# Exit status: 0 on success; 1 on bad usage or a missing input file;
# otherwise the status of the first failing stage of the conversion pipeline.

# Report a failure of sed/sort, not only of the final awk stage.
set -o pipefail

readonly PROG="${0##*/}"

# Print the usage text on stderr and abort.
usage() {
  {
    echo "${PROG}"
    echo "Converts MACS peak caller xls output file to narrowPeak format"
    echo "USAGE:"
    echo "${PROG} <MACSXlsFile> <outputFile>"
  } 1>&2
  exit 1
}

# Both arguments are required: the output path is used verbatim below, and an
# empty one would only surface later as an "ambiguous redirect" error.
if [[ "$#" -lt 2 ]]; then
  usage
fi

MACSFILE=$1
if [[ ! -e "${MACSFILE}" ]]; then
  echo "MACS xls file ${MACSFILE} does not exist" 1>&2
  exit 1
fi

OFILE=$2

# XLS format
# chr start stop length summit tags -10log10(pvalue) fold_enrichment %FDR

# narrowPeak format
# chr start stop name score strand signalValue -log10(pValue) -log10(qValue) summit

# Check if header has FDR column.
# The header is the first line that is neither a '#' comment nor empty;
# 'q' prints that line and stops reading the rest of the file.
header=$(sed -e '/^#/d' -e '/^$/d' -e 'q' "${MACSFILE}")
has_fdr=0
if [[ "${header}" == *FDR* ]]; then
  has_fdr=1
fi

# Remove comments #
# Remove empty lines
# Remove header
# Sort by p-value (column 7, numeric, descending) and then rearrange columns
# Adjust start coordinates: clamp start to >= 1, then subtract 1
#
# Output columns: name is the rank after sorting (NR), score is the tag count,
# signalValue is fold_enrichment, -log10(pValue) is column 7 divided by 10.
# -log10(qValue) is derived from the %FDR column when present, else -1; the
# 1e-30 offset avoids log(0) for an FDR of 0.
sed -e '/^#/d' -e '/^$/d' "${MACSFILE}" \
  | sed 1d \
  | sort -k7nr,7nr \
  | awk -v has_fdr="${has_fdr}" '
      $2 < 1 { $2 = 1 }
      {
        if (has_fdr == 1)
          qvalue = sprintf("%f", -log(($9 + 1e-30) / 100) / log(10))
        else
          qvalue = "-1"
        printf "%s\t%d\t%d\t%d\t%s\t.\t%s\t%s\t%s\t%d\n", \
          $1, $2 - 1, $3, NR, $6, $8, $7 / 10, qvalue, $5
      }' > "${OFILE}"