Mercurial > repos > yusuf > extend_bed_regions
changeset 0:cb1c17dddc14 default tip
initial commit
author | Yusuf Ali <ali@yusuf.email> |
---|---|
date | Wed, 25 Mar 2015 13:32:42 -0600 |
parents | |
children | |
files | ExtendBedRegions.xml extend_bed_regions |
diffstat | 2 files changed, 92 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ExtendBedRegions.xml Wed Mar 25 13:32:42 2015 -0600 @@ -0,0 +1,26 @@ +<?xml version="1.0"?> + +<tool id="extend_bed_regions_1" name="Expand or contract regions"> + <description>in a BED file by a fixed number of bases</description> + <version_string>echo 1.0.0</version_string> + <command interpreter="perl">extend_bed_regions $in_bed $extent $strand $out_bed</command> + <inputs> + <param format="bed" name="in_bed" type="data" label="BED file whose regions should be modified"/> + <param name="extent" type="integer" value="50" label="Number of bases to extend each BED region (negative values cause contraction)"/> + <param name="strand" type="select" display="radio" label="Which end to extend?" help="If the BED file contains strand info, extension will be strand specific. Otherwise, the positive strand is assumed"> + <option value="5">5'</option> + <option value="3">3'</option> + <option value="both" selected="true">Both</option> + </param> + </inputs> + <outputs> + <data name="out_bed" format="bed" type="data" label="BED file with extended/contracted regions"/> + </outputs> + + <tests/> + + <help> +This tool expands each regions defined in a BED file by the specified amount, at the specified end(s). If expanded regions overlap, they are merged. + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extend_bed_regions Wed Mar 25 13:32:42 2015 -0600 @@ -0,0 +1,66 @@ +#!/usr/bin/env perl + +use strict; +use warnings; + +# This script adds up to X bases to either end of BED file regions (CAN BE NEGATIVE). The tool will merge regions that overlap after the expansion. +@ARGV == 4 or die "Usage: $0 <input regions.bed> <# bases> <5|3|both> <output.bed>\nWhere 5, 3 or both controls which flank is modified\n"; + +my $extension = $ARGV[1]; +my $ends = $ARGV[2]; + +my %chrs; +my @chr_names; +open(BED, $ARGV[0]) + or die "Cannot open $ARGV[0] for reading: $!\n"; +# assuming input in sorted by start per contig +while(<BED>){ + chomp; + my @F = split /\t/, $_; + my $strand = @F > 5 ? $F[5] : "+"; # is the strand field present? + my $start; + my $stop; + if($strand eq "+"){ + if($ends eq "both" or $ends eq "5"){ + $start = $F[1]-$extension; + $start = 1 if $start < 1; + } + if($ends eq "both" or $ends eq "3"){ + $stop = $F[2]+$extension; # can't check if okay unless we knew the chr sizes... + } + } + else{ + # negative strand + if($ends eq "both" or $ends eq "3"){ + $start = $F[1]-$extension; + $start = 1 if $start < 1; + } + if($ends eq "both" or $ends eq "5"){ + $stop = $F[2]+$extension; # can't check if okay unless we knew the chr sizes... + } + } + if(not exists $chrs{$F[0]}){ + $chrs{$F[0]} = [[$start,$stop]]; + push @chr_names, $F[0]; + next; + } + my $last_interval = $chrs{$F[0]}->[$#{$chrs{$F[0]}}]; + if($start <= $last_interval->[1]){ #overlaps previous + if($stop > $last_interval->[1]){ # extends beyond previous + $last_interval->[1] = $stop; + } #else it doesn't contribute new bases + } + else{ # standalone + push @{$chrs{$F[0]}}, [$start, $stop]; + } +} +close(BED); + +open(OUTPUT_BED, ">$ARGV[3]") + or die "Cannot open $ARGV[3] for writing: $!\n"; +for my $chr_name (@chr_names){ + for my $interval (@{$chrs{$chr_name}}){ + print OUTPUT_BED join("\t",$chr_name,$interval->[0],$interval->[1]), "\n"; + } +} +close(OUTPUT_BED);