Mercurial > repos > iuc > resize_coordinate_window
changeset 0:08b6255afde7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/resize_coordinate_window commit b'67cff25a50ba173b0468819204d0999496f68ea9'
author | iuc |
---|---|
date | Tue, 19 Jan 2016 09:34:56 -0500 |
parents | |
children | 0164d2edba9f |
files | resize_coordinate_window.py resize_coordinate_window.xml test-data/input.gff test-data/output.gff |
diffstat | 4 files changed, 308 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resize_coordinate_window.py Tue Jan 19 09:34:56 2016 -0500 @@ -0,0 +1,41 @@
"""Resize the start/end coordinate window of every feature line in a GFF file.

Each input line that has exactly nine tab-separated columns gets its start
(column 4) and end (column 5) replaced by a resized window; comment lines
(starting with '#') and lines with any other column count are dropped.
"""
import argparse
import sys

# Only lines with exactly this many tab-separated columns are processed.
GFF_COLUMN_COUNT = 9


def stop_err(msg):
    """Write ``msg`` to stderr and terminate the process with exit status 1."""
    sys.stderr.write(msg)
    sys.exit(1)


def resize_window(start, end, subtract_from_start, add_to_end, extend_existing):
    """Return the resized ``(start, end)`` pair for one feature.

    When ``extend_existing`` is true the distances are applied to the given
    start and end directly; otherwise both are measured from the midpoint,
    computed as ``(start + end) // 2``.
    """
    if extend_existing:
        return start - subtract_from_start, end + add_to_end
    midpoint = (start + end) // 2
    return midpoint - subtract_from_start, midpoint + add_to_end


def main(argv=None):
    """Parse the command line and write the resized GFF to the output file.

    ``argv`` is the list of command-line arguments; when it is ``None``
    argparse falls back to ``sys.argv[1:]``.  The process exits with status 1
    (via ``stop_err``) as soon as a resized start coordinate is below 1; lines
    written before that point remain in the output file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', dest='input', help="Input dataset")
    parser.add_argument('--subtract_from_start', dest='subtract_from_start', type=int, help='Distance to subtract from start.')
    parser.add_argument('--add_to_end', dest='add_to_end', type=int, help='Distance to add to end.')
    parser.add_argument('--extend_existing', dest='extend_existing', help='Extend existing start/end rather or from computed midpoint.')
    parser.add_argument('--output', dest='output', help="Output dataset")
    args = parser.parse_args(argv)

    # Any value other than the literal 'existing' selects midpoint mode.
    extend_existing = args.extend_existing == 'existing'

    # Text mode ('w', not 'wb'): the lines written below are str, which a
    # binary-mode file rejects on Python 3.  The with-statement closes both
    # files on every exit path, including the SystemExit raised by stop_err.
    with open(args.input) as in_fh, open(args.output, 'w') as out_fh:
        for line in in_fh:
            if line.startswith('#'):
                continue
            items = line.split('\t')
            if len(items) != GFF_COLUMN_COUNT:
                continue
            start, end = resize_window(
                int(items[3]),
                int(items[4]),
                args.subtract_from_start,
                args.add_to_end,
                extend_existing,
            )
            if start < 1:
                stop_err('Requested expansion places region beyond chromosome bounds.')
            items[3] = str(start)
            items[4] = str(end)
            # The last column still carries the line's own terminator, so no
            # newline is appended here.
            out_fh.write('\t'.join(items))


if __name__ == '__main__':
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/resize_coordinate_window.xml Tue Jan 19 09:34:56 2016 -0500 @@ -0,0 +1,67 @@ +<tool id="resize_coordinate_window" name="Resize coordinate window" version="1.0.0"> + <description>of GFF data</description> + <command> + python $__tool_directory__/resize_coordinate_window.py + --input "$input" + --subtract_from_start $subtract_from_start + --add_to_end $add_to_end + --extend_existing $extend_existing + --output "$output" + </command> + <inputs> + <param name="input" type="data" format="gff" label="Gff file" /> + <param name="subtract_from_start" type="integer" value="30" min="0" label="Distance to subtract from the start coordinate"/> + <param name="add_to_end" type="integer" value="30" min="0" label="Distance to add to the end coordinate"/> + <param name="extend_existing" type="select" label="Resize window from" help="The midpoint is computed as (start + end) // 2"> + <option value="midpoint" selected="True">the midpoint of the start and end coordinates</option> + <option value="existing">the start and end coordinates</option> + </param> + </inputs> + <outputs> + <data name="output" format="gff" /> + </outputs> + <tests> + <test> + <param name="input" value="input.gff" ftype="gff" /> + <param name="subtract_from_start" value="13" /> + <param name="add_to_end" value="13" /> + <param name="extend_existing" value="midpoint" /> + <output name="output" file="output.gff" ftype="gff" /> + </test> + </tests> + <help> + +**What it does** + +Modifies the start and end coordinates of GFF data such that the new start and end position is based on a +specified window size that is computed either from the existing start and end coordinates or centered on +the midpoint between them. + +----- + +**Example** + +If the input dataset is:: + + chr1 genetrack . 17 37 918 + . stddev=5.96715849116 + chr1 genetrack . 31 51 245 - . stddev=2.66582799529 + chr1 genetrack . 40 60 2060 + . 
stddev=2.7859667372 + +Resizing the coordinate window by 13 from the computed midpoint of the start and end coordinates produces:: + + chr1 genetrack . 14 40 918 + . stddev=5.96715849116 + chr1 genetrack . 28 54 245 - . stddev=2.66582799529 + chr1 genetrack . 37 63 2060 + . stddev=2.7859667372 + + </help> + <citations> + <citation type="bibtex"> + @unpublished{None, + author = {}, + title = {None}, + year = {None}, + eprint = {None}, + url = {http://www.huck.psu.edu/content/research/independent-centers-excellence/center-for-eukaryotic-gene-regulation} + }</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input.gff Tue Jan 19 09:34:56 2016 -0500 @@ -0,0 +1,100 @@ +chr1 genetrack . 17 37 918 + . stddev=5.96715849116 +chr1 genetrack . 31 51 245 - . stddev=2.66582799529 +chr1 genetrack . 40 60 2060 + . stddev=2.7859667372 +chr1 genetrack . 62 82 1300 - . stddev=4.13061337623 +chr1 genetrack . 73 93 397 + . stddev=0.0 +chr1 genetrack . 89 109 521 + . stddev=0.747112137937 +chr1 genetrack . 123 143 5129 + . stddev=3.01025384354 +chr1 genetrack . 125 145 4659 - . stddev=3.8642622228 +chr1 genetrack . 155 175 897 - . stddev=3.22709952671 +chr1 genetrack . 171 191 956 - . stddev=4.95899971687 +chr1 genetrack . 180 200 1527 + . stddev=4.62574275346 +chr1 genetrack . 185 205 494 - . stddev=1.4255957 +chr1 genetrack . 192 212 2538 + . stddev=5.04731591122 +chr1 genetrack . 206 226 2087 - . stddev=3.6160253713 +chr1 genetrack . 238 258 2496 + . stddev=2.11105291581 +chr1 genetrack . 242 262 5047 - . stddev=3.62629343395 +chr1 genetrack . 254 274 1525 + . stddev=4.46082441647 +chr1 genetrack . 281 301 15 + . stddev=1.74610678049 +chr1 genetrack . 302 322 626 - . stddev=0.0 +chr1 genetrack . 308 328 1544 + . stddev=4.43066151722 +chr1 genetrack . 334 354 533 + . stddev=1.34355443899 +chr1 genetrack . 344 364 726 - . stddev=1.36767079956 +chr1 genetrack . 347 367 286 + . stddev=0.0 +chr1 genetrack . 358 378 792 - . stddev=1.47737416556 +chr1 genetrack . 374 394 608 + . stddev=1.44652711793 +chr1 genetrack . 389 409 126 - . stddev=0.471404520791 +chr1 genetrack . 439 459 618 - . stddev=5.47536569145 +chr1 genetrack . 441 461 1393 + . stddev=4.75587332865 +chr1 genetrack . 461 481 754 - . stddev=3.28891288785 +chr1 genetrack . 483 503 58 + . stddev=0.0 +chr1 genetrack . 538 558 1015 - . stddev=0.0 +chr1 genetrack . 728 748 39 - . stddev=0.0 +chr1 genetrack . 757 777 23 + . stddev=0.0 +chr1 genetrack . 799 819 607 + . stddev=0.0 +chr1 genetrack . 844 864 665 + . stddev=0.0 +chr1 genetrack . 877 897 468 + . 
stddev=0.0 +chr1 genetrack . 903 923 107 - . stddev=0.0 +chr1 genetrack . 944 964 2 - . stddev=0.0 +chr1 genetrack . 1092 1112 740 + . stddev=0.0 +chr1 genetrack . 1127 1147 940 - . stddev=3.96036497305 +chr1 genetrack . 1183 1203 25 + . stddev=0.0 +chr1 genetrack . 1291 1311 454 - . stddev=0.0 +chr1 genetrack . 1329 1349 207 - . stddev=0.0 +chr1 genetrack . 1484 1504 584 + . stddev=0.0 +chr1 genetrack . 2075 2095 1181 + . stddev=0.0 +chr1 genetrack . 2102 2122 481 + . stddev=0.0455486534308 +chr1 genetrack . 2125 2145 199 - . stddev=0.0 +chr1 genetrack . 2452 2472 1246 + . stddev=0.0 +chr1 genetrack . 2602 2622 34 + . stddev=0.0 +chr1 genetrack . 2833 2853 1062 + . stddev=1.01561431542 +chr1 genetrack . 2838 2858 1144 - . stddev=1.09438744148 +chr1 genetrack . 3011 3031 1212 - . stddev=0.0 +chr1 genetrack . 3116 3136 555 - . stddev=0.0 +chr1 genetrack . 3130 3150 17 + . stddev=0.0 +chr1 genetrack . 3378 3398 525 - . stddev=0.0 +chr1 genetrack . 3669 3689 845 + . stddev=0.0 +chr1 genetrack . 3785 3805 23 - . stddev=0.0 +chr1 genetrack . 3847 3867 316 - . stddev=0.0 +chr1 genetrack . 3868 3888 491 + . stddev=0.0 +chr1 genetrack . 4097 4117 536 - . stddev=0.0 +chr1 genetrack . 4326 4346 482 + . stddev=0.0 +chr1 genetrack . 4395 4415 3 + . stddev=0.0 +chr1 genetrack . 4461 4481 1110 + . stddev=0.0 +chr1 genetrack . 4500 4520 125 - . stddev=0.0 +chr1 genetrack . 4620 4640 147 + . stddev=0.0 +chr1 genetrack . 4826 4846 1761 + . stddev=4.82408982772 +chr1 genetrack . 4902 4922 710 + . stddev=0.0 +chr1 genetrack . 5110 5130 828 + . stddev=0.0 +chr1 genetrack . 5402 5422 282 - . stddev=0.0 +chr1 genetrack . 5501 5521 75 + . stddev=0.0 +chr1 genetrack . 5707 5727 2 + . stddev=0.0 +chr1 genetrack . 5717 5737 737 - . stddev=0.36608362591 +chr1 genetrack . 6086 6106 646 + . stddev=0.039314009595 +chr1 genetrack . 6098 6118 230 - . stddev=0.0657945476105 +chr1 genetrack . 6187 6207 329 - . stddev=0.0 +chr1 genetrack . 6290 6310 5 + . stddev=0.0 +chr1 genetrack . 
6356 6376 285 + . stddev=0.0 +chr1 genetrack . 6380 6400 34 - . stddev=0.0 +chr1 genetrack . 6401 6421 1587 + . stddev=5.61831543503 +chr1 genetrack . 6415 6435 953 - . stddev=3.52372902021 +chr1 genetrack . 6432 6452 742 + . stddev=0.0 +chr1 genetrack . 6496 6516 691 + . stddev=0.0 +chr1 genetrack . 6506 6526 61 - . stddev=1.5137105198 +chr1 genetrack . 6843 6863 28 + . stddev=0.0 +chr1 genetrack . 7058 7078 518 - . stddev=0.0 +chr1 genetrack . 7124 7144 654 + . stddev=0.0 +chr1 genetrack . 7765 7785 714 + . stddev=0.0 +chr1 genetrack . 7847 7867 3 + . stddev=0.0 +chr1 genetrack . 8209 8229 17 + . stddev=0.0 +chr1 genetrack . 8272 8292 2 - . stddev=0.0 +chr1 genetrack . 8459 8479 10 + . stddev=0.0 +chr1 genetrack . 8471 8491 5 - . stddev=0.0 +chr1 genetrack . 8715 8735 5 + . stddev=0.0 +chr1 genetrack . 8834 8854 332 + . stddev=0.0 +chr1 genetrack . 8839 8859 593 - . stddev=0.0 +chr1 genetrack . 9034 9054 24 + . stddev=0.0 +chr1 genetrack . 9058 9078 4 + . stddev=0.0 +chr1 genetrack . 9485 9505 36 + . stddev=0.0 +chr1 genetrack . 9710 9730 480 + . stddev=0.0 +chr1 genetrack . 9923 9943 606 - . stddev=0.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.gff Tue Jan 19 09:34:56 2016 -0500 @@ -0,0 +1,100 @@ +chr1 genetrack . 14 40 918 + . stddev=5.96715849116 +chr1 genetrack . 28 54 245 - . stddev=2.66582799529 +chr1 genetrack . 37 63 2060 + . stddev=2.7859667372 +chr1 genetrack . 59 85 1300 - . stddev=4.13061337623 +chr1 genetrack . 70 96 397 + . stddev=0.0 +chr1 genetrack . 86 112 521 + . stddev=0.747112137937 +chr1 genetrack . 120 146 5129 + . stddev=3.01025384354 +chr1 genetrack . 122 148 4659 - . stddev=3.8642622228 +chr1 genetrack . 152 178 897 - . stddev=3.22709952671 +chr1 genetrack . 168 194 956 - . stddev=4.95899971687 +chr1 genetrack . 177 203 1527 + . stddev=4.62574275346 +chr1 genetrack . 182 208 494 - . stddev=1.4255957 +chr1 genetrack . 189 215 2538 + . stddev=5.04731591122 +chr1 genetrack . 203 229 2087 - . stddev=3.6160253713 +chr1 genetrack . 235 261 2496 + . stddev=2.11105291581 +chr1 genetrack . 239 265 5047 - . stddev=3.62629343395 +chr1 genetrack . 251 277 1525 + . stddev=4.46082441647 +chr1 genetrack . 278 304 15 + . stddev=1.74610678049 +chr1 genetrack . 299 325 626 - . stddev=0.0 +chr1 genetrack . 305 331 1544 + . stddev=4.43066151722 +chr1 genetrack . 331 357 533 + . stddev=1.34355443899 +chr1 genetrack . 341 367 726 - . stddev=1.36767079956 +chr1 genetrack . 344 370 286 + . stddev=0.0 +chr1 genetrack . 355 381 792 - . stddev=1.47737416556 +chr1 genetrack . 371 397 608 + . stddev=1.44652711793 +chr1 genetrack . 386 412 126 - . stddev=0.471404520791 +chr1 genetrack . 436 462 618 - . stddev=5.47536569145 +chr1 genetrack . 438 464 1393 + . stddev=4.75587332865 +chr1 genetrack . 458 484 754 - . stddev=3.28891288785 +chr1 genetrack . 480 506 58 + . stddev=0.0 +chr1 genetrack . 535 561 1015 - . stddev=0.0 +chr1 genetrack . 725 751 39 - . stddev=0.0 +chr1 genetrack . 754 780 23 + . stddev=0.0 +chr1 genetrack . 796 822 607 + . stddev=0.0 +chr1 genetrack . 841 867 665 + . stddev=0.0 +chr1 genetrack . 874 900 468 + . 
stddev=0.0 +chr1 genetrack . 900 926 107 - . stddev=0.0 +chr1 genetrack . 941 967 2 - . stddev=0.0 +chr1 genetrack . 1089 1115 740 + . stddev=0.0 +chr1 genetrack . 1124 1150 940 - . stddev=3.96036497305 +chr1 genetrack . 1180 1206 25 + . stddev=0.0 +chr1 genetrack . 1288 1314 454 - . stddev=0.0 +chr1 genetrack . 1326 1352 207 - . stddev=0.0 +chr1 genetrack . 1481 1507 584 + . stddev=0.0 +chr1 genetrack . 2072 2098 1181 + . stddev=0.0 +chr1 genetrack . 2099 2125 481 + . stddev=0.0455486534308 +chr1 genetrack . 2122 2148 199 - . stddev=0.0 +chr1 genetrack . 2449 2475 1246 + . stddev=0.0 +chr1 genetrack . 2599 2625 34 + . stddev=0.0 +chr1 genetrack . 2830 2856 1062 + . stddev=1.01561431542 +chr1 genetrack . 2835 2861 1144 - . stddev=1.09438744148 +chr1 genetrack . 3008 3034 1212 - . stddev=0.0 +chr1 genetrack . 3113 3139 555 - . stddev=0.0 +chr1 genetrack . 3127 3153 17 + . stddev=0.0 +chr1 genetrack . 3375 3401 525 - . stddev=0.0 +chr1 genetrack . 3666 3692 845 + . stddev=0.0 +chr1 genetrack . 3782 3808 23 - . stddev=0.0 +chr1 genetrack . 3844 3870 316 - . stddev=0.0 +chr1 genetrack . 3865 3891 491 + . stddev=0.0 +chr1 genetrack . 4094 4120 536 - . stddev=0.0 +chr1 genetrack . 4323 4349 482 + . stddev=0.0 +chr1 genetrack . 4392 4418 3 + . stddev=0.0 +chr1 genetrack . 4458 4484 1110 + . stddev=0.0 +chr1 genetrack . 4497 4523 125 - . stddev=0.0 +chr1 genetrack . 4617 4643 147 + . stddev=0.0 +chr1 genetrack . 4823 4849 1761 + . stddev=4.82408982772 +chr1 genetrack . 4899 4925 710 + . stddev=0.0 +chr1 genetrack . 5107 5133 828 + . stddev=0.0 +chr1 genetrack . 5399 5425 282 - . stddev=0.0 +chr1 genetrack . 5498 5524 75 + . stddev=0.0 +chr1 genetrack . 5704 5730 2 + . stddev=0.0 +chr1 genetrack . 5714 5740 737 - . stddev=0.36608362591 +chr1 genetrack . 6083 6109 646 + . stddev=0.039314009595 +chr1 genetrack . 6095 6121 230 - . stddev=0.0657945476105 +chr1 genetrack . 6184 6210 329 - . stddev=0.0 +chr1 genetrack . 6287 6313 5 + . stddev=0.0 +chr1 genetrack . 
6353 6379 285 + . stddev=0.0 +chr1 genetrack . 6377 6403 34 - . stddev=0.0 +chr1 genetrack . 6398 6424 1587 + . stddev=5.61831543503 +chr1 genetrack . 6412 6438 953 - . stddev=3.52372902021 +chr1 genetrack . 6429 6455 742 + . stddev=0.0 +chr1 genetrack . 6493 6519 691 + . stddev=0.0 +chr1 genetrack . 6503 6529 61 - . stddev=1.5137105198 +chr1 genetrack . 6840 6866 28 + . stddev=0.0 +chr1 genetrack . 7055 7081 518 - . stddev=0.0 +chr1 genetrack . 7121 7147 654 + . stddev=0.0 +chr1 genetrack . 7762 7788 714 + . stddev=0.0 +chr1 genetrack . 7844 7870 3 + . stddev=0.0 +chr1 genetrack . 8206 8232 17 + . stddev=0.0 +chr1 genetrack . 8269 8295 2 - . stddev=0.0 +chr1 genetrack . 8456 8482 10 + . stddev=0.0 +chr1 genetrack . 8468 8494 5 - . stddev=0.0 +chr1 genetrack . 8712 8738 5 + . stddev=0.0 +chr1 genetrack . 8831 8857 332 + . stddev=0.0 +chr1 genetrack . 8836 8862 593 - . stddev=0.0 +chr1 genetrack . 9031 9057 24 + . stddev=0.0 +chr1 genetrack . 9055 9081 4 + . stddev=0.0 +chr1 genetrack . 9482 9508 36 + . stddev=0.0 +chr1 genetrack . 9707 9733 480 + . stddev=0.0 +chr1 genetrack . 9920 9946 606 - . stddev=0.0