changeset 0:08b6255afde7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/resize_coordinate_window commit b'67cff25a50ba173b0468819204d0999496f68ea9'
author iuc
date Tue, 19 Jan 2016 09:34:56 -0500
parents
children 0164d2edba9f
files resize_coordinate_window.py resize_coordinate_window.xml test-data/input.gff test-data/output.gff
diffstat 4 files changed, 308 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/resize_coordinate_window.py	Tue Jan 19 09:34:56 2016 -0500
@@ -0,0 +1,41 @@
+import argparse
+import sys
+
+
+def stop_err( msg ):
+    sys.stderr.write( msg )
+    sys.exit(1)
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--input', dest='input', help="Input dataset")
+parser.add_argument('--subtract_from_start', dest='subtract_from_start', type=int, help='Distance to subtract from start.')
+parser.add_argument('--add_to_end', dest='add_to_end', type=int, help='Distance to add to end.')
+parser.add_argument('--extend_existing', dest='extend_existing', help='Extend existing start/end rather or from computed midpoint.')
+parser.add_argument('--output', dest='output', help="Output dataset")
+args = parser.parse_args()
+
+extend_existing = args.extend_existing == 'existing'
+out = open(args.output, 'wb')
+
+for line in open(args.input):
+    if line.startswith('#'):
+        continue
+    items = line.split('\t')
+    if len(items) != 9:
+        continue
+    start = int(items[3])
+    end = int(items[4])
+    if extend_existing:
+        start -= args.subtract_from_start
+        end += args.add_to_end
+    else:
+        midpoint = (start + end) // 2
+        start = midpoint - args.subtract_from_start
+        end = midpoint + args.add_to_end
+    if start < 1:
+        out.close()
+        stop_err('Requested expansion places region beyond chromosome bounds.')
+    new_line = '\t'.join([items[0], items[1], items[2], str(start), str(end), items[5], items[6], items[7], items[8]])
+    out.write(new_line)
+out.close()
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/resize_coordinate_window.xml	Tue Jan 19 09:34:56 2016 -0500
@@ -0,0 +1,67 @@
+<tool id="resize_coordinate_window" name="Resize coordinate window" version="1.0.0">
+    <description>of GFF data</description>
+    <command>
+        python $__tool_directory__/resize_coordinate_window.py
+        --input "$input"
+        --subtract_from_start $subtract_from_start
+        --add_to_end $add_to_end
+        --extend_existing $extend_existing
+        --output "$output"
+    </command>
+    <inputs>
+        <param name="input" type="data" format="gff" label="Gff file" />
+        <param name="subtract_from_start" type="integer" value="30" min="0" label="Distance to subtract from the start coordinate"/>
+        <param name="add_to_end" type="integer" value="30" min="0" label="Distance to add to the end coordinate"/>
+        <param name="extend_existing" type="select" label="Resize window from" help="The midpoint is computed as (start + end) // 2">
+            <option value="midpoint" selected="True">the midpoint of the start and end coordinates</option>
+            <option value="existing">the start and end coordinates</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output" format="gff" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="input.gff" ftype="gff" />
+            <param name="subtract_from_start" value="13" />
+            <param name="add_to_end" value="13" />
+            <param name="extend_existing" value="midpoint" />
+            <output name="output" file="output.gff" ftype="gff" />
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+Resizes each feature in a GFF dataset by subtracting a distance from its start coordinate and adding a
+distance to its end coordinate, measured either from the existing start and end coordinates or from
+the midpoint between them.
+
+-----
+
+**Example**
+
+If the input dataset is::
+
+    chr1    genetrack       .       17      37      918     +       .       stddev=5.96715849116
+    chr1    genetrack       .       31      51      245     -       .       stddev=2.66582799529
+    chr1    genetrack       .       40      60      2060    +       .       stddev=2.7859667372
+
+Resizing the coordinate window by 13 from the computed midpoint of the start and end coordinates produces::
+
+    chr1    genetrack       .       14      40      918     +       .       stddev=5.96715849116
+    chr1    genetrack       .       28      54      245     -       .       stddev=2.66582799529
+    chr1    genetrack       .       37      63      2060    +       .       stddev=2.7859667372
+
+    </help>
+    <citations>
+        <citation type="bibtex">
+            @unpublished{None,
+            author = {},
+            title = {None},
+            year = {None},
+            eprint = {None},
+            url = {http://www.huck.psu.edu/content/research/independent-centers-excellence/center-for-eukaryotic-gene-regulation}
+        }</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input.gff	Tue Jan 19 09:34:56 2016 -0500
@@ -0,0 +1,100 @@
+chr1	genetrack	.	17	37	918	+	.	stddev=5.96715849116
+chr1	genetrack	.	31	51	245	-	.	stddev=2.66582799529
+chr1	genetrack	.	40	60	2060	+	.	stddev=2.7859667372
+chr1	genetrack	.	62	82	1300	-	.	stddev=4.13061337623
+chr1	genetrack	.	73	93	397	+	.	stddev=0.0
+chr1	genetrack	.	89	109	521	+	.	stddev=0.747112137937
+chr1	genetrack	.	123	143	5129	+	.	stddev=3.01025384354
+chr1	genetrack	.	125	145	4659	-	.	stddev=3.8642622228
+chr1	genetrack	.	155	175	897	-	.	stddev=3.22709952671
+chr1	genetrack	.	171	191	956	-	.	stddev=4.95899971687
+chr1	genetrack	.	180	200	1527	+	.	stddev=4.62574275346
+chr1	genetrack	.	185	205	494	-	.	stddev=1.4255957
+chr1	genetrack	.	192	212	2538	+	.	stddev=5.04731591122
+chr1	genetrack	.	206	226	2087	-	.	stddev=3.6160253713
+chr1	genetrack	.	238	258	2496	+	.	stddev=2.11105291581
+chr1	genetrack	.	242	262	5047	-	.	stddev=3.62629343395
+chr1	genetrack	.	254	274	1525	+	.	stddev=4.46082441647
+chr1	genetrack	.	281	301	15	+	.	stddev=1.74610678049
+chr1	genetrack	.	302	322	626	-	.	stddev=0.0
+chr1	genetrack	.	308	328	1544	+	.	stddev=4.43066151722
+chr1	genetrack	.	334	354	533	+	.	stddev=1.34355443899
+chr1	genetrack	.	344	364	726	-	.	stddev=1.36767079956
+chr1	genetrack	.	347	367	286	+	.	stddev=0.0
+chr1	genetrack	.	358	378	792	-	.	stddev=1.47737416556
+chr1	genetrack	.	374	394	608	+	.	stddev=1.44652711793
+chr1	genetrack	.	389	409	126	-	.	stddev=0.471404520791
+chr1	genetrack	.	439	459	618	-	.	stddev=5.47536569145
+chr1	genetrack	.	441	461	1393	+	.	stddev=4.75587332865
+chr1	genetrack	.	461	481	754	-	.	stddev=3.28891288785
+chr1	genetrack	.	483	503	58	+	.	stddev=0.0
+chr1	genetrack	.	538	558	1015	-	.	stddev=0.0
+chr1	genetrack	.	728	748	39	-	.	stddev=0.0
+chr1	genetrack	.	757	777	23	+	.	stddev=0.0
+chr1	genetrack	.	799	819	607	+	.	stddev=0.0
+chr1	genetrack	.	844	864	665	+	.	stddev=0.0
+chr1	genetrack	.	877	897	468	+	.	stddev=0.0
+chr1	genetrack	.	903	923	107	-	.	stddev=0.0
+chr1	genetrack	.	944	964	2	-	.	stddev=0.0
+chr1	genetrack	.	1092	1112	740	+	.	stddev=0.0
+chr1	genetrack	.	1127	1147	940	-	.	stddev=3.96036497305
+chr1	genetrack	.	1183	1203	25	+	.	stddev=0.0
+chr1	genetrack	.	1291	1311	454	-	.	stddev=0.0
+chr1	genetrack	.	1329	1349	207	-	.	stddev=0.0
+chr1	genetrack	.	1484	1504	584	+	.	stddev=0.0
+chr1	genetrack	.	2075	2095	1181	+	.	stddev=0.0
+chr1	genetrack	.	2102	2122	481	+	.	stddev=0.0455486534308
+chr1	genetrack	.	2125	2145	199	-	.	stddev=0.0
+chr1	genetrack	.	2452	2472	1246	+	.	stddev=0.0
+chr1	genetrack	.	2602	2622	34	+	.	stddev=0.0
+chr1	genetrack	.	2833	2853	1062	+	.	stddev=1.01561431542
+chr1	genetrack	.	2838	2858	1144	-	.	stddev=1.09438744148
+chr1	genetrack	.	3011	3031	1212	-	.	stddev=0.0
+chr1	genetrack	.	3116	3136	555	-	.	stddev=0.0
+chr1	genetrack	.	3130	3150	17	+	.	stddev=0.0
+chr1	genetrack	.	3378	3398	525	-	.	stddev=0.0
+chr1	genetrack	.	3669	3689	845	+	.	stddev=0.0
+chr1	genetrack	.	3785	3805	23	-	.	stddev=0.0
+chr1	genetrack	.	3847	3867	316	-	.	stddev=0.0
+chr1	genetrack	.	3868	3888	491	+	.	stddev=0.0
+chr1	genetrack	.	4097	4117	536	-	.	stddev=0.0
+chr1	genetrack	.	4326	4346	482	+	.	stddev=0.0
+chr1	genetrack	.	4395	4415	3	+	.	stddev=0.0
+chr1	genetrack	.	4461	4481	1110	+	.	stddev=0.0
+chr1	genetrack	.	4500	4520	125	-	.	stddev=0.0
+chr1	genetrack	.	4620	4640	147	+	.	stddev=0.0
+chr1	genetrack	.	4826	4846	1761	+	.	stddev=4.82408982772
+chr1	genetrack	.	4902	4922	710	+	.	stddev=0.0
+chr1	genetrack	.	5110	5130	828	+	.	stddev=0.0
+chr1	genetrack	.	5402	5422	282	-	.	stddev=0.0
+chr1	genetrack	.	5501	5521	75	+	.	stddev=0.0
+chr1	genetrack	.	5707	5727	2	+	.	stddev=0.0
+chr1	genetrack	.	5717	5737	737	-	.	stddev=0.36608362591
+chr1	genetrack	.	6086	6106	646	+	.	stddev=0.039314009595
+chr1	genetrack	.	6098	6118	230	-	.	stddev=0.0657945476105
+chr1	genetrack	.	6187	6207	329	-	.	stddev=0.0
+chr1	genetrack	.	6290	6310	5	+	.	stddev=0.0
+chr1	genetrack	.	6356	6376	285	+	.	stddev=0.0
+chr1	genetrack	.	6380	6400	34	-	.	stddev=0.0
+chr1	genetrack	.	6401	6421	1587	+	.	stddev=5.61831543503
+chr1	genetrack	.	6415	6435	953	-	.	stddev=3.52372902021
+chr1	genetrack	.	6432	6452	742	+	.	stddev=0.0
+chr1	genetrack	.	6496	6516	691	+	.	stddev=0.0
+chr1	genetrack	.	6506	6526	61	-	.	stddev=1.5137105198
+chr1	genetrack	.	6843	6863	28	+	.	stddev=0.0
+chr1	genetrack	.	7058	7078	518	-	.	stddev=0.0
+chr1	genetrack	.	7124	7144	654	+	.	stddev=0.0
+chr1	genetrack	.	7765	7785	714	+	.	stddev=0.0
+chr1	genetrack	.	7847	7867	3	+	.	stddev=0.0
+chr1	genetrack	.	8209	8229	17	+	.	stddev=0.0
+chr1	genetrack	.	8272	8292	2	-	.	stddev=0.0
+chr1	genetrack	.	8459	8479	10	+	.	stddev=0.0
+chr1	genetrack	.	8471	8491	5	-	.	stddev=0.0
+chr1	genetrack	.	8715	8735	5	+	.	stddev=0.0
+chr1	genetrack	.	8834	8854	332	+	.	stddev=0.0
+chr1	genetrack	.	8839	8859	593	-	.	stddev=0.0
+chr1	genetrack	.	9034	9054	24	+	.	stddev=0.0
+chr1	genetrack	.	9058	9078	4	+	.	stddev=0.0
+chr1	genetrack	.	9485	9505	36	+	.	stddev=0.0
+chr1	genetrack	.	9710	9730	480	+	.	stddev=0.0
+chr1	genetrack	.	9923	9943	606	-	.	stddev=0.0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output.gff	Tue Jan 19 09:34:56 2016 -0500
@@ -0,0 +1,100 @@
+chr1	genetrack	.	14	40	918	+	.	stddev=5.96715849116
+chr1	genetrack	.	28	54	245	-	.	stddev=2.66582799529
+chr1	genetrack	.	37	63	2060	+	.	stddev=2.7859667372
+chr1	genetrack	.	59	85	1300	-	.	stddev=4.13061337623
+chr1	genetrack	.	70	96	397	+	.	stddev=0.0
+chr1	genetrack	.	86	112	521	+	.	stddev=0.747112137937
+chr1	genetrack	.	120	146	5129	+	.	stddev=3.01025384354
+chr1	genetrack	.	122	148	4659	-	.	stddev=3.8642622228
+chr1	genetrack	.	152	178	897	-	.	stddev=3.22709952671
+chr1	genetrack	.	168	194	956	-	.	stddev=4.95899971687
+chr1	genetrack	.	177	203	1527	+	.	stddev=4.62574275346
+chr1	genetrack	.	182	208	494	-	.	stddev=1.4255957
+chr1	genetrack	.	189	215	2538	+	.	stddev=5.04731591122
+chr1	genetrack	.	203	229	2087	-	.	stddev=3.6160253713
+chr1	genetrack	.	235	261	2496	+	.	stddev=2.11105291581
+chr1	genetrack	.	239	265	5047	-	.	stddev=3.62629343395
+chr1	genetrack	.	251	277	1525	+	.	stddev=4.46082441647
+chr1	genetrack	.	278	304	15	+	.	stddev=1.74610678049
+chr1	genetrack	.	299	325	626	-	.	stddev=0.0
+chr1	genetrack	.	305	331	1544	+	.	stddev=4.43066151722
+chr1	genetrack	.	331	357	533	+	.	stddev=1.34355443899
+chr1	genetrack	.	341	367	726	-	.	stddev=1.36767079956
+chr1	genetrack	.	344	370	286	+	.	stddev=0.0
+chr1	genetrack	.	355	381	792	-	.	stddev=1.47737416556
+chr1	genetrack	.	371	397	608	+	.	stddev=1.44652711793
+chr1	genetrack	.	386	412	126	-	.	stddev=0.471404520791
+chr1	genetrack	.	436	462	618	-	.	stddev=5.47536569145
+chr1	genetrack	.	438	464	1393	+	.	stddev=4.75587332865
+chr1	genetrack	.	458	484	754	-	.	stddev=3.28891288785
+chr1	genetrack	.	480	506	58	+	.	stddev=0.0
+chr1	genetrack	.	535	561	1015	-	.	stddev=0.0
+chr1	genetrack	.	725	751	39	-	.	stddev=0.0
+chr1	genetrack	.	754	780	23	+	.	stddev=0.0
+chr1	genetrack	.	796	822	607	+	.	stddev=0.0
+chr1	genetrack	.	841	867	665	+	.	stddev=0.0
+chr1	genetrack	.	874	900	468	+	.	stddev=0.0
+chr1	genetrack	.	900	926	107	-	.	stddev=0.0
+chr1	genetrack	.	941	967	2	-	.	stddev=0.0
+chr1	genetrack	.	1089	1115	740	+	.	stddev=0.0
+chr1	genetrack	.	1124	1150	940	-	.	stddev=3.96036497305
+chr1	genetrack	.	1180	1206	25	+	.	stddev=0.0
+chr1	genetrack	.	1288	1314	454	-	.	stddev=0.0
+chr1	genetrack	.	1326	1352	207	-	.	stddev=0.0
+chr1	genetrack	.	1481	1507	584	+	.	stddev=0.0
+chr1	genetrack	.	2072	2098	1181	+	.	stddev=0.0
+chr1	genetrack	.	2099	2125	481	+	.	stddev=0.0455486534308
+chr1	genetrack	.	2122	2148	199	-	.	stddev=0.0
+chr1	genetrack	.	2449	2475	1246	+	.	stddev=0.0
+chr1	genetrack	.	2599	2625	34	+	.	stddev=0.0
+chr1	genetrack	.	2830	2856	1062	+	.	stddev=1.01561431542
+chr1	genetrack	.	2835	2861	1144	-	.	stddev=1.09438744148
+chr1	genetrack	.	3008	3034	1212	-	.	stddev=0.0
+chr1	genetrack	.	3113	3139	555	-	.	stddev=0.0
+chr1	genetrack	.	3127	3153	17	+	.	stddev=0.0
+chr1	genetrack	.	3375	3401	525	-	.	stddev=0.0
+chr1	genetrack	.	3666	3692	845	+	.	stddev=0.0
+chr1	genetrack	.	3782	3808	23	-	.	stddev=0.0
+chr1	genetrack	.	3844	3870	316	-	.	stddev=0.0
+chr1	genetrack	.	3865	3891	491	+	.	stddev=0.0
+chr1	genetrack	.	4094	4120	536	-	.	stddev=0.0
+chr1	genetrack	.	4323	4349	482	+	.	stddev=0.0
+chr1	genetrack	.	4392	4418	3	+	.	stddev=0.0
+chr1	genetrack	.	4458	4484	1110	+	.	stddev=0.0
+chr1	genetrack	.	4497	4523	125	-	.	stddev=0.0
+chr1	genetrack	.	4617	4643	147	+	.	stddev=0.0
+chr1	genetrack	.	4823	4849	1761	+	.	stddev=4.82408982772
+chr1	genetrack	.	4899	4925	710	+	.	stddev=0.0
+chr1	genetrack	.	5107	5133	828	+	.	stddev=0.0
+chr1	genetrack	.	5399	5425	282	-	.	stddev=0.0
+chr1	genetrack	.	5498	5524	75	+	.	stddev=0.0
+chr1	genetrack	.	5704	5730	2	+	.	stddev=0.0
+chr1	genetrack	.	5714	5740	737	-	.	stddev=0.36608362591
+chr1	genetrack	.	6083	6109	646	+	.	stddev=0.039314009595
+chr1	genetrack	.	6095	6121	230	-	.	stddev=0.0657945476105
+chr1	genetrack	.	6184	6210	329	-	.	stddev=0.0
+chr1	genetrack	.	6287	6313	5	+	.	stddev=0.0
+chr1	genetrack	.	6353	6379	285	+	.	stddev=0.0
+chr1	genetrack	.	6377	6403	34	-	.	stddev=0.0
+chr1	genetrack	.	6398	6424	1587	+	.	stddev=5.61831543503
+chr1	genetrack	.	6412	6438	953	-	.	stddev=3.52372902021
+chr1	genetrack	.	6429	6455	742	+	.	stddev=0.0
+chr1	genetrack	.	6493	6519	691	+	.	stddev=0.0
+chr1	genetrack	.	6503	6529	61	-	.	stddev=1.5137105198
+chr1	genetrack	.	6840	6866	28	+	.	stddev=0.0
+chr1	genetrack	.	7055	7081	518	-	.	stddev=0.0
+chr1	genetrack	.	7121	7147	654	+	.	stddev=0.0
+chr1	genetrack	.	7762	7788	714	+	.	stddev=0.0
+chr1	genetrack	.	7844	7870	3	+	.	stddev=0.0
+chr1	genetrack	.	8206	8232	17	+	.	stddev=0.0
+chr1	genetrack	.	8269	8295	2	-	.	stddev=0.0
+chr1	genetrack	.	8456	8482	10	+	.	stddev=0.0
+chr1	genetrack	.	8468	8494	5	-	.	stddev=0.0
+chr1	genetrack	.	8712	8738	5	+	.	stddev=0.0
+chr1	genetrack	.	8831	8857	332	+	.	stddev=0.0
+chr1	genetrack	.	8836	8862	593	-	.	stddev=0.0
+chr1	genetrack	.	9031	9057	24	+	.	stddev=0.0
+chr1	genetrack	.	9055	9081	4	+	.	stddev=0.0
+chr1	genetrack	.	9482	9508	36	+	.	stddev=0.0
+chr1	genetrack	.	9707	9733	480	+	.	stddev=0.0
+chr1	genetrack	.	9920	9946	606	-	.	stddev=0.0