changeset 0:7f827a8e4ec5 draft

planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
author mvdbeek
date Fri, 26 Oct 2018 11:58:06 -0400
parents
children f3ca59e53b73
files consecutive_peaks.py consecutive_peaks.xml test-data/deseq2_peaks.bed test-data/grouped.bed
diffstat 4 files changed, 110 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/consecutive_peaks.py	Fri Oct 26 11:58:06 2018 -0400
@@ -0,0 +1,34 @@
+import click
+import numpy as np
+import pandas as pd
+
+# Names of the helper columns added to the DataFrame during processing.
+# 'shifted' and 'consecutive_max' are dropped again before the output is
+# written; 'peaks_per_group' stays in the output when it was added.
+SHIFTED_PADJ_COLUMN = 'shifted'  # group-wise shift() of the p.adj column (previous row's value)
+CONSECUTIVE_MAX = 'consecutive_max'  # row-wise max of the p.adj column and the shifted column
+PEAKS_PER_GROUP = 'peaks_per_group'  # number of rows in the row's group
+
+
+@click.command()
+@click.argument('input_file', type=click.Path(exists=True))
+@click.argument('output_file', type=click.Path())
+@click.argument('padj_column', default=8)
+@click.argument('groupby_column', default=9)
+@click.argument('add_number_of_peaks', default=True)
+def determine_consecutive_peaks(input_file, output_file, padj_column, groupby_column, add_number_of_peaks):
+    """Report, per group, the consecutive peaks with the lowest p.adj.
+
+    For every row the "consecutive" p.adj is the higher of the row's own
+    p.adj and the p.adj of the previous row of the same group. For each
+    group the row(s) with the lowest consecutive p.adj are written to
+    OUTPUT_FILE, with the p.adj column replaced by that consecutive p.adj.
+
+    INPUT_FILE is a tab-separated table without a header line.
+    PADJ_COLUMN and GROUPBY_COLUMN are 0-based column indices.
+    ADD_NUMBER_OF_PEAKS appends the number of rows per group as a column.
+    """
+    df = pd.read_csv(input_file, sep='\t', header=None)
+    grouped = df.groupby(groupby_column, sort=False)
+    if add_number_of_peaks:
+        df[PEAKS_PER_GROUP] = grouped[groupby_column].transform(np.size)
+    # Previous row's p.adj within the same group (NaN for a group's first row).
+    # Shift the requested column; this used to be hard-coded to column 8.
+    df[SHIFTED_PADJ_COLUMN] = grouped[padj_column].shift()
+    # NOTE(review): max() skips NaN, so the first row of a group competes with
+    # its own p.adj alone and single-row groups are still reported. Confirm
+    # this is intended for groups that have more than one peak.
+    df[CONSECUTIVE_MAX] = df[[padj_column, SHIFTED_PADJ_COLUMN]].max(axis=1)
+    # Lowest consecutive p.adj of each group, broadcast back to every row.
+    group_minimum = df.groupby(groupby_column, sort=False)[CONSECUTIVE_MAX].transform('min')
+    # Keep every row that ties for its group's minimum. Copy so the assignment
+    # below modifies an independent frame instead of a slice of df.
+    # Rows stay in input order: the former sort_values() call discarded its
+    # result and never reordered anything.
+    new_df = df[df[CONSECUTIVE_MAX] == group_minimum].copy()
+    # Report the higher p.adj of the two consecutive peaks. The former
+    # Series.replace() call discarded its result, leaving the column untouched.
+    # NOTE(review): test-data/grouped.bed predates this fix and needs to be
+    # regenerated.
+    new_df[padj_column] = new_df[CONSECUTIVE_MAX]
+    new_df = new_df.drop(labels=[CONSECUTIVE_MAX, SHIFTED_PADJ_COLUMN], axis=1)
+    # The row index is written as the first output column; no header line.
+    new_df.to_csv(output_file, sep='\t', header=False, na_rep="NaN")
+
+# Invoke the click command when the file is executed as a script.
+if __name__ == '__main__':
+    determine_consecutive_peaks()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/consecutive_peaks.xml	Fri Oct 26 11:58:06 2018 -0400
@@ -0,0 +1,27 @@
+<tool id="consecutive_peaks" name="Consecutive peaks" version="0.1.0">
+    <requirements>
+        <requirement type="package" version="7.0">click</requirement>
+        <requirement type="package" version="0.23.4">pandas</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+## data_column parameters hold 1-based column numbers,
+## while consecutive_peaks.py expects 0-based column indices.
+#set $padj_index = int(str($padj_column)) - 1
+#set $groupby_index = int(str($groupby)) - 1
+python '$__tool_directory__/consecutive_peaks.py' '$input_file' '$output_file' $padj_index $groupby_index
+    ]]></command>
+    <inputs>
+       <param name="input_file" type="data" format="tabular" label="Input file" help="file containing peaks and genes"/>
+       <!-- Defaults match the test data: column 9 holds p.adj, column 10 the gene annotation. -->
+       <param name="padj_column" type="data_column" data_ref="input_file" value="9" label="Column containing padj value"/>
+       <param name="groupby" type="data_column" data_ref="input_file" value="10" label="Group values by this column" help="Usually gene id."/>
+    </inputs>
+    <outputs>
+        <data name="output_file" format_source="input_file"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_file" value="deseq2_peaks.bed"/>
+            <param name="padj_column" value="9"/>
+            <param name="groupby" value="10"/>
+            <output name="output_file" value="grouped.bed"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Can be used to get the lowest p.adj for 2 consecutive peaks in a gene.
+The reported p.adj is the higher p.adj of the 2 consecutive peaks.
+    ]]></help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/deseq2_peaks.bed	Fri Oct 26 11:58:06 2018 -0400
@@ -0,0 +1,40 @@
+2L	5151	5155	40.343042254382	0.364512442363274	0.631307618064081	0.577392751066524	0.56367416996817	0.705361606068607	.
+2L	6878	6882	12.092185769541	0.00257609419374086	0.826397671378337	0.00311725732412125	0.997512792537189	0.99873245096982	.
+2L	6920	6924	7.11337584147453	1.04514109300767	0.946014451365484	1.10478343274683	0.269253440927885	0.432484606530128	.
+2L	7691	7695	11.843536367488	0.151639310614245	0.802284946799774	0.189009293043721	0.850085533276262	0.911041669589917	gene_id "FBgn0031208"; gene_symbol "CG11023";
+2L	7714	7718	9.80005463135221	0.0255918574661465	0.866303804672169	0.0295414349193942	0.976432773062627	0.986972269575067	gene_id "FBgn0031208"; gene_symbol "CG11023";
+2L	12439	12443	395.521214697051	0.588699507050432	0.532443765923156	1.105655741935	0.268875553092083	0.432057816054657	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	12691	12695	344.959086225371	0.583783806387309	0.531317629472573	1.09874729164703	0.271878308464162	0.435248226767611	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	13064	13068	84.613630660452	0.217237382993247	0.510837471900007	0.425257337104216	0.670649090435978	0.787775400487521	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	13291	13295	30.1123838825855	0.155591640049906	0.640283070812485	0.24300445715747	0.808001944619076	0.883541896449563	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	13321	13325	904.966177257045	0.376461729884521	0.50000103395844	0.75292190278913	0.451496845464273	0.611195498344276	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	13563	13567	866.603095816488	0.212809815158458	0.495309217028247	0.429650424103295	0.667449951314894	0.785492185048526	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	13719	13723	113.070341442585	0.0255689119036721	0.508640103147958	0.0502691623122653	0.959907897598717	0.977709493692812	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	14019	14023	327.160239956387	0.00817968469447774	0.493410798870461	0.0165778388174783	0.986773404191456	0.992823409766067	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	14665	14669	109.110588808567	0.357622042095099	0.517327585875048	0.691287400593937	0.48938494804763	0.643965620035756	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	14818	14822	109.589493018768	0.630819180292167	0.51523842372404	1.22432480041518	0.220829724870924	0.377953444427728	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	14871	14875	15.7006038983715	0.472365868872293	0.749919525827981	0.629888744863333	0.528767377230365	0.677153275459159	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	14972	14976	23.8476901187452	0.843585794286652	0.688647606374209	1.22498907493225	0.22057933683896	0.377689786125795	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	15073	15077	1049.30525844711	0.9735164004558	0.481901880919704	2.02015480536796	0.0433673325255996	0.120295777588402	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	15371	15375	1216.91488535489	0.863105078420247	0.47092399272887	1.83279062385163	0.0668337140515137	0.163575572623876	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	15662	15666	728.708978107195	0.995657604251844	0.475779274418689	2.09268805470424	0.0363770102785434	0.106086100091789	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	15837	15841	931.005092915203	1.39832486621617	0.473976801158749	2.95019685098012	0.00317571521468965	0.0178266247016122	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	16091	16095	466.664835290671	1.68546447518356	0.476743856067749	3.53536695592787	0.00040720924302942	0.00388838218326285	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	16173	16177	57.9667762555473	1.83956446735437	0.587556668205665	3.1308715684773	0.0017428836310417	0.0114466453224446	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	16954	16958	104.269971829585	2.00754698293392	0.578809459512681	3.46840734880895	0.000523553024927028	0.00468870862799906	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	18815	18819	22.0488120933426	1.87326623013989	0.777316254714993	2.40991516487292	0.0159562302940107	0.0585670598443906	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	19786	19790	113.299800906455	2.88332490053871	0.588037152599408	4.90330396267145	9.42379868583578e-07	3.69117942073248e-05	gene_id "FBgn0002121"; gene_symbol "l(2)gl";
+2L	21641	21645	34.3500408874986	1.50597265027687	0.653596477890316	2.30413213843786	0.0212152271514627	0.0720282848419051	.
+2L	22094	22098	156.619353330383	1.30715333943466	0.521853623943724	2.50482755979792	0.0122511106265175	0.0482672435893478	gene_id "FBgn0031209"; gene_symbol "Ir21a";gene_id "FBgn0263584"; gene_symbol "CR43609";
+2L	22350	22354	119.545732721289	1.13036602772727	0.532845303338328	2.12137748169199	0.0338900487038572	0.100853283788594	gene_id "FBgn0031209"; gene_symbol "Ir21a";gene_id "FBgn0263584"; gene_symbol "CR43609";
+2L	22504	22508	85.4632791613666	0.342897535321554	0.527063176431041	0.650581468512851	0.515316696087814	0.665812942096135	gene_id "FBgn0031209"; gene_symbol "Ir21a";gene_id "FBgn0263584"; gene_symbol "CR43609";
+2L	23026	23030	57.9770314290585	0.141961019055869	0.534449867988492	0.265620832857845	0.790531239499375	0.871973857457818	gene_id "FBgn0031209"; gene_symbol "Ir21a";gene_id "FBgn0263584"; gene_symbol "CR43609";
+2L	24357	24361	56.1813761950221	0.415613328757581	0.595963429784736	0.697380590798502	0.48556464487102	0.640693696805243	gene_id "FBgn0031209"; gene_symbol "Ir21a";
+2L	25083	25087	47.4292294853296	0.713596808954638	0.648352348117706	1.10063117844232	0.27105721016165	0.434420734304972	gene_id "FBgn0031209"; gene_symbol "Ir21a";
+2L	43327	43331	5.84901314262136	0.679132204154339	0.997795654791302	0.680632553262006	0.496104023308378	0.649678753077093	gene_id "FBgn0051973"; gene_symbol "Cda5";
+2L	47323	47327	32.6029541213852	1.75539500166679	0.756690748055963	2.31983145846124	0.020349996576802	0.0698998692979901	gene_id "FBgn0051973"; gene_symbol "Cda5";
+2L	54049	54053	5.49243058011769	4.55190341736556	1.22468204021079	3.71680425441865	0.000201758637404118	0.00229429817130296	gene_id "FBgn0051973"; gene_symbol "Cda5";
+2L	55633	55637	7.66903089601476	0.101760387554123	0.930459196736503	0.109365771127888	0.912912381544608	0.949924793245379	gene_id "FBgn0051973"; gene_symbol "Cda5";gene_id "FBgn0267987"; gene_symbol "CR46254";
+2L	65315	65319	137.101857971652	1.23449113928199	0.524990063103154	2.35145620087561	0.0187000910325455	0.065785553432607	gene_id "FBgn0051973"; gene_symbol "Cda5";
+2L	65606	65610	125.605086834427	1.48437920210473	0.503182219131581	2.94998341687541	0.00317790986101209	0.017835456290875	.
+2L	65671	65675	49.7172333525855	1.78672093221744	0.65335924547836	2.73466847616013	0.00624431313027083	0.0293462152487687	.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/grouped.bed	Fri Oct 26 11:58:06 2018 -0400
@@ -0,0 +1,9 @@
+3	2L	7691	7695	11.843536367488001	0.151639310614245	0.802284946799774	0.18900929304372102	0.8500855332762621	0.9110416695899171	"gene_id ""FBgn0031208""; gene_symbol ""CG11023"";"	2
+22	2L	16173	16177	57.966776255547295	1.83956446735437	0.5875566682056651	3.1308715684773	0.0017428836310417	0.0114466453224446	"gene_id ""FBgn0002121""; gene_symbol ""l(2)gl"";"	21
+23	2L	16954	16958	104.269971829585	2.00754698293392	0.578809459512681	3.46840734880895	0.0005235530249270281	0.0046887086279990605	"gene_id ""FBgn0002121""; gene_symbol ""l(2)gl"";"	21
+27	2L	22094	22098	156.619353330383	1.30715333943466	0.521853623943724	2.50482755979792	0.0122511106265175	0.0482672435893478	"gene_id ""FBgn0031209""; gene_symbol ""Ir21a"";gene_id ""FBgn0263584""; gene_symbol ""CR43609"";"	4
+31	2L	24357	24361	56.181376195022104	0.41561332875758095	0.5959634297847359	0.6973805907985021	0.48556464487102	0.6406936968052429	"gene_id ""FBgn0031209""; gene_symbol ""Ir21a"";"	2
+32	2L	25083	25087	47.4292294853296	0.713596808954638	0.648352348117706	1.1006311784423202	0.27105721016165	0.4344207343049721	"gene_id ""FBgn0031209""; gene_symbol ""Ir21a"";"	2
+36	2L	55633	55637	7.6690308960147595	0.10176038755412299	0.9304591967365029	0.109365771127888	0.9129123815446079	0.949924793245379	"gene_id ""FBgn0051973""; gene_symbol ""Cda5"";gene_id ""FBgn0267987""; gene_symbol ""CR46254"";"	1
+37	2L	65315	65319	137.10185797165198	1.23449113928199	0.524990063103154	2.3514562008756097	0.0187000910325455	0.065785553432607	"gene_id ""FBgn0051973""; gene_symbol ""Cda5"";"	4
+39	2L	65671	65675	49.717233352585495	1.78672093221744	0.65335924547836	2.73466847616013	0.006244313130270829	0.0293462152487687	.	6