Mercurial > repos > mvdbeek > damidseq_consecutive_peaks
annotate consecutive_peaks.py @ 0:7f827a8e4ec5 draft
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
author | mvdbeek |
---|---|
date | Fri, 26 Oct 2018 11:58:06 -0400 |
parents | |
children | f3ca59e53b73 |
rev | line source |
---|---|
0
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
mvdbeek
parents:
diff
changeset
|
1 import click |
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
mvdbeek
parents:
diff
changeset
|
2 import numpy as np |
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
mvdbeek
parents:
diff
changeset
|
3 import pandas as pd |
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
mvdbeek
parents:
diff
changeset
|
4 |
7f827a8e4ec5
planemo upload for repository https://github.com/bardin-lab/damid_galaxy_tools commit c753dd4f3e1863aae7ba45dcc7efdf6937b03542-dirty
mvdbeek
parents:
diff
changeset
|
# Labels of the helper columns that determine_consecutive_peaks adds to the
# input table while it works.
# Holds, for each row, the padj value of the previous row in the same group.
SHIFTED_PADJ_COLUMN = 'shifted'
# Holds the row-wise maximum of a row's padj and its shifted (previous) padj.
CONSECUTIVE_MAX = 'consecutive_max'
# Holds the number of rows in the row's group; only added on request.
PEAKS_PER_GROUP = 'peaks_per_group'
@click.command()
@click.argument('input_file', type=click.Path(exists=True))
@click.argument('output_file', type=click.Path())
@click.argument('padj_column', default=8)
@click.argument('groupby_column', default=9)
@click.argument('add_number_of_peaks', default=True)
def determine_consecutive_peaks(input_file, output_file, padj_column, groupby_column, add_number_of_peaks):
    """Report, per group, the consecutive pair of peaks with the lowest padj.

    INPUT_FILE is read as a headerless tab-separated table, so PADJ_COLUMN
    and GROUPBY_COLUMN are zero-based column positions.

    Every row is scored with the larger of its own padj and the padj of the
    row directly before it in the same group. For each group the row(s) with
    the smallest score are kept, their padj is replaced by that score, and
    the result is written to OUTPUT_FILE sorted by score (lowest first).

    If ADD_NUMBER_OF_PEAKS is true, a trailing column with the number of rows
    in the group is appended to the output.
    """
    df = pd.read_csv(input_file, sep='\t', header=None)
    grouped = df.groupby(groupby_column, sort=False)
    if add_number_of_peaks:
        df[PEAKS_PER_GROUP] = grouped[groupby_column].transform('size')
    # Use the caller-selected padj column rather than a hard-coded position.
    df[SHIFTED_PADJ_COLUMN] = grouped[padj_column].shift()
    # The first row of a group has no predecessor (shifted value is NaN);
    # max() skips NaN, so such a row is scored with its own padj alone.
    df[CONSECUTIVE_MAX] = df[[padj_column, SHIFTED_PADJ_COLUMN]].max(axis=1)
    # Regroup so the groupby sees the helper columns added above.
    group_minimum = df.groupby(groupby_column, sort=False)[CONSECUTIVE_MAX].transform('min')
    # Ties are all kept; rows whose score is NaN never compare equal and drop
    # out. Copy so the column assignment below does not write into a view.
    selected = df[df[CONSECUTIVE_MAX] == group_minimum].copy()
    # sort_values returns a new frame; a stable sort keeps file order on ties.
    selected = selected.sort_values(by=CONSECUTIVE_MAX, kind='mergesort')
    # Report the pair's score (the larger padj of the two peaks) as the padj.
    selected[padj_column] = selected[CONSECUTIVE_MAX]
    selected = selected.drop(labels=[CONSECUTIVE_MAX, SHIFTED_PADJ_COLUMN], axis=1)
    # NOTE(review): the row index is still written as the first output column
    # (pandas default), as before; downstream consumers may rely on that
    # layout - confirm before switching to index=False.
    selected.to_csv(output_file, sep='\t', header=False, na_rep="NaN")
if __name__ == '__main__':
    # Called without arguments: the click command reads them from the
    # command line.
    determine_consecutive_peaks()