Mercurial > repos > mvdbeek > damid_deseq2_to_bedgraph
annotate damid_to_bedgraph.py @ 0:755cbe6825b5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
author | mvdbeek |
---|---|
date | Fri, 14 Dec 2018 06:27:41 -0500 |
parents | |
children |
rev | line source |
---|---|
0
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
1 from collections import OrderedDict |
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
2 |
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
3 import click |
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
4 import numpy as np |
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
5 import pandas as pd |
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
6 import traces |
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
7 |
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
8 |
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
def order_index(df):
    """
    Split chr_start_stop in df index and order by chrom and start.

    The index labels of ``df`` are expected to look like ``<chr>_<start>_<stop>``.
    The label is split from the right, so chromosome/contig names that contain
    underscores themselves (e.g. ``chrUn_KI270742v1``) are kept intact.

    :param df: DataFrame whose index holds ``chr_start_stop`` strings.
               It is not modified.
    :returns: new DataFrame with columns ``chr``, ``start``, ``stop`` followed
              by the columns of ``df``, sorted by ``chr`` then ``start`` and
              indexed by ``start + 2``.
    """
    # Only the last two underscores separate the coordinates from the name.
    parts = df.index.str.rsplit('_', n=2)
    coordinates = pd.DataFrame.from_records(list(parts), columns=['chr', 'start', 'stop'])
    coordinates = coordinates.astype(dtype={"chr": "object",
                                            "start": "int32",
                                            "stop": "int32"})
    coordinates = coordinates.sort_values(['chr', 'start'])
    # Give the values the same positional labels as `coordinates` so the join
    # lines rows up; reset_index returns a copy, leaving the caller's frame alone.
    values = df.reset_index(drop=True)
    ordered = coordinates.join(values)
    # index is center of GATC site
    ordered.index = ordered['start'] + 2
    return ordered
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
27 |
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
28 |
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
def interpolate_values(df, sampling_width=100):
    """
    Linearly interpolate log2FC per chromosome at a fixed sampling interval.

    For every distinct value in ``df['chr']`` the ``log2FC`` column (keyed by
    the frame's index) is loaded into a ``traces.TimeSeries`` and sampled every
    ``sampling_width`` units with linear interpolation. Each sample point
    becomes one interval reaching half a sampling width to either side.

    :param df: DataFrame with a ``chr`` and a ``log2FC`` column.
    :param sampling_width: distance between sample points.
    :returns: DataFrame with columns ``chr``, ``start``, ``end``, ``score``;
              an empty frame with those columns if ``df`` has no rows.
    """
    output_columns = ['chr', 'start', 'end', 'score']
    if df.empty:
        # pd.concat refuses an empty list, so short-circuit here.
        return pd.DataFrame(columns=output_columns)

    half_width = int(sampling_width / 2)
    result = []
    for chrom in df['chr'].unique():
        chrom_df = df[df['chr'] == chrom]
        time_series = traces.TimeSeries(chrom_df['log2FC'])
        # Column 0 holds the sample position, column 1 the interpolated value.
        s = pd.DataFrame.from_records(time_series.sample(sampling_width, interpolate='linear'))
        # Calculate new start and end of interpolated region
        start = s[0] - half_width
        # Negative starts are replaced with 1.
        start.loc[start < 0] = 1
        end = s[0] + half_width
        result.append(pd.DataFrame(OrderedDict([('chr', chrom), ('start', start), ('end', end), ('score', s[1])])))
    return pd.concat(result)
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
41 |
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
42 |
755cbe6825b5
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damid_deseq2_to_bedgraph commit 98722d2ca8205595f032361072aaab450e5f4f83
mvdbeek
parents:
diff
changeset
|
@click.command()
@click.argument('input_path', type=click.Path(exists=True), required=True)
@click.argument('output_path', type=click.Path(exists=False), required=True)
@click.option('--resolution', help="Interpolate log2 fold change at this resolution (in basepairs)", default=50)
def deseq2_to_bedgraph(input_path, output_path, resolution=50):
    """Convert deseq2 output on GATC fragments to bedgraph file with interpolated values."""
    # The docstring above doubles as the click help text, so notes live here.
    # Read the header-less, tab-separated table: column 0 is the fragment
    # identifier (used as index) and column 2 the log2 fold change.
    df = pd.read_csv(input_path, sep='\t', header=None, index_col=0, usecols=[0, 2], names=['GATC', 'log2FC'])
    # Drop rows whose identifier contains a literal dot (raw string keeps the
    # regex escape valid); presumably these are not plain chr_start_stop
    # labels -- TODO confirm against the upstream tool.
    df = df[~df.index.str.contains(r'\.')]
    df = order_index(df)
    r = interpolate_values(df, sampling_width=resolution)
    # Write without header row and without the index column.
    r.to_csv(output_path, sep='\t', header=False, index=False)


if __name__ == '__main__':
    deseq2_to_bedgraph()