Mercurial > repos > jay > gaiac_windrose_plot
comparison gaiac_data_averaging/gaiac_dataaveraging.py @ 0:1d05627d399f draft
planemo upload for repository https://github.com/jaidevjoshi83/gaiac.git commit c29a769ed165f313a6410925be24f776652a9663-dirty
| author | jay |
|---|---|
| date | Thu, 15 May 2025 14:48:15 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1d05627d399f |
|---|---|
| 1 import pandas as pd | |
| 2 import argparse | |
| 3 | |
| 4 # This tool averages time series data over fixed time intervals, using the selected date/time column to assign rows to intervals. | |
| 5 # Example: python gaiac_dataaveraging.py -I data.tsv -C 1 -T 5 -O averaged_output.tsv | |
| 6 | |
| 7 | |
def _parse_args(argv=None):
    """Build the command-line parser and parse *argv*.

    Returns a ``(parser, args)`` pair; the parser is handed back so the
    caller can report validation failures through ``parser.error``.
    """
    parser = argparse.ArgumentParser(description="Average time series data over specified intervals.")
    parser.add_argument("-I", "--infile", required=True, help="Input data file (TSV format)")
    parser.add_argument("-C", "--dt_column", required=True, help="Column number (1-based) for the DateTime column")
    parser.add_argument("-T", "--time_interval", required=True, help="Time interval in minutes, e.g., '5' or '30'")
    parser.add_argument("-O", "--out_file", default='OutFile.tsv', help="Output file name (TSV format)")
    parser.add_argument("-S", "--sep", default='\t', help="deliminator")
    return parser, parser.parse_args(argv)


def _column_index(parser, raw_column, n_columns):
    """Convert the 1-based ``-C`` value to a 0-based index, or exit with a usage error."""
    try:
        column_number = int(raw_column)
    except ValueError:
        parser.error(f"--dt_column must be an integer, got {raw_column!r}")
    # Reject 0 and negatives explicitly: after the -1 shift they would be
    # valid Python indices and silently select a column from the end.
    if not 1 <= column_number <= n_columns:
        parser.error(f"--dt_column must be between 1 and {n_columns}, got {column_number}")
    return column_number - 1


def _interval_minutes(parser, raw_interval):
    """Parse the ``-T`` value as a positive, finite number of minutes, or exit with a usage error."""
    try:
        minutes = float(raw_interval)
    except ValueError:
        parser.error(f"--time_interval must be a number of minutes, got {raw_interval!r}")
    # The chained comparison is False for NaN, zero, negatives and infinity.
    if not 0 < minutes < float("inf"):
        parser.error(f"--time_interval must be a positive number of minutes, got {raw_interval!r}")
    return minutes


def main(argv=None):
    """Average a delimited time series file over fixed time intervals.

    Reads ``--infile`` with the ``--sep`` delimiter, parses the column
    selected by ``--dt_column`` (1-based) as timestamps, averages the
    numeric columns over consecutive ``--time_interval``-minute intervals,
    rounds the result to 3 decimals and writes it tab-separated to
    ``--out_file``.

    Args:
        argv: Optional list of command-line arguments. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 2 (via ``parser.error``) when the column
            number or the time interval is invalid.
    """
    parser, args = _parse_args(argv)
    minutes = _interval_minutes(parser, args.time_interval)

    # Load data
    data = pd.read_csv(args.infile, sep=args.sep)

    # Resolve the datetime column name from its validated 1-based position
    datetime_col = data.columns[_column_index(parser, args.dt_column, len(data.columns))]

    # Unparseable timestamps are coerced to NaT; drop those rows explicitly
    # so that no row without a usable timestamp takes part in the binning.
    data[datetime_col] = pd.to_datetime(data[datetime_col], errors='coerce')
    data = data.dropna(subset=[datetime_col])
    data.set_index(datetime_col, inplace=True)

    # Group by time intervals and compute the mean of the numeric columns.
    # A Timedelta is passed instead of a frequency-alias string so the
    # result does not depend on how the minute alias is spelled.
    df_avg = data.resample(pd.Timedelta(minutes=minutes)).mean(numeric_only=True)

    # Round to 3 decimals and save to output file (always tab-separated)
    df_avg.round(3).to_csv(args.out_file, sep='\t')

    print(f"Averaged data saved to {args.out_file}")
| 36 | |
# Run the command-line entry point only when this file is executed as a
# script. main() returns None, so the process exits with status 0.
if __name__ == "__main__":
    raise SystemExit(main())
