Mercurial > repos > jay > gaiac_windrose_plot
changeset 6:eac57ce5ca9c draft default tip
planemo upload for repository https://github.com/jaidevjoshi83/gaiac commit e62320eeba2e8bae9b1965507caa8faf027f6eaf-dirty
| author | jay |
|---|---|
| date | Wed, 28 Jan 2026 14:47:37 +0000 |
| parents | 23562132641e |
| children | |
| files | gaiac_time_sync/gaiac_time_sync.py gaiac_time_sync/test-data/out.tsv gaiac_time_sync/test_output.csv gaiac_time_sync/test_output_3files.csv |
| diffstat | 4 files changed, 7 insertions(+), 68 deletions(-) [+] |
line wrap: on
line diff
--- a/gaiac_time_sync/gaiac_time_sync.py Wed Jan 28 14:31:37 2026 +0000 +++ b/gaiac_time_sync/gaiac_time_sync.py Wed Jan 28 14:47:37 2026 +0000 @@ -33,8 +33,7 @@ if use_index: # Parse dates using index df = pd.read_csv(file, sep=sep, parse_dates=[col_idx]) - # Normalize the column name to specific identifier for merging - # This handles cases where different files describe the date column with different headers + original_col_name = df.columns[col_idx] merge_col = "__common_timestamp__" df.rename(columns={original_col_name: merge_col}, inplace=True) @@ -48,76 +47,32 @@ print("No valid dataframes loaded.") return - # Get common timestamps by successive inner merges common_times = dfs[0][[merge_col]] for df in dfs[1:]: common_times = common_times.merge(df[[merge_col]], on=merge_col, how='inner') - # Now filter each dataframe to contain only common timestamps aligned_dfs = [ df[df[merge_col].isin(common_times[merge_col])].reset_index(drop=True) for df in dfs ] - # After filtering, if we used a placeholder name, we might want to restore original names? - # Or keep it universal. - # The requirement says "returns files with time synchronized data". - # If output_mode='multiple', we dump them back. - # If we renamed the timestamp column to '__common_timestamp__', it will appear as such in output. - # User might prefer the original name. - # But if input files had DIFFERENT names for that column, which one should we use? - # Let's simple restore it to "date_time" (user input) or something generic if it was index. - # Actually, simplest is to rename it back to "Date_Time" or similar if we changed it. 
if use_index and output_mode == 'multiple': for df in aligned_dfs: df.rename(columns={merge_col: "Date_Time"}, inplace=True) - # Update merge_col to new name so single mode merging works if triggered + merge_col = "Date_Time" # Output files - if output_mode == 'multiple': - for i, (file, df) in enumerate(zip(file_list, aligned_dfs)): - # Clean filename logic (handle paths) - filename = os.path.splitext(os.path.basename(file.strip()))[0] - # output is just a prefix-ish or single file arg? - # In XML, -o $out. But $out is a single file path in Galaxy typically unless discover_datasets used. - # Wait, XML output is: <data name='out' ... /> - # If output_mode is 'multiple', the script generates multiple files? - # XML says one output 'out'. - # The script default is 'single' in argparse, but XML doesn't set mode! - # XML command: python ... -o $out - # XML inputs don't allow selecting mode. - # Python script default mode is 'single'. - - # So output_mode is likely 'single'. - pass - - # Re-eval python default arguments: - # parser.add_argument('-m', '--mode', default='single', ...) - # XML doesn't pass -m. So it uses 'single'. - # So we merge into one file. - if output_mode == 'single': # Merge all aligned dataframes merged_df = aligned_dfs[0] - # Rename back if needed? if use_index: merged_df.rename(columns={merge_col: "Date_Time"}, inplace=True) merge_col = "Date_Time" for i, df in enumerate(aligned_dfs[1:]): - # When merging 'single', we end up with wide format? - # Or just inner join? - # Original code: - # merged_df = merged_df.merge(df, on=date_time, how='inner') - # If we merge, we need suffixes if other columns have same names! - # Original code didn't specify suffixes, so pandas defaults _x, _y. - # With >2 files, it gets messy (_x, _y, _x, _y...) - # But let's keep original logic for suffixes. - - # If we used index, the column is named 'merge_col' in 'df' too. 
if use_index: df.rename(columns={'__common_timestamp__': merge_col}, inplace=True) @@ -126,19 +81,12 @@ merged_df.to_csv(output, index=False, sep=sep) print("Single merged file saved.") - elif output_mode == 'multiple': # Original logic for multiple - # ... - # The original code's "multiple" block was slightly broken or unused by Galaxy XML - # because Galaxy XML expects specific output file or discovery. - # But I digress, I just need to fix the Date parsing error. + elif output_mode == 'multiple': for i, (file, df) in enumerate(zip(file_list, aligned_dfs)): filename = os.path.splitext(os.path.basename(file.strip()))[0] - # output is passed as full path 'out.tsv' probably. - # If default output was 'aligned', it tries 'aligned_filename.csv' - # Here 'output' arg is likely a file path from Galaxy. output_prefix = os.path.splitext(output)[0] - output_file = f"{output_prefix}_{filename}.csv" + output_file = f"{output_prefix}_{filename}.tsv" df.to_csv(output_file, index=False, sep=sep) print("Aligned files saved individually.") else: @@ -158,7 +106,7 @@ parser.add_argument( '-s', '--sep', default='\t', - help='Separator used in the input files (default: ,)' + help='Separator used in the input files (default: tab)' ) parser.add_argument( @@ -170,7 +118,7 @@ parser.add_argument( '-o', '--output', - default='aligned', + default='aligned.tsv', help="Output filename" )
--- a/gaiac_time_sync/test-data/out.tsv Wed Jan 28 14:31:37 2026 +0000
+++ b/gaiac_time_sync/test-data/out.tsv Wed Jan 28 14:47:37 2026 +0000
@@ -1,4 +1,4 @@
-date_time Temparature1 Humidity1 Temparature2 Humidity2 Temparature3 humidity3
+Date_Time Temparature1 Humidity1 Temparature2 Humidity2 Temparature3 humidity3
 2019-06-07 13:28:00 39.0 50.471 39.0 50.471 38 53.0
 2019-06-07 13:29:00 39.0 51.0 39.0 50.471 38 52.824
 2019-06-07 13:31:00 38.9 50.0 40.0 49.0 38 51.944
--- a/gaiac_time_sync/test_output.csv Wed Jan 28 14:31:37 2026 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-Date_Time Temparature1 Humidity1 Temparature2 Humidity2
-2019-06-07 13:28:00 39.0 50.471 39.0 50.471
-2019-06-07 13:29:00 39.0 51.0 39.0 50.471
-2019-06-07 13:30:00 39.588 49.647 39.588 49.647
-2019-06-07 13:31:00 38.9 50.0 40.0 49.0
--- a/gaiac_time_sync/test_output_3files.csv Wed Jan 28 14:31:37 2026 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-Date_Time Temparature1 Humidity1 Temparature2 Humidity2 Temparature3 humidity3
-2019-06-07 13:28:00 39.0 50.471 39.0 50.471 38 53.0
-2019-06-07 13:29:00 39.0 51.0 39.0 50.471 38 52.824
-2019-06-07 13:31:00 38.9 50.0 40.0 49.0 38 51.944
