Source code for shared.data_processing_utils

#! /usr/bin/env python3.10

import decimal
import re

import h5py as h5py
import numpy as np

from shared.logging import setup_logger

# Module-level logger, named after the last dotted component of this module's name.
logger = setup_logger(__name__.split('.')[-1])


def extract_datasets_from_h5_to_csv(h5_filepath, dataset_mapping):
    """
    Extract specific datasets from an HDF5 file and save them to CSV files.

    Each requested dataset is read fully into memory, flattened with
    ``flatten_ND_to_2D`` and written with ``numpy.savetxt`` using a comma
    delimiter. A dataset that is missing from the file is skipped with a
    warning; a failure while processing one dataset is logged as an error
    and does not stop the remaining datasets from being processed.

    Parameters:
    - h5_filepath: Path to the input HDF5 file.
    - dataset_mapping: Dictionary where keys are dataset names in the HDF5
      file and values are the output filenames for CSV files.

    Note:
    - Data with more than two dimensions is flattened to a 2D array by
      merging every dimension except the last one:
      - 3D arrays of shape (x, y, z) -> 2D arrays of shape (x*y, z)
      - 4D arrays of shape (w, x, y, z) -> 2D arrays of shape (w*x*y, z)
      - ... and so on.
    - 1D and 2D datasets are written unchanged; a scalar dataset is written
      as a single value.
    """
    with h5py.File(h5_filepath, 'r') as h5_file:
        for dataset_name, output_file in dataset_mapping.items():
            try:
                if dataset_name not in h5_file:
                    logger.warning(f"Dataset {dataset_name} not found in {h5_filepath}")
                    continue

                logger.debug(f"Extracting dataset {dataset_name} from {h5_filepath} to {output_file}")
                # ``[()]`` reads the complete dataset whatever its shape, whereas
                # ``[:]`` is rejected for scalar datasets. ``atleast_1d`` then
                # gives ``savetxt`` the 1D-or-2D input it requires.
                raw = np.atleast_1d(h5_file[dataset_name][()])
                data = flatten_ND_to_2D(raw)  # Flatten the data to 2D
                # Save the data as a CSV file
                np.savetxt(output_file, data, delimiter=',')
            except Exception as e:
                # Deliberate best-effort: one bad dataset must not abort the rest.
                logger.error(f"Error extracting dataset {dataset_name}: {e}")
def flatten_ND_to_2D(array):
    """
    Collapse every dimension of an array except the last one.

    An input of shape ``(d0, d1, ..., dk, n)`` is returned with shape
    ``(d0*d1*...*dk, n)``. Inputs with at most one dimension are returned
    unchanged (they are NOT promoted to 2D). Values are not modified.

    :param array: The data to flatten; any array-like accepted by
        ``numpy.asanyarray``.
    :return: The reshaped array, or the input array itself when it has
        fewer than two dimensions.
    :rtype: numpy.ndarray
    """
    array = np.asanyarray(array)
    if array.ndim <= 1:
        # Scalars and 1D data have no leading dimensions to merge.
        return array

    # Compute the row count explicitly instead of passing -1: numpy cannot
    # infer a -1 dimension when the last axis has length 0.
    rows = int(np.prod(array.shape[:-1], dtype=np.int64))
    return array.reshape(rows, array.shape[-1])
def float_range(start, stop, step):
    """
    A generator function that yields a range of evenly spaced numbers,
    each formatted as a string with one decimal place.

    The arithmetic is done with ``decimal.Decimal`` built from the decimal
    text of each argument, so steps such as ``0.1`` accumulate without
    binary floating point drift and ``stop`` is reliably excluded.

    :param start: The start of the range (inclusive).
    :param stop: The end of the range (exclusive).
    :param step: The step size; must be positive when ``start < stop``.
    :type start: float | int | decimal.Decimal
    :type stop: float | int | decimal.Decimal
    :type step: float | int | decimal.Decimal
    :yields: Each value in the range, formatted with ``'.1f'`` (so values
        are rounded to one decimal place).
    :rtype: str
    :raises ValueError: If ``start < stop`` and ``step`` is not positive,
        which would otherwise never terminate.
    """
    def _exact(value):
        # Going through str() keeps 0.1 as Decimal('0.1') rather than the
        # long binary expansion Decimal(0.1) would produce.
        if isinstance(value, decimal.Decimal):
            return value
        return decimal.Decimal(str(value))

    current = _exact(start)
    limit = _exact(stop)
    increment = _exact(step)

    if current < limit and increment <= 0:
        raise ValueError(f"step must be positive when start < stop, got {step}")

    while current < limit:
        yield format(current, '.1f')
        current += increment
def get_dipole_values_as_array(filename, string, delimiter):
    """
    Collect the values that follow a delimiter on every matching line of a file.

    Every line containing ``string`` contributes one entry: the text after
    the first occurrence of ``delimiter`` with surrounding whitespace
    removed. A matching line without the delimiter contributes an empty
    string.

    :param filename: The path to the file.
    :param string: The string to search for.
    :param delimiter: The delimiter that separates the string and its values.
    :type filename: str
    :type string: str
    :type delimiter: str
    :return: The extracted values, in file order.
    :rtype: list
    """
    with open(filename, 'r') as handle:
        return [
            line.partition(delimiter)[2].strip()
            for line in handle
            if string in line
        ]
def natural_sort(iterable, key=None, reverse=False):
    """
    Sorts the given iterable in a natural order.

    This function can be used as a drop-in replacement for the built-in
    `sorted` function.

    A natural sort, also known as an alphanumeric sort, orders strings
    containing numbers by the numerical value of their digit runs rather
    than character by character, so ``'2 ft'`` sorts before ``'10 ft'``.

    Each string is split into alternating runs of digits and non-digits.
    Digit runs compare as integers, other runs compare as text, and a digit
    run sorts before a text run when the two meet at the same position.
    Values that are not strings are compared by their ``str()`` form.

    :param iterable: The iterable to be sorted.
    :param key: A callable used to extract the text to compare from each
        element in the iterable.
    :param reverse: If set to True, the iterable will be sorted in
        descending order.
    :type iterable: iterable
    :type key: callable, optional
    :type reverse: bool, optional
    :return: A new list containing the sorted elements from the iterable.
    :rtype: list

    Usage::

        >>> natural_sort(['2 ft', '10 ft', '1 ft'])
        ['1 ft', '2 ft', '10 ft']
    """
    token_pattern = re.compile(r'\d+|\D+')
    # Keep the caller's key under its own name: rebinding ``key`` and then
    # calling it from inside the new key function would recurse forever.
    extract = key

    def natural_key(element):
        value = element if extract is None else extract(element)
        text = value if isinstance(value, str) else str(value)
        # Tag every token with its kind so integers and strings are never
        # compared with each other directly (which raises TypeError).
        return tuple(
            (0, int(token)) if token[0].isdigit() else (1, token)
            for token in token_pattern.findall(text)
        )

    return sorted(iterable, key=natural_key, reverse=reverse)