# Source code for deephaven.learn.gather

#
# Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
#

""" Utilities for gathering Deephaven table data into Python objects """

import enum
from typing import Any, Type

import jpy
import numpy as np

from deephaven import DHError

_JGatherer = jpy.get_type("io.deephaven.integrations.learn.gather.NumPy")


class MemoryLayout(enum.Enum):
    """ Memory layouts for an array.

    Each member's value is a bool that is also exposed as the ``is_row_major`` attribute.
    Because ``C`` and ``FORTRAN`` reuse the values of ``ROW_MAJOR`` and ``COLUMN_MAJOR``, the enum
    machinery treats them as aliases: ``MemoryLayout.C is MemoryLayout.ROW_MAJOR`` and
    ``MemoryLayout.FORTRAN is MemoryLayout.COLUMN_MAJOR``.
    """

    ROW_MAJOR = True
    """ Row-major memory layout."""
    COLUMN_MAJOR = False
    """ Column-major memory layout."""
    C = True
    """ Memory layout consistent with C arrays (row-major)."""
    FORTRAN = False
    """ Memory layout consistent with Fortran arrays (column-major)."""

    def __init__(self, is_row_major: bool):
        # Enum passes the member's value to __init__; keep it under a descriptive name.
        self.is_row_major = is_row_major


def _convert_to_numpy_dtype(np_type: Type) -> Type:
    """ Maps a Python or NumPy type onto the NumPy data type used for gathering.

    Types that already live in the numpy module are returned unchanged. The Python builtins bool,
    float and int map to np.bool_, np.double and np.intc respectively.

    Raises:
        ValueError: if the type is neither a NumPy type nor one of bool, float, int
    """
    # Anything defined by numpy itself is passed through untouched.
    if np_type.__module__ == np.__name__:
        return np_type

    # Checked in order; the first matching Python builtin wins.
    builtin_equivalents = (
        (bool, np.bool_),
        (float, np.double),
        (int, np.intc),
    )
    for builtin_type, numpy_type in builtin_equivalents:
        if np_type == builtin_type:
            return numpy_type

    raise ValueError(f"{np_type} is not a data type that can be converted to a NumPy dtype.")


def table_to_numpy_2d(row_set, col_set, order: MemoryLayout = MemoryLayout.ROW_MAJOR,
                      np_type: Type = np.intc) -> np.ndarray:
    """ Converts Deephaven table data to a 2d NumPy array of the appropriate size

    The Java gatherer fills a flat buffer for the requested rows and columns; that buffer is then
    viewed as a 2d NumPy array indexed as [row, column].

    Args:
        row_set: a RowSequence describing the number of rows in the table
        col_set: ColumnSources describing which columns to copy
        order (MemoryLayout): the desired memory layout of the output array
        np_type: the desired NumPy data type of the output NumPy array; one of np.byte, np.short,
            np.intc, np.int_, np.single, np.double, or the Python types int and float

    Returns:
        a np.ndarray of shape (row_set.intSize(), len(col_set))

    Raises:
        DHError
    """

    try:
        np_type = _convert_to_numpy_dtype(np_type)
        row_major = order.is_row_major

        # Pick the Java gatherer whose element width matches the requested NumPy type.
        if np_type == np.byte:
            buffer = _JGatherer.tensorBuffer2DByte(row_set, col_set, row_major)
        elif np_type == np.short:
            buffer = _JGatherer.tensorBuffer2DShort(row_set, col_set, row_major)
        elif np_type == np.intc:
            buffer = _JGatherer.tensorBuffer2DInt(row_set, col_set, row_major)
        elif np_type == np.int_:
            buffer = _JGatherer.tensorBuffer2DLong(row_set, col_set, row_major)
        elif np_type == np.single:
            buffer = _JGatherer.tensorBuffer2DFloat(row_set, col_set, row_major)
        elif np_type == np.double:
            buffer = _JGatherer.tensorBuffer2DDouble(row_set, col_set, row_major)
        else:
            raise ValueError(f"Data type {np_type} is not supported.")

        tensor = np.frombuffer(buffer, dtype=np_type)
        n_rows = row_set.intSize()
        n_cols = len(col_set)

        # reshape() on the flat buffer yields a view (no copy); it replaces the discouraged
        # in-place assignment to ndarray.shape.
        if row_major:
            # The buffer is interpreted as (columns, rows) and transposed, so the result is
            # still indexed [row, column].
            return tensor.reshape((n_cols, n_rows)).T
        return tensor.reshape((n_rows, n_cols))
    except Exception as e:
        raise DHError(e, f"failed to convert rows: {row_set} and cols: {col_set} to a 2D NumPy array") from e