# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# ---------------------------------------------------------------------
# Low-level Acero bindings

# cython: profile=False
# distutils: language = c++
# cython: language_level = 3

from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
from pyarrow.includes.libarrow_acero cimport *
from pyarrow.lib cimport (Table, pyarrow_unwrap_table, pyarrow_wrap_table,
                          RecordBatchReader)
from pyarrow.lib import frombytes, tobytes
from pyarrow._compute cimport (
    Expression, FunctionOptions, _ensure_field_ref, _true,
    unwrap_null_placement, unwrap_sort_keys
)


cdef class ExecNodeOptions(_Weakrefable):
    """
    Base class for the node options.

    Use one of the subclasses to construct an options object.
    """
    __slots__ = ()  # avoid mistakenly creating attributes

    cdef void init(self, const shared_ptr[CExecNodeOptions]& sp):
        self.wrapped = sp

    cdef inline shared_ptr[CExecNodeOptions] unwrap(self) nogil:
        return self.wrapped


cdef class _TableSourceNodeOptions(ExecNodeOptions):

    def _set_options(self, Table table):
        cdef:
            shared_ptr[CTable] c_table

        c_table = pyarrow_unwrap_table(table)
        self.wrapped.reset(
            new CTableSourceNodeOptions(c_table)
        )

class TableSourceNodeOptions(_TableSourceNodeOptions):
    """
    A Source node which accepts a table.

    This is the option class for the "table_source" node factory.

    Parameters
    ----------
    table : pyarrow.Table
        The table which acts as the data source.
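
    Examples
    --------
    A minimal sketch of typical usage; it assumes this class is
    re-exported as ``pyarrow.acero.TableSourceNodeOptions``:

    >>> import pyarrow as pa
    >>> from pyarrow.acero import Declaration, TableSourceNodeOptions
    >>> table = pa.table({"a": [1, 2, 3]})
    >>> source = Declaration("table_source", TableSourceNodeOptions(table))
    >>> result = source.to_table()  # round-trips the input table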
"""
|
||
|
|
||
|
def __init__(self, Table table):
|
||
|
self._set_options(table)
|
||
|
|
||
|
|
||
|
cdef class _FilterNodeOptions(ExecNodeOptions):

    def _set_options(self, Expression filter_expression not None):
        self.wrapped.reset(
            new CFilterNodeOptions(<CExpression>filter_expression.unwrap())
        )


class FilterNodeOptions(_FilterNodeOptions):
    """
    Make a node which excludes some rows from batches passed through it.

    This is the option class for the "filter" node factory.

    The "filter" operation provides an option to define data filtering
    criteria. It selects rows where the given expression evaluates to true.
    Filters can be written using pyarrow.compute.Expression, and the
    expression must have a return type of boolean.

    Parameters
    ----------
    filter_expression : pyarrow.compute.Expression
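
    Examples
    --------
    A minimal sketch of typical usage; it assumes the public re-exports
    in ``pyarrow.acero``:

    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> from pyarrow.acero import (
    ...     Declaration, FilterNodeOptions, TableSourceNodeOptions
    ... )
    >>> table = pa.table({"a": [1, 2, 3]})
    >>> decl = Declaration.from_sequence([
    ...     Declaration("table_source", TableSourceNodeOptions(table)),
    ...     Declaration("filter", FilterNodeOptions(pc.field("a") > 1)),
    ... ])
    >>> result = decl.to_table()  # keeps only the rows where a > 1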
"""
|
||
|
|
||
|
def __init__(self, Expression filter_expression):
|
||
|
self._set_options(filter_expression)
|
||
|
|
||
|
|
||
|
cdef class _ProjectNodeOptions(ExecNodeOptions):
|
||
|
|
||
|
    def _set_options(self, expressions, names=None):
        cdef:
            Expression expr
            vector[CExpression] c_expressions
            vector[c_string] c_names

        for expr in expressions:
            c_expressions.push_back(expr.unwrap())

        if names is not None:
            if len(names) != len(expressions):
                raise ValueError(
                    "The number of names should be equal to the number of expressions"
                )

            for name in names:
                c_names.push_back(<c_string>tobytes(name))

            self.wrapped.reset(
                new CProjectNodeOptions(c_expressions, c_names)
            )
        else:
            self.wrapped.reset(
                new CProjectNodeOptions(c_expressions)
            )


class ProjectNodeOptions(_ProjectNodeOptions):
    """
    Make a node which executes expressions on input batches,
    producing batches of the same length with new columns.

    This is the option class for the "project" node factory.

    The "project" operation rearranges, deletes, transforms, and
    creates columns. Each output column is computed by evaluating
    an expression against the source record batch. These must be
    scalar expressions (expressions consisting of scalar literals,
    field references and scalar functions, i.e. elementwise functions
    that return one value for each input row independent of the value
    of all other rows).

    Parameters
    ----------
    expressions : list of pyarrow.compute.Expression
        List of expressions to evaluate against the source batch. These
        must be scalar expressions.
    names : list of str, optional
        List of names for each of the output columns (same length as
        `expressions`). If `names` is not provided, the string
        representations of the expressions will be used.
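
    Examples
    --------
    A minimal sketch of typical usage; it assumes the public re-exports
    in ``pyarrow.acero``:

    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> from pyarrow.acero import (
    ...     Declaration, ProjectNodeOptions, TableSourceNodeOptions
    ... )
    >>> table = pa.table({"a": [1, 2, 3]})
    >>> decl = Declaration.from_sequence([
    ...     Declaration("table_source", TableSourceNodeOptions(table)),
    ...     Declaration("project", ProjectNodeOptions(
    ...         [pc.field("a"), pc.multiply(pc.field("a"), 2)],
    ...         names=["a", "a_doubled"],
    ...     )),
    ... ])
    >>> result = decl.to_table()  # two columns: "a" and "a_doubled"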
"""
|
||
|
|
||
|
def __init__(self, expressions, names=None):
|
||
|
self._set_options(expressions, names)
|
||
|
|
||
|
|
||
|
cdef class _AggregateNodeOptions(ExecNodeOptions):
|
||
|
|
||
|
    def _set_options(self, aggregates, keys=None):
        cdef:
            CAggregate c_aggr
            vector[CAggregate] c_aggregations
            vector[CFieldRef] c_keys

        for arg_names, func_name, opts, name in aggregates:
            c_aggr.function = tobytes(func_name)
            if opts is not None:
                c_aggr.options = (<FunctionOptions?>opts).wrapped
            else:
                c_aggr.options = <shared_ptr[CFunctionOptions]>nullptr
            if not isinstance(arg_names, (list, tuple)):
                arg_names = [arg_names]
            for arg in arg_names:
                c_aggr.target.push_back(_ensure_field_ref(arg))
            c_aggr.name = tobytes(name)

            c_aggregations.push_back(move(c_aggr))

        if keys is None:
            keys = []
        for name in keys:
            c_keys.push_back(_ensure_field_ref(name))

        self.wrapped.reset(
            new CAggregateNodeOptions(c_aggregations, c_keys)
        )


class AggregateNodeOptions(_AggregateNodeOptions):
    """
    Make a node which aggregates input batches, optionally grouped by keys.

    This is the option class for the "aggregate" node factory.

    Acero supports two types of aggregates: "scalar" aggregates,
    and "hash" aggregates. Scalar aggregates reduce an array or scalar
    input to a single scalar output (e.g. computing the mean of a column).
    Hash aggregates act like GROUP BY in SQL and first partition data
    based on one or more key columns, then reduce the data in each partition.
    The aggregate node supports both types of computation, and can compute
    any number of aggregations at once.

    Parameters
    ----------
    aggregates : list of tuples
        Aggregations which will be applied to the targeted fields.
        Specified as a list of tuples, where each tuple is one aggregation
        specification and consists of: aggregation target column(s) followed
        by function name, aggregation function options object and the
        output field name.
        The target column(s) specification can be a single field reference,
        an empty list, or a list of fields, for unary, nullary and n-ary
        aggregation functions respectively. Each field reference can be a
        string column name or expression.
    keys : list of field references, optional
        Keys by which aggregations will be grouped. Each key can reference
        a field using a string name or expression.
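
    Examples
    --------
    A minimal sketch of typical usage; it assumes the public re-exports
    in ``pyarrow.acero``, and that grouped (hash) aggregations use the
    "hash_"-prefixed compute function names:

    >>> import pyarrow as pa
    >>> from pyarrow.acero import (
    ...     AggregateNodeOptions, Declaration, TableSourceNodeOptions
    ... )
    >>> table = pa.table({"a": [1, 2, 3], "b": ["x", "x", "y"]})
    >>> source = Declaration("table_source", TableSourceNodeOptions(table))
    >>> scalar = Declaration(
    ...     "aggregate",
    ...     AggregateNodeOptions([("a", "sum", None, "a_sum")]),
    ...     inputs=[source],
    ... )
    >>> grouped = Declaration(
    ...     "aggregate",
    ...     AggregateNodeOptions([("a", "hash_sum", None, "a_sum")], keys=["b"]),
    ...     inputs=[source],
    ... )
    >>> totals, per_key = scalar.to_table(), grouped.to_table()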
"""
|
||
|
|
||
|
def __init__(self, aggregates, keys=None):
|
||
|
self._set_options(aggregates, keys)
|
||
|
|
||
|
|
||
|
cdef class _OrderByNodeOptions(ExecNodeOptions):
|
||
|
|
||
|
    def _set_options(self, sort_keys, null_placement):
        self.wrapped.reset(
            new COrderByNodeOptions(
                COrdering(unwrap_sort_keys(sort_keys, allow_str=False),
                          unwrap_null_placement(null_placement))
            )
        )


class OrderByNodeOptions(_OrderByNodeOptions):
    """
    Make a node which applies a new ordering to the data.

    Currently this node works by accumulating all data, sorting, and then
    emitting the new data with an updated batch index.
    Larger-than-memory sort is not currently supported.

    This is the option class for the "order_by" node factory.

    Parameters
    ----------
    sort_keys : sequence of (name, order) tuples
        Names of field/column keys to sort the input on,
        along with the order each field/column is sorted in.
        Accepted values for `order` are "ascending", "descending".
        Each field reference can be a string column name or expression.
    null_placement : str, default "at_end"
        Where nulls in input should be sorted, only applying to
        columns/fields mentioned in `sort_keys`.
        Accepted values are "at_start", "at_end".
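
    Examples
    --------
    A minimal sketch of typical usage; it assumes the public re-exports
    in ``pyarrow.acero``:

    >>> import pyarrow as pa
    >>> from pyarrow.acero import (
    ...     Declaration, OrderByNodeOptions, TableSourceNodeOptions
    ... )
    >>> table = pa.table({"a": [3, 1, 2]})
    >>> decl = Declaration.from_sequence([
    ...     Declaration("table_source", TableSourceNodeOptions(table)),
    ...     Declaration("order_by", OrderByNodeOptions([("a", "ascending")])),
    ... ])
    >>> result = decl.to_table()  # rows sorted by "a"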
"""
|
||
|
|
||
|
def __init__(self, sort_keys=(), *, null_placement="at_end"):
|
||
|
self._set_options(sort_keys, null_placement)
|
||
|
|
||
|
|
||
|
cdef class _HashJoinNodeOptions(ExecNodeOptions):
|
||
|
|
||
|
    def _set_options(
        self, join_type, left_keys, right_keys, left_output=None, right_output=None,
        output_suffix_for_left="", output_suffix_for_right="",
        Expression filter_expression=None,
    ):
        cdef:
            CJoinType c_join_type
            vector[CFieldRef] c_left_keys
            vector[CFieldRef] c_right_keys
            vector[CFieldRef] c_left_output
            vector[CFieldRef] c_right_output
            CExpression c_filter_expression

        # join type
        if join_type == "left semi":
            c_join_type = CJoinType_LEFT_SEMI
        elif join_type == "right semi":
            c_join_type = CJoinType_RIGHT_SEMI
        elif join_type == "left anti":
            c_join_type = CJoinType_LEFT_ANTI
        elif join_type == "right anti":
            c_join_type = CJoinType_RIGHT_ANTI
        elif join_type == "inner":
            c_join_type = CJoinType_INNER
        elif join_type == "left outer":
            c_join_type = CJoinType_LEFT_OUTER
        elif join_type == "right outer":
            c_join_type = CJoinType_RIGHT_OUTER
        elif join_type == "full outer":
            c_join_type = CJoinType_FULL_OUTER
        else:
            raise ValueError("Unsupported join type")

        # left/right keys
        if not isinstance(left_keys, (list, tuple)):
            left_keys = [left_keys]
        for key in left_keys:
            c_left_keys.push_back(_ensure_field_ref(key))
        if not isinstance(right_keys, (list, tuple)):
            right_keys = [right_keys]
        for key in right_keys:
            c_right_keys.push_back(_ensure_field_ref(key))

        if filter_expression is None:
            c_filter_expression = _true
        else:
            c_filter_expression = filter_expression.unwrap()

        # left/right output fields
        if left_output is not None and right_output is not None:
            for colname in left_output:
                c_left_output.push_back(_ensure_field_ref(colname))
            for colname in right_output:
                c_right_output.push_back(_ensure_field_ref(colname))

            self.wrapped.reset(
                new CHashJoinNodeOptions(
                    c_join_type, c_left_keys, c_right_keys,
                    c_left_output, c_right_output,
                    c_filter_expression,
                    <c_string>tobytes(output_suffix_for_left),
                    <c_string>tobytes(output_suffix_for_right)
                )
            )
        else:
            self.wrapped.reset(
                new CHashJoinNodeOptions(
                    c_join_type, c_left_keys, c_right_keys,
                    c_filter_expression,
                    <c_string>tobytes(output_suffix_for_left),
                    <c_string>tobytes(output_suffix_for_right)
                )
            )


class HashJoinNodeOptions(_HashJoinNodeOptions):
    """
    Make a node which implements a join operation using the hash join strategy.

    This is the option class for the "hashjoin" node factory.

    Parameters
    ----------
    join_type : str
        Type of join. One of "left semi", "right semi", "left anti",
        "right anti", "inner", "left outer", "right outer", "full outer".
    left_keys : str, Expression or list
        Key fields from left input. Each key can be a string column name
        or a field expression, or a list of such field references.
    right_keys : str, Expression or list
        Key fields from right input. See `left_keys` for details.
    left_output : list, optional
        List of output fields passed from left input. If left and right
        output fields are not specified, all valid fields from both left and
        right input will be output. Each field can be a string column name
        or a field expression.
    right_output : list, optional
        List of output fields passed from right input. If left and right
        output fields are not specified, all valid fields from both left and
        right input will be output. Each field can be a string column name
        or a field expression.
    output_suffix_for_left : str
        Suffix added to names of output fields coming from left input
        (used to distinguish, if necessary, between fields of the same
        name in left and right input; it can be left empty if there are
        no name collisions).
    output_suffix_for_right : str
        Suffix added to names of output fields coming from right input,
        see `output_suffix_for_left` for details.
    filter_expression : pyarrow.compute.Expression
        Residual filter which is applied to matching rows.
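
    Examples
    --------
    A minimal sketch of typical usage; it assumes the public re-exports
    in ``pyarrow.acero``. The node takes two inputs, left and right:

    >>> import pyarrow as pa
    >>> from pyarrow.acero import (
    ...     Declaration, HashJoinNodeOptions, TableSourceNodeOptions
    ... )
    >>> left = pa.table({"key": [1, 2], "a": [10, 20]})
    >>> right = pa.table({"key": [2, 3], "b": [200, 300]})
    >>> join = Declaration(
    ...     "hashjoin",
    ...     HashJoinNodeOptions("inner", left_keys="key", right_keys="key"),
    ...     inputs=[
    ...         Declaration("table_source", TableSourceNodeOptions(left)),
    ...         Declaration("table_source", TableSourceNodeOptions(right)),
    ...     ],
    ... )
    >>> result = join.to_table()  # only the rows with key == 2 match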
"""
|
||
|
|
||
|
def __init__(
|
||
|
self, join_type, left_keys, right_keys, left_output=None, right_output=None,
|
||
|
output_suffix_for_left="", output_suffix_for_right="", filter_expression=None,
|
||
|
):
|
||
|
self._set_options(
|
||
|
join_type, left_keys, right_keys, left_output, right_output,
|
||
|
output_suffix_for_left, output_suffix_for_right, filter_expression
|
||
|
)
|
||
|
|
||
|
|
||
|
cdef class _AsofJoinNodeOptions(ExecNodeOptions):
|
||
|
|
||
|
    def _set_options(self, left_on, left_by, right_on, right_by, tolerance):
        cdef:
            vector[CFieldRef] c_left_by
            vector[CFieldRef] c_right_by
            CAsofJoinKeys c_left_keys
            CAsofJoinKeys c_right_keys
            vector[CAsofJoinKeys] c_input_keys

        # Prepare left AsofJoinNodeOptions::Keys
        if not isinstance(left_by, (list, tuple)):
            left_by = [left_by]
        for key in left_by:
            c_left_by.push_back(_ensure_field_ref(key))

        c_left_keys.on_key = _ensure_field_ref(left_on)
        c_left_keys.by_key = c_left_by

        c_input_keys.push_back(c_left_keys)

        # Prepare right AsofJoinNodeOptions::Keys
        if not isinstance(right_by, (list, tuple)):
            right_by = [right_by]
        for key in right_by:
            c_right_by.push_back(_ensure_field_ref(key))

        c_right_keys.on_key = _ensure_field_ref(right_on)
        c_right_keys.by_key = c_right_by

        c_input_keys.push_back(c_right_keys)

        self.wrapped.reset(
            new CAsofJoinNodeOptions(
                c_input_keys,
                tolerance,
            )
        )


class AsofJoinNodeOptions(_AsofJoinNodeOptions):
    """
    Make a node which implements an 'as of join' operation.

    This is the option class for the "asofjoin" node factory.

    Parameters
    ----------
    left_on : str, Expression
        The left key on which the join operation should be performed.
        Can be a string column name or a field expression.

        An inexact match is used on the "on" key, i.e. a row is considered a
        match if and only if left_on - tolerance <= right_on <= left_on.

        The input dataset must be sorted by the "on" key. Must be a single
        field of a common type.

        Currently, the "on" key must be an integer, date, or timestamp type.
    left_by : str, Expression or list
        The left keys on which the join operation should be performed.
        Exact equality is used for each field of the "by" keys.
        Each key can be a string column name or a field expression,
        or a list of such field references.
    right_on : str, Expression
        The right key on which the join operation should be performed.
        See `left_on` for details.
    right_by : str, Expression or list
        The right keys on which the join operation should be performed.
        See `left_by` for details.
    tolerance : int
        The tolerance to use for the asof join. The tolerance is interpreted
        in the same units as the "on" key.
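
    Examples
    --------
    A minimal sketch of typical usage; it assumes the public re-exports
    in ``pyarrow.acero``. Both inputs must be sorted by the "on" key, and
    with tolerance=1 each left row matches a right row at most one unit
    earlier (per the matching rule documented above):

    >>> import pyarrow as pa
    >>> from pyarrow.acero import (
    ...     AsofJoinNodeOptions, Declaration, TableSourceNodeOptions
    ... )
    >>> left = pa.table({"ts": [1, 5, 10], "key": [1, 1, 1], "a": [10, 50, 100]})
    >>> right = pa.table({"ts": [0, 4, 9], "key": [1, 1, 1], "b": [0, 4, 9]})
    >>> join = Declaration(
    ...     "asofjoin",
    ...     AsofJoinNodeOptions("ts", "key", "ts", "key", tolerance=1),
    ...     inputs=[
    ...         Declaration("table_source", TableSourceNodeOptions(left)),
    ...         Declaration("table_source", TableSourceNodeOptions(right)),
    ...     ],
    ... )
    >>> result = join.to_table()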
"""
|
||
|
|
||
|
def __init__(self, left_on, left_by, right_on, right_by, tolerance):
|
||
|
self._set_options(left_on, left_by, right_on, right_by, tolerance)
|
||
|
|
||
|
|
||
|
cdef class Declaration(_Weakrefable):
    """
    Helper class for declaring the nodes of an ExecPlan.

    A Declaration represents an unconstructed ExecNode, and potentially
    more since its inputs may also be Declarations or when constructed
    with ``from_sequence``.

    The possible ExecNodes to use are registered with a name,
    the "factory name", and need to be specified using this name, together
    with its corresponding ExecNodeOptions subclass.

    Parameters
    ----------
    factory_name : str
        The ExecNode factory name, such as "table_source", "filter",
        "project" etc. See the ExecNodeOptions subclasses for the exact
        factory names to use.
    options : ExecNodeOptions
        Corresponding ExecNodeOptions subclass (matching the factory name).
    inputs : list of Declaration, optional
        Input nodes for this declaration. Optional if the node is a source
        node, or when the declaration gets combined later with
        ``from_sequence``.

    Returns
    -------
    Declaration
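
    Examples
    --------
    A minimal sketch of building and running a two-node plan; it assumes
    the public re-exports in ``pyarrow.acero``:

    >>> import pyarrow as pa
    >>> import pyarrow.compute as pc
    >>> from pyarrow.acero import (
    ...     Declaration, FilterNodeOptions, TableSourceNodeOptions
    ... )
    >>> table = pa.table({"a": [1, 2, 3]})
    >>> source = Declaration("table_source", TableSourceNodeOptions(table))
    >>> filtered = Declaration("filter", FilterNodeOptions(pc.field("a") > 1),
    ...                        inputs=[source])
    >>> result = filtered.to_table()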
"""
|
||
|
cdef void init(self, const CDeclaration& c_decl):
|
||
|
self.decl = c_decl
|
||
|
|
||
|
@staticmethod
|
||
|
cdef wrap(const CDeclaration& c_decl):
|
||
|
cdef Declaration self = Declaration.__new__(Declaration)
|
||
|
self.init(c_decl)
|
||
|
return self
|
||
|
|
||
|
cdef inline CDeclaration unwrap(self) nogil:
|
||
|
return self.decl
|
||
|
|
||
|
def __init__(self, factory_name, ExecNodeOptions options, inputs=None):
|
||
|
cdef:
|
||
|
c_string c_factory_name
|
||
|
CDeclaration c_decl
|
||
|
vector[CDeclaration.Input] c_inputs
|
||
|
|
||
|
c_factory_name = tobytes(factory_name)
|
||
|
|
||
|
if inputs is not None:
|
||
|
for ipt in inputs:
|
||
|
c_inputs.push_back(
|
||
|
CDeclaration.Input((<Declaration>ipt).unwrap())
|
||
|
)
|
||
|
|
||
|
c_decl = CDeclaration(c_factory_name, c_inputs, options.unwrap())
|
||
|
self.init(c_decl)
|
||
|
|
||
|
    @staticmethod
    def from_sequence(decls):
        """
        Convenience factory for the common case of a simple sequence of nodes.

        Each of the declarations will be appended to the inputs of the
        subsequent declaration, and the final modified declaration will
        be returned.

        Parameters
        ----------
        decls : list of Declaration

        Returns
        -------
        Declaration
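
        Examples
        --------
        A minimal sketch; it assumes the public re-exports in
        ``pyarrow.acero``:

        >>> import pyarrow as pa
        >>> import pyarrow.compute as pc
        >>> from pyarrow.acero import (
        ...     Declaration, FilterNodeOptions, TableSourceNodeOptions
        ... )
        >>> table = pa.table({"a": [1, 2, 3]})
        >>> decl = Declaration.from_sequence([
        ...     Declaration("table_source", TableSourceNodeOptions(table)),
        ...     Declaration("filter", FilterNodeOptions(pc.field("a") > 1)),
        ... ])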
"""
|
||
|
cdef:
|
||
|
vector[CDeclaration] c_decls
|
||
|
CDeclaration c_decl
|
||
|
|
||
|
        for decl in decls:
            c_decls.push_back((<Declaration> decl).unwrap())

        c_decl = CDeclaration.Sequence(c_decls)
        return Declaration.wrap(c_decl)

    def __str__(self):
        return frombytes(GetResultValue(DeclarationToString(self.decl)))

    def __repr__(self):
        return f"<pyarrow.acero.Declaration>\n{self}"

    def to_table(self, bint use_threads=True):
        """
        Run the declaration and collect the results into a table.

        This method will implicitly add a sink node to the declaration
        to collect results into a table. It will then create an ExecPlan
        from the declaration, start the exec plan, block until the plan
        has finished, and return the created table.

        Parameters
        ----------
        use_threads : bool, default True
            If set to False, then all CPU work will be done on the calling
            thread. I/O tasks will still happen on the I/O executor
            and may be multi-threaded (but should not use significant CPU
            resources).

        Returns
        -------
        pyarrow.Table
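
        Examples
        --------
        A minimal sketch; it assumes the public re-exports in
        ``pyarrow.acero``:

        >>> import pyarrow as pa
        >>> from pyarrow.acero import Declaration, TableSourceNodeOptions
        >>> table = pa.table({"a": [1, 2, 3]})
        >>> decl = Declaration("table_source", TableSourceNodeOptions(table))
        >>> result = decl.to_table()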
"""
|
||
|
cdef:
|
||
|
shared_ptr[CTable] c_table
|
||
|
|
||
|
with nogil:
|
||
|
c_table = GetResultValue(DeclarationToTable(self.unwrap(), use_threads))
|
||
|
return pyarrow_wrap_table(c_table)
|
||
|
|
||
|
def to_reader(self, bint use_threads=True):
|
||
|
"""Run the declaration and return results as a RecordBatchReader.
|
||
|
|
||
|
For details about the parameters, see `to_table`.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
pyarrow.RecordBatchReader
|
||
|
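
        Examples
        --------
        A minimal sketch; it assumes the public re-exports in
        ``pyarrow.acero``:

        >>> import pyarrow as pa
        >>> from pyarrow.acero import Declaration, TableSourceNodeOptions
        >>> table = pa.table({"a": [1, 2, 3]})
        >>> decl = Declaration("table_source", TableSourceNodeOptions(table))
        >>> reader = decl.to_reader()
        >>> result = reader.read_all()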
"""
|
||
|
cdef:
|
||
|
RecordBatchReader reader
|
||
|
reader = RecordBatchReader.__new__(RecordBatchReader)
|
||
|
reader.reader.reset(
|
||
|
GetResultValue(DeclarationToReader(self.unwrap(), use_threads)).release()
|
||
|
)
|
||
|
return reader
|