Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-39301: [Archery][CI][Integration] Add nanoarrow to archery + integration setup #39302

Merged
merged 26 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ jobs:
with:
repository: apache/arrow-rs
path: rust
- name: Checkout Arrow nanoarrow
uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0
with:
repository: apache/arrow-nanoarrow
path: nanoarrow
- name: Free up disk space
run: |
ci/scripts/util_free_space.sh
Expand All @@ -98,6 +103,7 @@ jobs:
archery docker run \
-e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \
-e ARCHERY_INTEGRATION_WITH_RUST=1 \
-e ARCHERY_INTEGRATION_WITH_NANOARROW=1 \
conda-integration
- name: Docker Push
if: >-
Expand Down
2 changes: 2 additions & 0 deletions ci/scripts/integration_arrow_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ build_dir=${2}

${arrow_dir}/ci/scripts/rust_build.sh ${arrow_dir} ${build_dir}

${arrow_dir}/ci/scripts/nanoarrow_build.sh ${arrow_dir} ${build_dir}

if [ "${ARROW_INTEGRATION_CPP}" == "ON" ]; then
${arrow_dir}/ci/scripts/cpp_build.sh ${arrow_dir} ${build_dir}
fi
Expand Down
52 changes: 52 additions & 0 deletions ci/scripts/nanoarrow_build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

set -e

# Usage: nanoarrow_build.sh <arrow_dir> <build_dir>
#
# Expects the arrow-nanoarrow sources at <arrow_dir>/nanoarrow and builds them
# into <build_dir>/nanoarrow.
arrow_dir=${1}
source_dir="${arrow_dir}/nanoarrow"
build_dir="${2}/nanoarrow"

# This file is used to build the nanoarrow binaries needed for the archery
# integration tests. Testing of the nanoarrow implementation in normal CI is handled
# by github workflows in the arrow-nanoarrow repository.

# An unset or empty ARCHERY_INTEGRATION_WITH_NANOARROW is treated as 1 (attempt
# the build). The explicit default keeps the numeric comparison from printing
# "integer expression expected" while preserving the previous fall-through.
if [ "${ARCHERY_INTEGRATION_WITH_NANOARROW:-1}" -eq "0" ]; then
  echo "====================================================================="
  echo "Not building nanoarrow"
  echo "====================================================================="
  exit 0;
elif [ ! -d "${source_dir}" ]; then
  echo "====================================================================="
  echo "The nanoarrow source is missing. Please clone the arrow-nanoarrow repository"
  echo "to arrow/nanoarrow before running the integration tests:"
  echo " git clone https://github.com/apache/arrow-nanoarrow.git path/to/arrow/nanoarrow"
  echo "====================================================================="
  exit 1;
fi

set -x

# Paths are quoted so that source/build directories containing spaces work.
mkdir -p "${build_dir}"
pushd "${build_dir}"

cmake "${source_dir}" -DNANOARROW_BUILD_INTEGRATION_TESTS=ON
cmake --build .

popd
5 changes: 4 additions & 1 deletion dev/archery/archery/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,9 @@ def _set_default(opt, default):
@click.option('--with-rust', type=bool, default=False,
help='Include Rust in integration tests',
envvar="ARCHERY_INTEGRATION_WITH_RUST")
@click.option('--with-nanoarrow', type=bool, default=False,
help='Include nanoarrow in integration tests',
envvar="ARCHERY_INTEGRATION_WITH_NANOARROW")
@click.option('--write_generated_json', default="",
help='Generate test JSON to indicated path')
@click.option('--run-ipc', is_flag=True, default=False,
Expand Down Expand Up @@ -776,7 +779,7 @@ def integration(with_all=False, random_seed=12345, **args):

gen_path = args['write_generated_json']

languages = ['cpp', 'csharp', 'java', 'js', 'go', 'rust']
languages = ['cpp', 'csharp', 'java', 'js', 'go', 'rust', 'nanoarrow']
formats = ['ipc', 'flight', 'c_data']

enabled_languages = 0
Expand Down
9 changes: 6 additions & 3 deletions dev/archery/archery/integration/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1928,18 +1928,21 @@ def _temp_path():
.skip_tester('C#')
.skip_tester('Java')
.skip_tester('JS')
.skip_tester('Rust'),
.skip_tester('Rust')
.skip_tester('nanoarrow'),

generate_binary_view_case()
.skip_tester('Java')
.skip_tester('JS')
.skip_tester('Rust'),
.skip_tester('Rust')
.skip_tester('nanoarrow'),

generate_list_view_case()
.skip_tester('C#') # Doesn't support large list views
.skip_tester('Java')
.skip_tester('JS')
.skip_tester('Rust'),
.skip_tester('Rust')
.skip_tester('nanoarrow'),

generate_extension_case()
# TODO: ensure the extension is registered in the C++ entrypoint
Expand Down
8 changes: 6 additions & 2 deletions dev/archery/archery/integration/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from .tester_java import JavaTester
from .tester_js import JSTester
from .tester_csharp import CSharpTester
from .tester_nanoarrow import NanoarrowTester
from .util import guid, printer
from .util import SKIP_C_ARRAY, SKIP_C_SCHEMA, SKIP_FLIGHT, SKIP_IPC
from ..utils.source import ARROW_ROOT_DEFAULT
Expand Down Expand Up @@ -541,8 +542,8 @@ def get_static_json_files():

def run_all_tests(with_cpp=True, with_java=True, with_js=True,
with_csharp=True, with_go=True, with_rust=False,
run_ipc=False, run_flight=False, run_c_data=False,
tempdir=None, **kwargs):
with_nanoarrow=False, run_ipc=False, run_flight=False,
run_c_data=False, tempdir=None, **kwargs):
tempdir = tempdir or tempfile.mkdtemp(prefix='arrow-integration-')

testers: List[Tester] = []
Expand All @@ -565,6 +566,9 @@ def run_all_tests(with_cpp=True, with_java=True, with_js=True,
if with_rust:
testers.append(RustTester(**kwargs))

if with_nanoarrow:
testers.append(NanoarrowTester(**kwargs))

static_json_files = get_static_json_files()
generated_json_files = datagen.get_generated_json_files(tempdir=tempdir)
json_files = static_json_files + generated_json_files
Expand Down
148 changes: 148 additions & 0 deletions dev/archery/archery/integration/tester_nanoarrow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import functools
import os

from . import cdata
from .tester import Tester, CDataExporter, CDataImporter
from ..utils.source import ARROW_ROOT_DEFAULT


_NANOARROW_PATH = os.environ.get(
"ARROW_NANOARROW_PATH",
os.path.join(ARROW_ROOT_DEFAULT, "nanoarrow/cdata"),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless we clone nanoarrow inside the arrow repo, ARROW_ROOT_DEFAULT would never be the correct path here, as it will include the arrow folder.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you wanted to do this interactively, that is basically what you would have to do (I've tried this and it does work). This is the approach used by the Rust tester, which I copied here. In docker-compose.yml this is overridden so that the build happens elsewhere.

)

# Full path of the nanoarrow C Data integration shared library: the library
# base name plus the `cdata.dll_suffix` suffix, located under _NANOARROW_PATH.
# Used as the default `lib_path` of `_load_ffi`.
_INTEGRATION_DLL = os.path.join(
_NANOARROW_PATH, "libnanoarrow_c_data_integration" + cdata.dll_suffix
)


class NanoarrowTester(Tester):
    """Archery integration tester for nanoarrow.

    Only the four C Data exporter/importer capability flags are enabled;
    the producer/consumer and Flight flags are all disabled, and the
    file-based entry points raise ``NotImplementedError``.
    """

    name = "nanoarrow"

    # Capabilities that are switched on: C Data schema/array export and import.
    C_DATA_SCHEMA_EXPORTER = True
    C_DATA_ARRAY_EXPORTER = True
    C_DATA_SCHEMA_IMPORTER = True
    C_DATA_ARRAY_IMPORTER = True

    # Capabilities that are switched off.
    PRODUCER = False
    CONSUMER = False
    FLIGHT_SERVER = False
    FLIGHT_CLIENT = False

    def make_c_data_exporter(self):
        """Return a new exporter built from this tester's debug flag and args."""
        return NanoarrowCDataExporter(self.debug, self.args)

    def make_c_data_importer(self):
        """Return a new importer built from this tester's debug flag and args."""
        return NanoarrowCDataImporter(self.debug, self.args)

    def validate(self, json_path, arrow_path, quirks=None):
        """Not implemented for nanoarrow."""
        raise NotImplementedError()

    def json_to_file(self, json_path, arrow_path):
        """Not implemented for nanoarrow."""
        raise NotImplementedError()

    def stream_to_file(self, stream_path, file_path):
        """Not implemented for nanoarrow."""
        raise NotImplementedError()

    def file_to_stream(self, file_path, stream_path):
        """Not implemented for nanoarrow."""
        raise NotImplementedError()


# C declarations of the entrypoints looked up on the nanoarrow integration
# library. `_load_ffi` passes this text to `ffi.cdef()` before opening the
# library. The four CDataIntegration functions return a `const char*`, which
# `_CDataBase._check_nanoarrow_error` treats as an error message unless it is
# null.
_nanoarrow_c_data_entrypoints = """
const char* nanoarrow_CDataIntegration_ExportSchemaFromJson(
const char* json_path, struct ArrowSchema* out);

const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson(
const char* json_path, struct ArrowSchema* schema);

const char* nanoarrow_CDataIntegration_ExportBatchFromJson(
const char* json_path, int num_batch, struct ArrowArray* out);

const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson(
const char* json_path, int num_batch, struct ArrowArray* batch);

int64_t nanoarrow_BytesAllocated(void);
"""


@functools.lru_cache
def _load_ffi(ffi, lib_path=_INTEGRATION_DLL):
    """Declare the nanoarrow entrypoints on ``ffi`` and open the library.

    The function is wrapped in ``functools.lru_cache``, so a repeated call
    with the same ``ffi`` and ``lib_path`` returns the cached handle instead
    of running ``cdef`` and ``dlopen`` again.

    Parameters
    ----------
    ffi
        FFI object on which the entrypoint declarations are registered.
    lib_path
        Path of the shared library to open; defaults to ``_INTEGRATION_DLL``.

    Returns
    -------
    The library handle returned by ``ffi.dlopen(lib_path)``.
    """
    ffi.cdef(_nanoarrow_c_data_entrypoints)
    return ffi.dlopen(lib_path)


class _CDataBase:
    """State and error handling shared by the nanoarrow exporter and importer."""

    def __init__(self, debug, args):
        """Store ``debug``/``args`` and load the integration library."""
        self.debug = debug
        self.args = args
        self.ffi = cdata.ffi()
        self.dll = _load_ffi(self.ffi)

    def _check_nanoarrow_error(self, na_error):
        """
        Raise if an integration entrypoint reported an error.

        ``na_error`` is the `const char*` returned by the entrypoint: null
        signals success, anything else points at an error message. The string
        is statically allocated on the nanoarrow side and does not need to be
        released.

        Raises
        ------
        RuntimeError
            When ``na_error`` is not null; the message carries the decoded
            error text.
        """
        ffi = self.ffi
        assert ffi.typeof(na_error) is ffi.typeof("const char*")
        if na_error == ffi.NULL:
            # Null pointer: the call succeeded.
            return
        message = ffi.string(na_error).decode("utf8", errors="replace")
        raise RuntimeError(f"nanoarrow C Data Integration call failed: {message}")


class NanoarrowCDataExporter(CDataExporter, _CDataBase):
    """C Data exporter that delegates to the nanoarrow integration library."""

    def export_schema_from_json(self, json_path, c_schema_ptr):
        """Export the schema described by ``json_path`` into ``c_schema_ptr``.

        Raises ``RuntimeError`` if the library returns an error string.
        """
        export_schema = self.dll.nanoarrow_CDataIntegration_ExportSchemaFromJson
        status = export_schema(str(json_path).encode(), c_schema_ptr)
        self._check_nanoarrow_error(status)

    def export_batch_from_json(self, json_path, num_batch, c_array_ptr):
        """Export batch ``num_batch`` of ``json_path`` into ``c_array_ptr``.

        Raises ``RuntimeError`` if the library returns an error string.
        """
        export_batch = self.dll.nanoarrow_CDataIntegration_ExportBatchFromJson
        status = export_batch(str(json_path).encode(), num_batch, c_array_ptr)
        self._check_nanoarrow_error(status)

    @property
    def supports_releasing_memory(self):
        """Always ``True`` for this exporter."""
        return True

    def record_allocation_state(self):
        """Return the value reported by ``nanoarrow_BytesAllocated()``."""
        return self.dll.nanoarrow_BytesAllocated()


class NanoarrowCDataImporter(CDataImporter, _CDataBase):
    """C Data importer that delegates to the nanoarrow integration library."""

    def import_schema_and_compare_to_json(self, json_path, c_schema_ptr):
        """Import ``c_schema_ptr`` and compare it with the schema in ``json_path``.

        Raises ``RuntimeError`` if the library returns an error string.
        """
        import_schema = (
            self.dll.nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson
        )
        status = import_schema(str(json_path).encode(), c_schema_ptr)
        self._check_nanoarrow_error(status)

    def import_batch_and_compare_to_json(self, json_path, num_batch, c_array_ptr):
        """Import ``c_array_ptr`` and compare it with batch ``num_batch`` of ``json_path``.

        Raises ``RuntimeError`` if the library returns an error string.
        """
        import_batch = (
            self.dll.nanoarrow_CDataIntegration_ImportBatchAndCompareToJson
        )
        status = import_batch(str(json_path).encode(), num_batch, c_array_ptr)
        self._check_nanoarrow_error(status)

    @property
    def supports_releasing_memory(self):
        """Always ``True`` for this importer."""
        return True
2 changes: 2 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1750,9 +1750,11 @@ services:
environment:
<<: [*common, *ccache]
ARCHERY_INTEGRATION_WITH_RUST: 0
ARCHERY_INTEGRATION_WITH_NANOARROW: 1
# Tell Archery where Arrow binaries are located
ARROW_CPP_EXE_PATH: /build/cpp/debug
ARROW_RUST_EXE_PATH: /build/rust/debug
ARROW_NANOARROW_PATH: /build/nanoarrow
command:
["/arrow/ci/scripts/integration_arrow_build.sh /arrow /build &&
/arrow/ci/scripts/integration_arrow.sh /arrow /build"]
Expand Down
Loading