interop.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cudf/column/column.hpp>
20 #include <cudf/detail/transform.hpp>
21 #include <cudf/table/table.hpp>
23 #include <cudf/types.hpp>
24 #include <cudf/utilities/export.hpp>
26 #include <cudf/utilities/span.hpp>
27 
28 #include <rmm/resource_ref.hpp>
29 
30 #include <utility>
31 
32 struct DLManagedTensor;
33 
34 struct ArrowDeviceArray;
35 
36 struct ArrowSchema;
37 
38 struct ArrowArray;
39 
40 struct ArrowArrayStream;
41 
43 // These are types from arrow that we are forward declaring for our API to
44 // avoid needing to include nanoarrow headers.
45 typedef int32_t ArrowDeviceType; // NOLINT
46 
47 #define ARROW_DEVICE_CUDA 2 // NOLINT
49 
50 namespace CUDF_EXPORT cudf {
75 std::unique_ptr<table> from_dlpack(
76  DLManagedTensor const* managed_tensor,
79 
99 DLManagedTensor* to_dlpack(
100  table_view const& input,
103  // end of group
105 
119  std::string name;
120  std::vector<column_metadata> children_meta;
121 
127  column_metadata(std::string _name) : name(std::move(_name)) {}
128  column_metadata() = default;
129 };
130 
// Alias for a std::unique_ptr to an ArrowSchema with a custom deleter.
// The deleter is a plain function pointer of type void (*)(ArrowSchema*).
135 using unique_schema_t = std::unique_ptr<ArrowSchema, void (*)(ArrowSchema*)>;
136 
// Alias for a std::unique_ptr to an ArrowDeviceArray with a custom deleter.
// The deleter is a plain function pointer of type void (*)(ArrowDeviceArray*).
141 using unique_device_array_t = std::unique_ptr<ArrowDeviceArray, void (*)(ArrowDeviceArray*)>;
142 
// Alias for a vector of owning columns (std::unique_ptr<cudf::column>),
// used for conversion from ArrowDeviceArray.
147 using owned_columns_t = std::vector<std::unique_ptr<cudf::column>>;
148 
157 template <typename ViewType>
164  explicit custom_view_deleter(owned_columns_t&& owned) : owned_mem_{std::move(owned)} {}
165 
171  void operator()(ViewType* ptr) const { delete ptr; }
172 
174 };
175 
181  std::unique_ptr<cudf::table_view, custom_view_deleter<cudf::table_view>>;
182 
188  std::unique_ptr<cudf::column_view, custom_view_deleter<cudf::column_view>>;
189 
190 namespace interop {
191 
// Forward declaration only; the definition is not part of this header.
// arrow_column and arrow_table each hold a std::shared_ptr to this type.
192 struct arrow_array_container;
193 
208 
// Helper function to generate empty table metadata (all columns with no
// names) for arrow conversion.
//
// @param input Table view to generate metadata for
// @return A vector of cudf::column_metadata for the input table
222 std::vector<cudf::column_metadata> get_table_metadata(cudf::table_view const& input);
223 
233  public:
246  column_metadata const& metadata,
249 
262  arrow_column(ArrowSchema&& schema,
263  ArrowDeviceArray&& input,
266 
279  arrow_column(ArrowSchema&& schema,
280  ArrowArray&& input,
283 
295  ArrowSchema* output,
298 
307  void to_arrow(ArrowDeviceArray* output,
308  ArrowDeviceType device_type = ARROW_DEVICE_CUDA,
311 
// Get a view of the column data.
// Const member; marked [[nodiscard]] so callers are warned if they drop the returned view.
319  [[nodiscard]] column_view view() const;
320 
321  private:
322  std::shared_ptr<arrow_array_container>
323  container;
325  owned_columns_t view_columns;
326  column_view cached_view;
327 };
328 
337 class arrow_table {
338  public:
354 
367  arrow_table(ArrowSchema&& schema,
368  ArrowDeviceArray&& input,
371 
384  arrow_table(ArrowSchema&& schema,
385  ArrowArray&& input,
388 
400  arrow_table(ArrowArrayStream&& input,
403 
415  ArrowSchema* output,
418 
427  void to_arrow(ArrowDeviceArray* output,
428  ArrowDeviceType device_type = ARROW_DEVICE_CUDA,
431 
// Get a view of the table data.
// Const member; marked [[nodiscard]] so callers are warned if they drop the returned view.
439  [[nodiscard]] table_view view() const;
440 
441  private:
442  std::shared_ptr<arrow_array_container>
443  container;
445  owned_columns_t view_columns;
446  table_view cached_view;
447 };
448 
449 } // namespace interop
450 
468 
494  cudf::table&& table,
497 
523  cudf::column&& col,
526 
555  cudf::table_view const& table,
558 
587  cudf::column_view const& col,
590 
612  cudf::table_view const& table,
615 
637  cudf::column_view const& col,
640 
658 std::unique_ptr<cudf::table> from_arrow(
659  ArrowSchema const* schema,
660  ArrowArray const* input,
663 
677 std::unique_ptr<cudf::column> from_arrow_column(
678  ArrowSchema const* schema,
679  ArrowArray const* input,
682 
703 std::unique_ptr<table> from_arrow_host(
704  ArrowSchema const* schema,
705  ArrowDeviceArray const* input,
708 
722 std::unique_ptr<table> from_arrow_stream(
723  ArrowArrayStream* input,
726 
746 std::unique_ptr<column> from_arrow_host_column(
747  ArrowSchema const* schema,
748  ArrowDeviceArray const* input,
751 
790  ArrowSchema const* schema,
791  ArrowDeviceArray const* input,
794 
828  ArrowSchema const* schema,
829  ArrowDeviceArray const* input,
832  // end of group
834 } // namespace CUDF_EXPORT cudf
A non-owning, immutable view of device data as a column of elements, some of which may be null as indicated by a bitmask.
A container of nullable device data as a column of elements.
Definition: column.hpp:47
A standard interchange medium for ArrowDeviceArray data in cudf.
Definition: interop.hpp:232
arrow_column(ArrowSchema &&schema, ArrowDeviceArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
arrow_column(ArrowSchema &&schema, ArrowArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
column_view view() const
Get a view of the column data.
void to_arrow(ArrowDeviceArray *output, ArrowDeviceType device_type=ARROW_DEVICE_CUDA, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the column to an ArrowDeviceArray.
arrow_column(cudf::column &&input, column_metadata const &metadata, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow column object.
void to_arrow_schema(ArrowSchema *output, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the column to an ArrowSchema.
A standard interchange medium for ArrowDeviceArray data in cudf.
Definition: interop.hpp:337
arrow_table(ArrowSchema &&schema, ArrowDeviceArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
arrow_table(ArrowArrayStream &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
arrow_table(cudf::table &&input, cudf::host_span< column_metadata const > metadata, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
void to_arrow(ArrowDeviceArray *output, ArrowDeviceType device_type=ARROW_DEVICE_CUDA, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the table to an ArrowDeviceArray.
arrow_table(ArrowSchema &&schema, ArrowArray &&input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Construct a new arrow table object.
table_view view() const
Get a view of the table data.
void to_arrow_schema(ArrowSchema *output, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref()) const
Convert the table to an ArrowSchema.
A set of cudf::column_view's of the same size.
Definition: table_view.hpp:200
A set of cudf::column's of the same size.
Definition: table.hpp:40
Class definition for cudf::column.
rmm::cuda_stream_view const get_default_stream()
Get the current default stream.
std::unique_ptr< cudf::table_view, custom_view_deleter< cudf::table_view > > unique_table_view_t
typedef for a unique_ptr to a cudf::table_view with custom deleter
Definition: interop.hpp:181
std::vector< std::unique_ptr< cudf::column > > owned_columns_t
typedef for a vector of owning columns, used for conversion from ArrowDeviceArray
Definition: interop.hpp:147
std::unique_ptr< column > from_arrow_host_column(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column from given ArrowDeviceArray input.
std::unique_ptr< cudf::column > from_arrow_column(ArrowSchema const *schema, ArrowArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column from a given ArrowArray and ArrowSchema input.
unique_schema_t to_arrow_schema(cudf::table_view const &input, cudf::host_span< column_metadata const > metadata)
Create ArrowSchema from cudf table and metadata.
std::unique_ptr< table > from_arrow_stream(ArrowArrayStream *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table from given ArrowArrayStream input.
unique_table_view_t from_arrow_device(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table_view from given ArrowDeviceArray and ArrowSchema
unique_column_view_t from_arrow_device_column(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::column_view from given ArrowDeviceArray and ArrowSchema
unique_device_array_t to_arrow_host(cudf::column_view const &col, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Copy column view data to host and create ArrowDeviceArray for it.
unique_device_array_t to_arrow_device(cudf::column_view const &col, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create ArrowDeviceArray from a column view.
std::unique_ptr< cudf::column_view, custom_view_deleter< cudf::column_view > > unique_column_view_t
typedef for a unique_ptr to a cudf::column_view with custom deleter
Definition: interop.hpp:188
std::unique_ptr< ArrowSchema, void(*)(ArrowSchema *)> unique_schema_t
typedef for a unique_ptr to an ArrowSchema with custom deleter
Definition: interop.hpp:135
std::unique_ptr< ArrowDeviceArray, void(*)(ArrowDeviceArray *)> unique_device_array_t
typedef for a unique_ptr to an ArrowDeviceArray with a custom deleter
Definition: interop.hpp:141
std::unique_ptr< table > from_arrow_host(ArrowSchema const *schema, ArrowDeviceArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table from given ArrowDeviceArray input.
std::unique_ptr< cudf::table > from_arrow(ArrowSchema const *schema, ArrowArray const *input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Create cudf::table from given ArrowArray and ArrowSchema input.
std::unique_ptr< table > from_dlpack(DLManagedTensor const *managed_tensor, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Convert a DLPack DLTensor into a cudf table.
DLManagedTensor * to_dlpack(table_view const &input, rmm::cuda_stream_view stream=cudf::get_default_stream(), rmm::device_async_resource_ref mr=cudf::get_current_device_resource_ref())
Convert a cudf table into a DLPack DLTensor.
rmm::device_async_resource_ref get_current_device_resource_ref()
Get the current device memory resource reference.
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
std::vector< cudf::column_metadata > get_table_metadata(cudf::table_view const &input)
Helper function to generate empty table metadata (all columns with no names) for arrow conversion.
cudf::column_metadata get_column_metadata(cudf::column_view const &input)
Helper function to generate empty column metadata (column with no name) for arrow conversion.
cuDF interfaces
Definition: host_udf.hpp:37
APIs for spans.
Detailed metadata information for arrow array.
Definition: interop.hpp:118
column_metadata(std::string _name)
Construct a new column metadata object.
Definition: interop.hpp:127
std::vector< column_metadata > children_meta
Metadata of children of the column.
Definition: interop.hpp:120
std::string name
Name of the column.
Definition: interop.hpp:119
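Functor used as the custom deleter for a unique_ptr to a view type (cudf::table_view or cudf::column_view); it also holds the owned columns that must be deleted.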
Definition: interop.hpp:158
void operator()(ViewType *ptr) const
Call operator that deletes the view object managed by the unique_ptr.
Definition: interop.hpp:171
owned_columns_t owned_mem_
Owned columns that must be deleted.
Definition: interop.hpp:173
custom_view_deleter(owned_columns_t &&owned)
Construct a new custom view deleter object.
Definition: interop.hpp:164
C++20 std::span with reduced feature set.
Definition: span.hpp:194
Class definition for cudf::table.
Class definitions for (mutable)_table_view
Type declarations for libcudf.