device_buffer.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
18 #include <rmm/cuda_device.hpp>
19 #include <rmm/cuda_stream_view.hpp>
20 #include <rmm/detail/error.hpp>
21 #include <rmm/detail/export.hpp>
23 #include <rmm/resource_ref.hpp>
24 
25 #include <cuda_runtime_api.h>
26 
27 #include <cassert>
28 #include <cstddef>
29 #include <utility>
30 
31 namespace RMM_NAMESPACE {
83  public:
84  // The copy constructor and copy assignment operator without a stream are deleted because they
85  // provide no way to specify an explicit stream
86  device_buffer(device_buffer const& other) = delete;
87  device_buffer& operator=(device_buffer const& other) = delete;
88 
92  // Note: we cannot use `device_buffer() = default;` because nvcc implicitly adds
93  // `__host__ __device__` specifiers to the defaulted constructor when it is called within the
94  // context of both host and device functions.
96 
107  explicit device_buffer(std::size_t size,
108  cuda_stream_view stream,
110  : _stream{stream}, _mr{mr}
111  {
112  cuda_set_device_raii dev{_device};
113  allocate_async(size);
114  }
115 
135  device_buffer(void const* source_data,
136  std::size_t size,
137  cuda_stream_view stream,
139  : _stream{stream}, _mr{mr}
140  {
141  cuda_set_device_raii dev{_device};
142  allocate_async(size);
143  copy_async(source_data, size);
144  }
145 
168  cuda_stream_view stream,
170  : device_buffer{other.data(), other.size(), stream, mr}
171  {
172  }
173 
185  device_buffer(device_buffer&& other) noexcept
186  : _data{other._data},
187  _size{other._size},
188  _capacity{other._capacity},
189  _stream{other.stream()},
190  _mr{other._mr},
191  _device{other._device}
192  {
193  other._data = nullptr;
194  other._size = 0;
195  other._capacity = 0;
196  other.set_stream(cuda_stream_view{});
197  other._device = cuda_device_id{-1};
198  }
199 
215  {
216  if (&other != this) {
217  cuda_set_device_raii dev{_device};
218  deallocate_async();
219 
220  _data = other._data;
221  _size = other._size;
222  _capacity = other._capacity;
223  set_stream(other.stream());
224  _mr = other._mr;
225  _device = other._device;
226 
227  other._data = nullptr;
228  other._size = 0;
229  other._capacity = 0;
230  other.set_stream(cuda_stream_view{});
231  other._device = cuda_device_id{-1};
232  }
233  return *this;
234  }
235 
243  ~device_buffer() noexcept
244  {
245  cuda_set_device_raii dev{_device};
246  deallocate_async();
247  _stream = cuda_stream_view{};
248  }
249 
268  void reserve(std::size_t new_capacity, cuda_stream_view stream)
269  {
270  set_stream(stream);
271  if (new_capacity > capacity()) {
272  cuda_set_device_raii dev{_device};
273  auto tmp = device_buffer{new_capacity, stream, _mr};
274  auto const old_size = size();
275  RMM_CUDA_TRY(cudaMemcpyAsync(tmp.data(), data(), size(), cudaMemcpyDefault, stream.value()));
276  *this = std::move(tmp);
277  _size = old_size;
278  }
279  }
280 
306  void resize(std::size_t new_size, cuda_stream_view stream)
307  {
308  set_stream(stream);
309  // If the requested size is smaller than the current capacity, just update
310  // the size without any allocations
311  if (new_size <= capacity()) {
312  _size = new_size;
313  } else {
314  cuda_set_device_raii dev{_device};
315  auto tmp = device_buffer{new_size, stream, _mr};
316  RMM_CUDA_TRY(cudaMemcpyAsync(tmp.data(), data(), size(), cudaMemcpyDefault, stream.value()));
317  *this = std::move(tmp);
318  }
319  }
320 
335  {
336  set_stream(stream);
337  if (size() != capacity()) {
338  cuda_set_device_raii dev{_device};
339  // Invoke copy ctor on self which only copies `[0, size())` and swap it
340  // with self. The temporary `device_buffer` will hold the old contents
341  // which will then be destroyed
342  auto tmp = device_buffer{*this, stream, _mr};
343  std::swap(tmp, *this);
344  }
345  }
346 
350  [[nodiscard]] void const* data() const noexcept { return _data; }
351 
355  void* data() noexcept { return _data; }
356 
360  [[nodiscard]] std::size_t size() const noexcept { return _size; }
361 
365  [[nodiscard]] std::int64_t ssize() const noexcept
366  {
367  assert(size() < static_cast<std::size_t>(std::numeric_limits<int64_t>::max()) &&
368  "Size overflows signed integer");
369  return static_cast<int64_t>(size());
370  }
371 
378  [[nodiscard]] bool is_empty() const noexcept { return 0 == size(); }
379 
387  [[nodiscard]] std::size_t capacity() const noexcept { return _capacity; }
388 
392  [[nodiscard]] cuda_stream_view stream() const noexcept { return _stream; }
393 
405  void set_stream(cuda_stream_view stream) noexcept { _stream = stream; }
406 
410  [[nodiscard]] rmm::device_async_resource_ref memory_resource() const noexcept { return _mr; }
411 
412  private:
413  void* _data{nullptr};
414  std::size_t _size{};
415  std::size_t _capacity{};
416  cuda_stream_view _stream{};
417 
421  cuda_device_id _device{get_current_cuda_device()};
422 
432  void allocate_async(std::size_t bytes)
433  {
434  _size = bytes;
435  _capacity = bytes;
436  _data = (bytes > 0) ? _mr.allocate_async(bytes, stream()) : nullptr;
437  }
438 
448  void deallocate_async() noexcept
449  {
450  if (capacity() > 0) { _mr.deallocate_async(data(), capacity(), stream()); }
451  _size = 0;
452  _capacity = 0;
453  _data = nullptr;
454  }
455 
468  void copy_async(void const* source, std::size_t bytes)
469  {
470  if (bytes > 0) {
471  RMM_EXPECTS(nullptr != source, "Invalid copy from nullptr.");
472  RMM_EXPECTS(nullptr != _data, "Invalid copy to nullptr.");
473 
474  RMM_CUDA_TRY(cudaMemcpyAsync(_data, source, bytes, cudaMemcpyDefault, stream().value()));
475  }
476  }
477 };
478  // end of group
480 } // namespace RMM_NAMESPACE
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:39
constexpr cudaStream_t value() const noexcept
Get the wrapped stream.
Definition: cuda_stream_view.hpp:73
RAII construct for device memory allocation.
Definition: device_buffer.hpp:82
cuda_stream_view stream() const noexcept
The stream most recently specified for allocation/deallocation.
Definition: device_buffer.hpp:392
void resize(std::size_t new_size, cuda_stream_view stream)
Resize the device memory allocation.
Definition: device_buffer.hpp:306
void * data() noexcept
Pointer to the device memory allocation.
Definition: device_buffer.hpp:355
~device_buffer() noexcept
Destroy the device buffer object.
Definition: device_buffer.hpp:243
device_buffer & operator=(device_buffer &&other) noexcept
Move assignment operator moves the contents from other.
Definition: device_buffer.hpp:214
device_buffer()
Default constructor creates an empty device_buffer
Definition: device_buffer.hpp:95
std::size_t capacity() const noexcept
Returns actual size in bytes of device memory allocation.
Definition: device_buffer.hpp:387
void const * data() const noexcept
Const pointer to the device memory allocation.
Definition: device_buffer.hpp:350
void reserve(std::size_t new_capacity, cuda_stream_view stream)
Increase the capacity of the device memory allocation.
Definition: device_buffer.hpp:268
device_buffer(std::size_t size, cuda_stream_view stream, device_async_resource_ref mr=mr::get_current_device_resource_ref())
Constructs a new device buffer of size uninitialized bytes.
Definition: device_buffer.hpp:107
void set_stream(cuda_stream_view stream) noexcept
Sets the stream to be used for deallocation.
Definition: device_buffer.hpp:405
std::size_t size() const noexcept
The number of bytes.
Definition: device_buffer.hpp:360
device_buffer(void const *source_data, std::size_t size, cuda_stream_view stream, device_async_resource_ref mr=mr::get_current_device_resource_ref())
Construct a new device buffer by copying from a raw pointer to an existing host or device memory allo...
Definition: device_buffer.hpp:135
device_buffer(device_buffer &&other) noexcept
Constructs a new device_buffer by moving the contents of another device_buffer into the newly constru...
Definition: device_buffer.hpp:185
device_buffer(device_buffer const &other, cuda_stream_view stream, device_async_resource_ref mr=mr::get_current_device_resource_ref())
Construct a new device_buffer by deep copying the contents of another device_buffer,...
Definition: device_buffer.hpp:167
void shrink_to_fit(cuda_stream_view stream)
Forces the deallocation of unused memory.
Definition: device_buffer.hpp:334
std::int64_t ssize() const noexcept
The signed number of bytes.
Definition: device_buffer.hpp:365
bool is_empty() const noexcept
Whether or not the buffer currently holds any data.
Definition: device_buffer.hpp:378
rmm::device_async_resource_ref memory_resource() const noexcept
The resource used to allocate and deallocate.
Definition: device_buffer.hpp:410
cuda_device_id get_current_cuda_device()
Returns a cuda_device_id for the current device.
Definition: cuda_device.hpp:99
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
Alias for a cuda::mr::async_resource_ref with the property cuda::mr::device_accessible.
Definition: resource_ref.hpp:40
device_async_resource_ref get_current_device_resource_ref()
Get the device_async_resource_ref for the current device.
Definition: per_device_resource.hpp:411
Management of per-device device_memory_resources.
Strong type for a CUDA device identifier.
Definition: cuda_device.hpp:41
RAII class that sets the current CUDA device to the specified device on construction and restores the...
Definition: cuda_device.hpp:151