Skip to content

Commit 43fd0c8

Browse files
authored
Enable parallelism for root node presolve (NVIDIA#371)
Papilo's problem builder was spending a long time building the constraint matrix. Rewrote that step, which provides a ~1.8x speedup on presolve. With TBB there is an additional ~1.2x speedup, so ~2x overall. [Parallel Presolve.xlsx](https://github.com/user-attachments/files/22145182/Parallel.Presolve.xlsx) Authors: - Hugo Linsenmaier (https://github.com/hlinsen) Approvers: - Rajesh Gandham (https://github.com/rg20) - Ramakrishnap (https://github.com/rgsl888prabhu) URL: NVIDIA#371
1 parent 6e23f2d commit 43fd0c8

12 files changed

Lines changed: 159 additions & 24 deletions

File tree

‎ci/build_wheel_libcuopt.sh‎

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ source rapids-init-pip
2121
package_name="libcuopt"
2222
package_dir="python/libcuopt"
2323

24-
# Install Boost
25-
bash ci/utils/install_boost.sh
24+
# Install Boost and TBB
25+
bash ci/utils/install_boost_tbb.sh
2626

2727
export SKBUILD_CMAKE_ARGS="-DCUOPT_BUILD_WHEELS=ON;-DDISABLE_DEPRECATION_WARNING=ON"
2828

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,19 +17,19 @@
1717

1818
set -euo pipefail
1919

20-
# Install Boost
20+
# Install Boost and TBB
2121
if [ -f /etc/os-release ]; then
2222
. /etc/os-release
2323
if [[ "$ID" == "rocky" ]]; then
24-
echo "Detected Rocky Linux. Installing Boost via dnf..."
25-
dnf install -y boost-devel
24+
echo "Detected Rocky Linux. Installing Boost and TBB via dnf..."
25+
dnf install -y boost-devel tbb-devel
2626
if [[ "$(uname -m)" == "x86_64" ]]; then
2727
dnf install -y gcc-toolset-14-libquadmath-devel
2828
fi
2929
elif [[ "$ID" == "ubuntu" ]]; then
30-
echo "Detected Ubuntu. Installing Boost via apt..."
30+
echo "Detected Ubuntu. Installing Boost and TBB via apt..."
3131
apt-get update
32-
apt-get install -y libboost-dev
32+
apt-get install -y libboost-dev libtbb-dev
3333
else
3434
echo "Unknown OS: $ID. Please install Boost development libraries manually."
3535
exit 1

‎conda/environments/all_cuda-129_arch-aarch64.yaml‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ dependencies:
7979
- sphinxcontrib-openapi
8080
- sphinxcontrib-websupport
8181
- sysroot_linux-aarch64==2.28
82+
- tbb-devel
8283
- uvicorn==0.34.*
8384
- zlib
8485
- pip:

‎conda/environments/all_cuda-129_arch-x86_64.yaml‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ dependencies:
7979
- sphinxcontrib-openapi
8080
- sphinxcontrib-websupport
8181
- sysroot_linux-64==2.28
82+
- tbb-devel
8283
- uvicorn==0.34.*
8384
- zlib
8485
- pip:

‎conda/recipes/libcuopt/recipe.yaml‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ cache:
5757
- cuda-version =${{ cuda_version }}
5858
- cmake ${{ cmake_version }}
5959
- ninja
60+
- tbb-devel
6061
- zlib
6162
- bzip2
6263
host:
@@ -72,6 +73,7 @@ cache:
7273
- libcusparse-dev
7374
- cuda-cudart-dev
7475
- boost
76+
- tbb-devel
7577
- zlib
7678
- bzip2
7779

‎cpp/CMakeLists.txt‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,8 @@ FetchContent_Declare(
177177
SYSTEM
178178
)
179179

180+
find_package(TBB REQUIRED)
180181
set(BUILD_TESTING OFF CACHE BOOL "Disable test build for papilo")
181-
set(TBB OFF CACHE BOOL "Disable TBB")
182182
set(PAPILO_NO_BINARIES ON)
183183
option(LUSOL "Disable LUSOL" OFF)
184184

@@ -254,6 +254,7 @@ target_include_directories(cuopt
254254
set(CUOPT_PRIVATE_CUDA_LIBS
255255
CUDA::curand
256256
CUDA::cusolver
257+
TBB::tbb
257258
OpenMP::OpenMP_CXX)
258259

259260
list(PREPEND CUOPT_PRIVATE_CUDA_LIBS CUDA::cublasLt)

‎cpp/cmake/thirdparty/FindTBB.cmake‎

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# FindTBB.cmake - Find TBB (Threading Building Blocks) library
17+
#
18+
# This module defines the following variables:
19+
# TBB_FOUND - True if TBB is found
20+
# TBB_INCLUDE_DIR - TBB include directory (cache variable set by find_path)
21+
# TBB_LIBRARY - TBB library (cache variable set by find_library)
22+
# TBB::tbb - Imported target for TBB
23+
24+
# Try pkg-config first
25+
find_package(PkgConfig QUIET)
26+
if(PkgConfig_FOUND)
27+
pkg_check_modules(PC_TBB QUIET tbb)
28+
endif()
29+
30+
find_path(TBB_INCLUDE_DIR
31+
NAMES tbb/tbb.h
32+
PATHS
33+
${PC_TBB_INCLUDE_DIRS}
34+
/usr/include
35+
/usr/local/include
36+
/opt/intel/tbb/include
37+
/opt/intel/oneapi/tbb/latest/include
38+
)
39+
40+
find_library(TBB_LIBRARY
41+
NAMES tbb
42+
PATHS
43+
${PC_TBB_LIBRARY_DIRS}
44+
/usr/lib
45+
/usr/lib64
46+
/usr/local/lib
47+
/usr/local/lib64
48+
/opt/intel/tbb/lib
49+
/opt/intel/oneapi/tbb/latest/lib
50+
)
51+
52+
find_library(TBB_MALLOC_LIBRARY
53+
NAMES tbbmalloc
54+
PATHS
55+
${PC_TBB_LIBRARY_DIRS}
56+
/usr/lib
57+
/usr/lib64
58+
/usr/local/lib
59+
/usr/local/lib64
60+
/opt/intel/tbb/lib
61+
/opt/intel/oneapi/tbb/latest/lib
62+
)
63+
64+
include(FindPackageHandleStandardArgs)
65+
find_package_handle_standard_args(TBB
66+
REQUIRED_VARS TBB_INCLUDE_DIR TBB_LIBRARY
67+
)
68+
69+
if(TBB_FOUND AND NOT TARGET TBB::tbb)
70+
add_library(TBB::tbb UNKNOWN IMPORTED)
71+
set_target_properties(TBB::tbb PROPERTIES
72+
IMPORTED_LOCATION "${TBB_LIBRARY}"
73+
INTERFACE_INCLUDE_DIRECTORIES "${TBB_INCLUDE_DIR}"
74+
)
75+
76+
if(TBB_MALLOC_LIBRARY)
77+
set_target_properties(TBB::tbb PROPERTIES
78+
INTERFACE_LINK_LIBRARIES "${TBB_MALLOC_LIBRARY}"
79+
)
80+
endif()
81+
82+
# Add compile definitions from pkg-config if available
83+
if(PC_TBB_CFLAGS_OTHER)
84+
set_target_properties(TBB::tbb PROPERTIES
85+
INTERFACE_COMPILE_OPTIONS "${PC_TBB_CFLAGS_OTHER}"
86+
)
87+
endif()
88+
endif()
89+
90+
mark_as_advanced(TBB_INCLUDE_DIR TBB_LIBRARY TBB_MALLOC_LIBRARY)

‎cpp/src/mip/presolve/third_party_presolve.cpp‎

Lines changed: 51 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,13 @@
1919
#include <cuopt/logger.hpp>
2020
#include <mip/mip_constants.hpp>
2121
#include <mip/presolve/third_party_presolve.hpp>
22+
#include <utilities/timer.hpp>
2223

24+
#pragma GCC diagnostic push
25+
#pragma GCC diagnostic ignored "-Wstringop-overflow" // ignore boost error for pip wheel build
2326
#include <papilo/core/Presolve.hpp>
2427
#include <papilo/core/ProblemBuilder.hpp>
28+
#pragma GCC diagnostic pop
2529

2630
namespace cuopt::linear_programming::detail {
2731

@@ -124,18 +128,32 @@ papilo::Problem<f_t> build_papilo_problem(const optimization_problem_t<i_t, f_t>
124128
builder.setRowRhsAll(h_constr_ub);
125129
}
126130

131+
std::vector<papilo::RowFlags> h_row_flags(h_constr_lb.size());
132+
std::vector<std::tuple<i_t, i_t, f_t>> h_entries;
127133
// Add constraints row by row
128134
for (size_t i = 0; i < h_constr_lb.size(); ++i) {
129135
// Get row entries
130136
i_t row_start = h_offsets[i];
131137
i_t row_end = h_offsets[i + 1];
132138
i_t num_entries = row_end - row_start;
133-
builder.addRowEntries(
134-
i, num_entries, h_variables.data() + row_start, h_coefficients.data() + row_start);
135-
builder.setRowLhsInf(i, h_constr_lb[i] == -std::numeric_limits<f_t>::infinity());
136-
builder.setRowRhsInf(i, h_constr_ub[i] == std::numeric_limits<f_t>::infinity());
137-
if (h_constr_lb[i] == -std::numeric_limits<f_t>::infinity()) { builder.setRowLhs(i, 0); }
138-
if (h_constr_ub[i] == std::numeric_limits<f_t>::infinity()) { builder.setRowRhs(i, 0); }
139+
for (size_t j = 0; j < num_entries; ++j) {
140+
h_entries.push_back(
141+
std::make_tuple(i, h_variables[row_start + j], h_coefficients[row_start + j]));
142+
}
143+
144+
if (h_constr_lb[i] == -std::numeric_limits<f_t>::infinity()) {
145+
h_row_flags[i].set(papilo::RowFlag::kLhsInf);
146+
} else {
147+
h_row_flags[i].unset(papilo::RowFlag::kLhsInf);
148+
}
149+
if (h_constr_ub[i] == std::numeric_limits<f_t>::infinity()) {
150+
h_row_flags[i].set(papilo::RowFlag::kRhsInf);
151+
} else {
152+
h_row_flags[i].unset(papilo::RowFlag::kRhsInf);
153+
}
154+
155+
if (h_constr_lb[i] == -std::numeric_limits<f_t>::infinity()) { h_constr_lb[i] = 0; }
156+
if (h_constr_ub[i] == std::numeric_limits<f_t>::infinity()) { h_constr_ub[i] = 0; }
139157
}
140158

141159
for (size_t i = 0; i < h_var_lb.size(); ++i) {
@@ -144,7 +162,24 @@ papilo::Problem<f_t> build_papilo_problem(const optimization_problem_t<i_t, f_t>
144162
if (h_var_lb[i] == -std::numeric_limits<f_t>::infinity()) { builder.setColLb(i, 0); }
145163
if (h_var_ub[i] == std::numeric_limits<f_t>::infinity()) { builder.setColUb(i, 0); }
146164
}
147-
return builder.build();
165+
166+
auto problem = builder.build();
167+
168+
if (h_entries.size()) {
169+
auto constexpr const sorted_entries = true;
170+
auto csr_storage = papilo::SparseStorage<f_t>(h_entries, num_rows, num_cols, sorted_entries);
171+
problem.setConstraintMatrix(csr_storage, h_constr_lb, h_constr_ub, h_row_flags);
172+
173+
papilo::ConstraintMatrix<f_t>& matrix = problem.getConstraintMatrix();
174+
for (int i = 0; i < problem.getNRows(); ++i) {
175+
papilo::RowFlags rowFlag = matrix.getRowFlags()[i];
176+
if (!rowFlag.test(papilo::RowFlag::kRhsInf) && !rowFlag.test(papilo::RowFlag::kLhsInf) &&
177+
matrix.getLeftHandSides()[i] == matrix.getRightHandSides()[i])
178+
matrix.getRowFlags()[i].set(papilo::RowFlag::kEquation);
179+
}
180+
}
181+
182+
return problem;
148183
}
149184

150185
template <typename i_t, typename f_t>
@@ -299,14 +334,16 @@ void set_presolve_methods(papilo::Presolve<f_t>& presolver, problem_category_t c
299334
presolver.addPresolveMethod(uptr(new papilo::Substitution<f_t>()));
300335
}
301336

302-
template <typename f_t>
337+
template <typename i_t, typename f_t>
303338
void set_presolve_options(papilo::Presolve<f_t>& presolver,
304339
problem_category_t category,
305340
f_t absolute_tolerance,
306341
f_t relative_tolerance,
307-
double time_limit)
342+
double time_limit,
343+
i_t num_cpu_threads)
308344
{
309-
presolver.getPresolveOptions().tlim = time_limit;
345+
presolver.getPresolveOptions().tlim = time_limit;
346+
presolver.getPresolveOptions().threads = num_cpu_threads; // user setting or 0 (automatic)
310347
}
311348

312349
template <typename i_t, typename f_t>
@@ -315,7 +352,8 @@ std::pair<optimization_problem_t<i_t, f_t>, bool> third_party_presolve_t<i_t, f_
315352
problem_category_t category,
316353
f_t absolute_tolerance,
317354
f_t relative_tolerance,
318-
double time_limit)
355+
double time_limit,
356+
i_t num_cpu_threads)
319357
{
320358
cuopt_expects(
321359
presolve_calls_ == 0, error_type_t::ValidationError, "Presolve can only be called once");
@@ -330,8 +368,8 @@ std::pair<optimization_problem_t<i_t, f_t>, bool> third_party_presolve_t<i_t, f_
330368

331369
papilo::Presolve<f_t> presolver;
332370
set_presolve_methods<f_t>(presolver, category);
333-
set_presolve_options<f_t>(
334-
presolver, category, absolute_tolerance, relative_tolerance, time_limit);
371+
set_presolve_options<i_t, f_t>(
372+
presolver, category, absolute_tolerance, relative_tolerance, time_limit, num_cpu_threads);
335373

336374
// Disable papilo logs
337375
presolver.setVerbosityLevel(papilo::VerbosityLevel::kQuiet);

‎cpp/src/mip/presolve/third_party_presolve.hpp‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ class third_party_presolve_t {
3131
problem_category_t category,
3232
f_t absolute_tolerance,
3333
f_t relative_tolerance,
34-
double time_limit);
34+
double time_limit,
35+
i_t num_cpu_threads = 0);
3536

3637
void undo(rmm::device_uvector<f_t>& primal_solution,
3738
rmm::device_uvector<f_t>& dual_solution,

‎cpp/src/mip/solve.cu‎

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,8 @@ mip_solution_t<i_t, f_t> solve_mip(optimization_problem_t<i_t, f_t>& op_problem,
200200
cuopt::linear_programming::problem_category_t::MIP,
201201
settings.tolerances.absolute_tolerance,
202202
settings.tolerances.relative_tolerance,
203-
presolve_time_limit);
203+
presolve_time_limit,
204+
settings.num_cpu_threads);
204205
if (!feasible) {
205206
return mip_solution_t<i_t, f_t>(mip_termination_status_t::Infeasible,
206207
solver_stats_t<i_t, f_t>{},

0 commit comments

Comments
 (0)