From 6e309dff24a226492bb8705e736a00aff54a6a22 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Wed, 4 Feb 2026 10:39:04 +0100 Subject: [PATCH 01/10] working fp32 support for PDLP no presolve no crossover --- .../pdlp/solver_settings.hpp | 2 +- cpp/src/dual_simplex/sparse_matrix.cpp | 7 + cpp/src/linear_programming/cusparse_view.cu | 4 +- .../initial_scaling.cu | 2 +- .../optimal_batch_size_handler.cu | 2 +- .../optimization_problem.cu | 2 +- cpp/src/linear_programming/pdhg.cu | 2 +- cpp/src/linear_programming/pdlp.cu | 67 +++- .../pdlp_warm_start_data.cu | 2 +- .../localized_duality_gap_container.cu | 2 +- .../restart_strategy/pdlp_restart_strategy.cu | 2 +- .../weighted_average_solution.cu | 2 +- cpp/src/linear_programming/saddle_point.cu | 2 +- cpp/src/linear_programming/solve.cu | 290 ++++++++++-------- cpp/src/linear_programming/solver_settings.cu | 2 +- cpp/src/linear_programming/solver_solution.cu | 2 +- .../adaptive_step_size_strategy.cu | 2 +- .../convergence_information.cu | 2 +- .../infeasibility_information.cu | 2 +- .../termination_strategy.cu | 2 +- cpp/src/linear_programming/translate.hpp | 2 +- .../utilities/problem_checking.cu | 2 +- cpp/src/math_optimization/solution_writer.cu | 27 +- cpp/src/math_optimization/solution_writer.hpp | 5 +- cpp/src/math_optimization/solver_settings.cu | 30 +- cpp/src/mip/diversity/lns/rins.cu | 16 +- .../local_search/rounding/simple_rounding.cu | 2 +- cpp/src/mip/mip_constants.hpp | 2 + cpp/src/mip/problem/problem.cu | 2 +- cpp/src/mip/solution/solution.cu | 2 +- cpp/src/mip/solver_solution.cu | 6 +- cpp/tests/linear_programming/pdlp_test.cu | 115 ++++++- 32 files changed, 414 insertions(+), 197 deletions(-) diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index 3b94fee14..e7377165c 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -208,7 +208,7 @@ class pdlp_solver_settings_t { bool detect_infeasibility{false}; bool strict_infeasibility{false}; i_t iteration_limit{std::numeric_limits::max()}; - double time_limit{std::numeric_limits::infinity()}; + f_t time_limit{std::numeric_limits::infinity()}; pdlp_solver_mode_t pdlp_solver_mode{pdlp_solver_mode_t::Stable3}; bool log_to_console{true}; std::string log_file{""}; diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 7edc7b1eb..15379c132 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -10,6 +10,7 @@ #include #include +#include // #include // #include @@ -850,6 +851,12 @@ f_t sparse_dot(const std::vector& xind, return dot; } +#if PDLP_INSTANTIATE_FLOAT +// Minimal float instantiation for LP usage +template class csc_matrix_t; +template class csr_matrix_t; +#endif + #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template class csc_matrix_t; diff --git a/cpp/src/linear_programming/cusparse_view.cu b/cpp/src/linear_programming/cusparse_view.cu index bdd2aa0c8..02332da03 100644 --- a/cpp/src/linear_programming/cusparse_view.cu +++ b/cpp/src/linear_programming/cusparse_view.cu @@ -946,7 +946,7 @@ cusparse_view_t::cusparse_view_t( { } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class cusparse_sp_mat_descr_wrapper_t; template class cusparse_dn_vec_descr_wrapper_t; template class cusparse_dn_mat_descr_wrapper_t; @@ -960,7 +960,7 @@ template class cusparse_view_t; #endif #if CUDA_VER_12_4_UP -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template void my_cusparsespmm_preprocess(cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, diff --git a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu index 72474fe90..451694ffa 100644 --- a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu +++ b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu @@ -858,7 +858,7 @@ pdlp_initial_scaling_strategy_t::view() int* A_T_offsets, \ int* A_T_indices); -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/linear_programming/optimal_batch_size_handler/optimal_batch_size_handler.cu b/cpp/src/linear_programming/optimal_batch_size_handler/optimal_batch_size_handler.cu index 0abd6a0b6..d0fa98d83 100644 --- a/cpp/src/linear_programming/optimal_batch_size_handler/optimal_batch_size_handler.cu +++ b/cpp/src/linear_programming/optimal_batch_size_handler/optimal_batch_size_handler.cu @@ -434,7 +434,7 @@ int optimal_batch_size_handler(const optimization_problem_t& op_proble return 0; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template int optimal_batch_size_handler( const optimization_problem_t& op_problem, int max_batch_size); #endif diff --git a/cpp/src/linear_programming/optimization_problem.cu b/cpp/src/linear_programming/optimization_problem.cu index ba57141e9..bd802325d 100644 --- a/cpp/src/linear_programming/optimization_problem.cu +++ b/cpp/src/linear_programming/optimization_problem.cu @@ -1062,7 +1062,7 @@ bool optimization_problem_t::has_quadratic_objective() const return !Q_values_.empty(); } // NOTE: Explicitly instantiate all types here in order to avoid linker error -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class optimization_problem_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/linear_programming/pdhg.cu b/cpp/src/linear_programming/pdhg.cu index 551772cf3..f094e37dd 100644 --- a/cpp/src/linear_programming/pdhg.cu +++ b/cpp/src/linear_programming/pdhg.cu @@ -1159,7 +1159,7 @@ rmm::device_uvector& pdhg_solver_t::get_dual_solution() return current_saddle_point_state_.get_dual_solution(); } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class pdhg_solver_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu index 8a05f1b2a..f68fa3e33 100644 --- a/cpp/src/linear_programming/pdlp.cu +++ b/cpp/src/linear_programming/pdlp.cu @@ -41,6 +41,59 @@ namespace cuopt::linear_programming::detail { +// Templated wrapper for cuBLAS geam function +// cublasSgeam for float, cublasDgeam for double +template +inline cublasStatus_t cublasGeam(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int m, + int n, + const T* alpha, + const T* A, + int lda, + const T* beta, + const T* B, + int ldb, + T* C, + int ldc); + +template <> +inline cublasStatus_t cublasGeam(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int m, + int n, + const float* alpha, + const float* A, + int lda, + const float* beta, + const float* B, + int ldb, + float* C, + int ldc) +{ + return cublasSgeam(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc); +} + +template <> +inline cublasStatus_t cublasGeam(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int m, + int n, + const double* alpha, + const double* A, + int lda, + const double* beta, + const double* B, + int ldb, + double* C, + int ldc) +{ + return cublasDgeam(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc); +} + template static size_t batch_size_handler(const problem_t& op_problem, const pdlp_solver_settings_t& settings) @@ -1869,7 +1922,7 @@ void pdlp_solver_t::transpose_primal_dual_to_row( rmm::device_uvector dual_slack_transposed( is_dual_slack_empty ? 0 : primal_size_h_ * climber_strategies_.size(), stream_view_); - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, climber_strategies_.size(), @@ -1884,7 +1937,7 @@ void pdlp_solver_t::transpose_primal_dual_to_row( climber_strategies_.size())); if (!is_dual_slack_empty) { - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, climber_strategies_.size(), @@ -1898,7 +1951,7 @@ void pdlp_solver_t::transpose_primal_dual_to_row( dual_slack_transposed.data(), climber_strategies_.size())); } - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, climber_strategies_.size(), @@ -1945,7 +1998,7 @@ void pdlp_solver_t::transpose_primal_dual_back_to_col( rmm::device_uvector dual_slack_transposed( is_dual_slack_empty ? 0 : primal_size_h_ * climber_strategies_.size(), stream_view_); - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, primal_size_h_, @@ -1960,7 +2013,7 @@ void pdlp_solver_t::transpose_primal_dual_back_to_col( primal_size_h_)); if (!is_dual_slack_empty) { - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, primal_size_h_, @@ -1975,7 +2028,7 @@ void pdlp_solver_t::transpose_primal_dual_back_to_col( primal_size_h_)); } - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, dual_size_h_, @@ -2858,7 +2911,7 @@ pdlp_solver_t::get_current_termination_strategy() return current_termination_strategy_; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class pdlp_solver_t; template __global__ void compute_weights_initial_primal_weight_from_squared_norms( diff --git a/cpp/src/linear_programming/pdlp_warm_start_data.cu b/cpp/src/linear_programming/pdlp_warm_start_data.cu index 3145552fc..219d803ec 100644 --- a/cpp/src/linear_programming/pdlp_warm_start_data.cu +++ b/cpp/src/linear_programming/pdlp_warm_start_data.cu @@ -178,7 +178,7 @@ void pdlp_warm_start_data_t::check_sizes() "All dual vectors should be of same size"); } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class pdlp_warm_start_data_t; #endif diff --git a/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu b/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu index 0938a3ecc..054901299 100644 --- a/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu +++ b/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu @@ -144,7 +144,7 @@ localized_duality_gap_container_t::view() return v; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template struct localized_duality_gap_container_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu index 615a27658..41e5a3888 100644 --- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu @@ -2523,7 +2523,7 @@ bool pdlp_restart_strategy_t::get_last_restart_was_average() const const typename localized_duality_gap_container_t::view_t duality_gap_view, \ F_TYPE* primal_product); -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu index d47fbba0c..717183f41 100644 --- a/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu +++ b/cpp/src/linear_programming/restart_strategy/weighted_average_solution.cu @@ -139,7 +139,7 @@ i_t weighted_average_solution_t::get_iterations_since_last_restart() c return iterations_since_last_restart_; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template __global__ void add_weight_sums(const float* primal_weight, const float* dual_weight, float* sum_primal_solution_weights, diff --git a/cpp/src/linear_programming/saddle_point.cu b/cpp/src/linear_programming/saddle_point.cu index 727bcbf0b..b07d5486d 100644 --- a/cpp/src/linear_programming/saddle_point.cu +++ b/cpp/src/linear_programming/saddle_point.cu @@ -166,7 +166,7 @@ rmm::device_uvector& saddle_point_state_t::get_next_AtY() return next_AtY_; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class saddle_point_state_t; #endif diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index db15eed82..4943cef74 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -568,75 +568,81 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& sol.get_solve_time()); } - const bool do_crossover = settings.crossover; - i_t crossover_info = 0; - if (do_crossover && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { - crossover_info = -1; - - dual_simplex::lp_problem_t lp(problem.handle_ptr, 1, 1, 1); - dual_simplex::lp_solution_t initial_solution(1, 1); - translate_to_crossover_problem(problem, sol, lp, initial_solution); - dual_simplex::simplex_solver_settings_t dual_simplex_settings; - dual_simplex_settings.time_limit = timer.remaining_time(); - dual_simplex_settings.iteration_limit = settings.iteration_limit; - dual_simplex_settings.concurrent_halt = settings.concurrent_halt; - dual_simplex::lp_solution_t vertex_solution(lp.num_rows, lp.num_cols); - std::vector vstatus(lp.num_cols); - dual_simplex::crossover_status_t crossover_status = dual_simplex::crossover( - lp, dual_simplex_settings, initial_solution, start_time, vertex_solution, vstatus); - pdlp_termination_status_t termination_status = pdlp_termination_status_t::TimeLimit; - auto to_termination_status = [](dual_simplex::crossover_status_t status) { - switch (status) { - case dual_simplex::crossover_status_t::OPTIMAL: return pdlp_termination_status_t::Optimal; - case dual_simplex::crossover_status_t::PRIMAL_FEASIBLE: - return pdlp_termination_status_t::PrimalFeasible; - case dual_simplex::crossover_status_t::DUAL_FEASIBLE: - return pdlp_termination_status_t::NumericalError; - case dual_simplex::crossover_status_t::NUMERICAL_ISSUES: - return pdlp_termination_status_t::NumericalError; - case dual_simplex::crossover_status_t::CONCURRENT_LIMIT: - return pdlp_termination_status_t::ConcurrentLimit; - case dual_simplex::crossover_status_t::TIME_LIMIT: - return pdlp_termination_status_t::TimeLimit; - default: return pdlp_termination_status_t::NumericalError; - } - }; - termination_status = to_termination_status(crossover_status); - if (crossover_status == dual_simplex::crossover_status_t::OPTIMAL) { crossover_info = 0; } - rmm::device_uvector final_primal_solution = - cuopt::device_copy(vertex_solution.x, problem.handle_ptr->get_stream()); - rmm::device_uvector final_dual_solution = - cuopt::device_copy(vertex_solution.y, problem.handle_ptr->get_stream()); - rmm::device_uvector final_reduced_cost = - cuopt::device_copy(vertex_solution.z, problem.handle_ptr->get_stream()); - - // Should be filled with more information from dual simplex - std::vector< - typename optimization_problem_solution_t::additional_termination_information_t> - info(1); - info[0].primal_objective = vertex_solution.user_objective; - info[0].number_of_steps_taken = vertex_solution.iterations; - auto crossover_end = std::chrono::high_resolution_clock::now(); - auto crossover_duration = - std::chrono::duration_cast(crossover_end - start_solver); - info[0].solve_time = crossover_duration.count() / 1000.0; - auto sol_crossover = optimization_problem_solution_t(final_primal_solution, - final_dual_solution, - final_reduced_cost, - problem.objective_name, - problem.var_names, - problem.row_names, - std::move(info), - {termination_status}); - sol.copy_from(problem.handle_ptr, sol_crossover); - CUOPT_LOG_CONDITIONAL_INFO( - !settings.inside_mip, "Crossover status %s", sol.get_termination_status_string().c_str()); - } - if (settings.method == method_t::Concurrent && settings.concurrent_halt != nullptr && - crossover_info == 0 && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { - // We finished. Tell dual simplex to stop if it is still running. - CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "PDLP finished. Telling others to stop"); - *settings.concurrent_halt = 1; + if constexpr (!std::is_same_v) { + cuopt_expects(!settings.crossover, + error_type_t::ValidationError, + "PDLP with crossover is not supported for float precision. Set crossover=false or use double precision."); + } else { + const bool do_crossover = settings.crossover; + i_t crossover_info = 0; + if (do_crossover && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { + crossover_info = -1; + + dual_simplex::lp_problem_t lp(problem.handle_ptr, 1, 1, 1); + dual_simplex::lp_solution_t initial_solution(1, 1); + translate_to_crossover_problem(problem, sol, lp, initial_solution); + dual_simplex::simplex_solver_settings_t dual_simplex_settings; + dual_simplex_settings.time_limit = timer.remaining_time(); + dual_simplex_settings.iteration_limit = settings.iteration_limit; + dual_simplex_settings.concurrent_halt = settings.concurrent_halt; + dual_simplex::lp_solution_t vertex_solution(lp.num_rows, lp.num_cols); + std::vector vstatus(lp.num_cols); + dual_simplex::crossover_status_t crossover_status = dual_simplex::crossover( + lp, dual_simplex_settings, initial_solution, start_time, vertex_solution, vstatus); + pdlp_termination_status_t termination_status = pdlp_termination_status_t::TimeLimit; + auto to_termination_status = [](dual_simplex::crossover_status_t status) { + switch (status) { + case dual_simplex::crossover_status_t::OPTIMAL: return pdlp_termination_status_t::Optimal; + case dual_simplex::crossover_status_t::PRIMAL_FEASIBLE: + return pdlp_termination_status_t::PrimalFeasible; + case dual_simplex::crossover_status_t::DUAL_FEASIBLE: + return pdlp_termination_status_t::NumericalError; + case dual_simplex::crossover_status_t::NUMERICAL_ISSUES: + return pdlp_termination_status_t::NumericalError; + case dual_simplex::crossover_status_t::CONCURRENT_LIMIT: + return pdlp_termination_status_t::ConcurrentLimit; + case dual_simplex::crossover_status_t::TIME_LIMIT: + return pdlp_termination_status_t::TimeLimit; + default: return pdlp_termination_status_t::NumericalError; + } + }; + termination_status = to_termination_status(crossover_status); + if (crossover_status == dual_simplex::crossover_status_t::OPTIMAL) { crossover_info = 0; } + rmm::device_uvector final_primal_solution = + cuopt::device_copy(vertex_solution.x, problem.handle_ptr->get_stream()); + rmm::device_uvector final_dual_solution = + cuopt::device_copy(vertex_solution.y, problem.handle_ptr->get_stream()); + rmm::device_uvector final_reduced_cost = + cuopt::device_copy(vertex_solution.z, problem.handle_ptr->get_stream()); + + // Should be filled with more information from dual simplex + std::vector< + typename optimization_problem_solution_t::additional_termination_information_t> + info(1); + info[0].primal_objective = vertex_solution.user_objective; + info[0].number_of_steps_taken = vertex_solution.iterations; + auto crossover_end = std::chrono::high_resolution_clock::now(); + auto crossover_duration = + std::chrono::duration_cast(crossover_end - start_solver); + info[0].solve_time = crossover_duration.count() / 1000.0; + auto sol_crossover = optimization_problem_solution_t(final_primal_solution, + final_dual_solution, + final_reduced_cost, + problem.objective_name, + problem.var_names, + problem.row_names, + std::move(info), + {termination_status}); + sol.copy_from(problem.handle_ptr, sol_crossover); + CUOPT_LOG_CONDITIONAL_INFO( + !settings.inside_mip, "Crossover status %s", sol.get_termination_status_string().c_str()); + } + if (settings.method == method_t::Concurrent && settings.concurrent_halt != nullptr && + crossover_info == 0 && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { + // We finished. Tell dual simplex to stop if it is still running. + CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "PDLP finished. Telling others to stop"); + *settings.concurrent_halt = 1; + } } return sol; } @@ -1055,14 +1061,23 @@ optimization_problem_solution_t solve_lp_with_method( const timer_t& timer, bool is_batch_mode) { - if (settings.method == method_t::DualSimplex) { - return run_dual_simplex(problem, settings, timer); - } else if (settings.method == method_t::Barrier) { - return run_barrier(problem, settings, timer); - } else if (settings.method == method_t::Concurrent) { - return run_concurrent(problem, settings, timer, is_batch_mode); + if constexpr (std::is_same_v) { + if (settings.method == method_t::DualSimplex) { + return run_dual_simplex(problem, settings, timer); + } else if (settings.method == method_t::Barrier) { + return run_barrier(problem, settings, timer); + } else if (settings.method == method_t::Concurrent) { + return run_concurrent(problem, settings, timer, is_batch_mode); + } else { + return run_pdlp(problem, settings, timer, is_batch_mode); + } } else { - return run_pdlp(problem, settings, timer, is_batch_mode); + // Float precision only supports PDLP without presolve/crossover + cuopt_expects(settings.method == method_t::PDLP, + error_type_t::ValidationError, + "Float precision only supports PDLP method. DualSimplex, Barrier, and Concurrent " + "require double precision."); + return run_pdlp(problem, settings, timer, is_batch_mode); } } @@ -1131,31 +1146,38 @@ optimization_problem_solution_t solve_lp( std::unique_ptr> presolver; auto run_presolve = settings.presolve; run_presolve = run_presolve && settings.get_pdlp_warm_start_data().total_pdlp_iterations_ == -1; - if (!run_presolve && !settings_const.inside_mip) { - CUOPT_LOG_INFO("Third-party presolve is disabled, skipping"); - } - if (run_presolve) { - detail::sort_csr(op_problem); - // allocate no more than 10% of the time limit to presolve. - // Note that this is not the presolve time, but the time limit for presolve. - // But no less than 1 second, to avoid early timeout triggering known crashes - const double presolve_time_limit = - std::max(1.0, std::min(0.1 * lp_timer.remaining_time(), 60.0)); - presolver = std::make_unique>(); - auto result = presolver->apply(op_problem, - cuopt::linear_programming::problem_category_t::LP, - settings.dual_postsolve, - settings.tolerances.absolute_primal_tolerance, - settings.tolerances.relative_primal_tolerance, - presolve_time_limit); - if (!result.has_value()) { - return optimization_problem_solution_t( - pdlp_termination_status_t::PrimalInfeasible, op_problem.get_handle_ptr()->get_stream()); + if constexpr (!std::is_same_v) { + cuopt_expects(!run_presolve, + error_type_t::ValidationError, + "Only double precision is supported with third-party presolve (papilo). Set presolve=false or use double precision."); + } else { + if (!run_presolve && !settings_const.inside_mip) { + CUOPT_LOG_INFO("Third-party presolve is disabled, skipping"); + } + + if (run_presolve) { + detail::sort_csr(op_problem); + // allocate no more than 10% of the time limit to presolve. + // Note that this is not the presolve time, but the time limit for presolve. + // But no less than 1 second, to avoid early timeout triggering known crashes + const double presolve_time_limit = + std::max(1.0, std::min(0.1 * lp_timer.remaining_time(), 60.0)); + presolver = std::make_unique>(); + auto result = presolver->apply(op_problem, + cuopt::linear_programming::problem_category_t::LP, + settings.dual_postsolve, + settings.tolerances.absolute_primal_tolerance, + settings.tolerances.relative_primal_tolerance, + presolve_time_limit); + if (!result.has_value()) { + return optimization_problem_solution_t( + pdlp_termination_status_t::PrimalInfeasible, op_problem.get_handle_ptr()->get_stream()); + } + problem = detail::problem_t(result->reduced_problem); + presolve_time = lp_timer.elapsed_time(); + CUOPT_LOG_INFO("Papilo presolve time: %f", presolve_time); } - problem = detail::problem_t(result->reduced_problem); - presolve_time = lp_timer.elapsed_time(); - CUOPT_LOG_INFO("Papilo presolve time: %f", presolve_time); } if (!settings_const.inside_mip) { @@ -1174,39 +1196,41 @@ optimization_problem_solution_t solve_lp( auto solution = solve_lp_with_method(problem, settings, lp_timer, is_batch_mode); - if (run_presolve) { - auto primal_solution = cuopt::device_copy(solution.get_primal_solution(), - op_problem.get_handle_ptr()->get_stream()); - auto dual_solution = - cuopt::device_copy(solution.get_dual_solution(), op_problem.get_handle_ptr()->get_stream()); - auto reduced_costs = - cuopt::device_copy(solution.get_reduced_cost(), op_problem.get_handle_ptr()->get_stream()); - bool status_to_skip = false; - - presolver->undo(primal_solution, - dual_solution, - reduced_costs, - cuopt::linear_programming::problem_category_t::LP, - status_to_skip, - settings.dual_postsolve, - op_problem.get_handle_ptr()->get_stream()); - - std::vector< - typename optimization_problem_solution_t::additional_termination_information_t> - term_vec = solution.get_additional_termination_informations(); - std::vector status_vec = solution.get_terminations_status(); - - // Create a new solution with the full problem solution - solution = - optimization_problem_solution_t(primal_solution, - dual_solution, - reduced_costs, - std::move(solution.get_pdlp_warm_start_data()), - op_problem.get_objective_name(), - op_problem.get_variable_names(), - op_problem.get_row_names(), - std::move(term_vec), - std::move(status_vec)); + if constexpr (std::is_same_v) { + if (run_presolve) { + auto primal_solution = cuopt::device_copy(solution.get_primal_solution(), + op_problem.get_handle_ptr()->get_stream()); + auto dual_solution = + cuopt::device_copy(solution.get_dual_solution(), op_problem.get_handle_ptr()->get_stream()); + auto reduced_costs = + cuopt::device_copy(solution.get_reduced_cost(), op_problem.get_handle_ptr()->get_stream()); + bool status_to_skip = false; + + presolver->undo(primal_solution, + dual_solution, + reduced_costs, + cuopt::linear_programming::problem_category_t::LP, + status_to_skip, + settings.dual_postsolve, + op_problem.get_handle_ptr()->get_stream()); + + std::vector< + typename optimization_problem_solution_t::additional_termination_information_t> + term_vec = solution.get_additional_termination_informations(); + std::vector status_vec = solution.get_terminations_status(); + + // Create a new solution with the full problem solution + solution = + optimization_problem_solution_t(primal_solution, + dual_solution, + reduced_costs, + std::move(solution.get_pdlp_warm_start_data()), + op_problem.get_objective_name(), + op_problem.get_variable_names(), + op_problem.get_row_names(), + std::move(term_vec), + std::move(status_vec)); + } } if (settings.sol_file != "") { @@ -1353,7 +1377,7 @@ optimization_problem_solution_t solve_lp( const cuopt::mps_parser::mps_data_model_t& data_model); \ template void set_pdlp_solver_mode(pdlp_solver_settings_t& settings); -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/linear_programming/solver_settings.cu b/cpp/src/linear_programming/solver_settings.cu index fc02b6f12..a38ed4fcc 100644 --- a/cpp/src/linear_programming/solver_settings.cu +++ b/cpp/src/linear_programming/solver_settings.cu @@ -368,7 +368,7 @@ pdlp_solver_settings_t::get_pdlp_warm_start_data_view() const noexcept return pdlp_warm_start_data_view_; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class pdlp_solver_settings_t; #endif diff --git a/cpp/src/linear_programming/solver_solution.cu b/cpp/src/linear_programming/solver_solution.cu index ff6234024..32fcc03de 100644 --- a/cpp/src/linear_programming/solver_solution.cu +++ b/cpp/src/linear_programming/solver_solution.cu @@ -448,7 +448,7 @@ void optimization_problem_solution_t::write_to_sol_file( std::string(filename), status, objective_value, var_names_, solution); } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class optimization_problem_solution_t; #endif diff --git a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu index 352070ce7..85ae32d18 100644 --- a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu +++ b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu @@ -596,7 +596,7 @@ adaptive_step_size_strategy_t::view() F_TYPE * dual_step_size, \ int* pdhg_iteration); -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/linear_programming/termination_strategy/convergence_information.cu b/cpp/src/linear_programming/termination_strategy/convergence_information.cu index 6247cc7ed..a38265818 100644 --- a/cpp/src/linear_programming/termination_strategy/convergence_information.cu +++ b/cpp/src/linear_programming/termination_strategy/convergence_information.cu @@ -986,7 +986,7 @@ convergence_information_t::to_primal_quality_adapter( primal_objective_.element(0, stream_view_)}; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class convergence_information_t; template __global__ void compute_remaining_stats_kernel( diff --git a/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu b/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu index 2f7e3b113..f5c37f7f8 100644 --- a/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu +++ b/cpp/src/linear_programming/termination_strategy/infeasibility_information.cu @@ -745,7 +745,7 @@ typename infeasibility_information_t::view_t infeasibility_information return v; } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class infeasibility_information_t; template __global__ void compute_remaining_stats_kernel( diff --git a/cpp/src/linear_programming/termination_strategy/termination_strategy.cu b/cpp/src/linear_programming/termination_strategy/termination_strategy.cu index 1041bd98a..3eb862dae 100644 --- a/cpp/src/linear_programming/termination_strategy/termination_strategy.cu +++ b/cpp/src/linear_programming/termination_strategy/termination_strategy.cu @@ -681,7 +681,7 @@ void pdlp_termination_strategy_t::print_termination_criteria(i_t itera bool per_constraint_residual, \ int batch_size); -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/linear_programming/translate.hpp b/cpp/src/linear_programming/translate.hpp index 19f6c024c..bf223d42d 100644 --- a/cpp/src/linear_programming/translate.hpp +++ b/cpp/src/linear_programming/translate.hpp @@ -133,7 +133,7 @@ void translate_to_crossover_problem(const detail::problem_t& problem, std::vector slack(problem.n_constraints); std::vector tmp_x = cuopt::host_copy(sol.get_primal_solution(), stream); stream.synchronize(); - dual_simplex::matrix_vector_multiply(lp.A, 1.0, tmp_x, 0.0, slack); + dual_simplex::matrix_vector_multiply(lp.A, f_t(1.0), tmp_x, f_t(0.0), slack); CUOPT_LOG_DEBUG("Multiplied A and x"); lp.A.col_start.resize(problem.n_variables + problem.n_constraints + 1); diff --git a/cpp/src/linear_programming/utilities/problem_checking.cu b/cpp/src/linear_programming/utilities/problem_checking.cu index 879707c66..2a1a77101 100644 --- a/cpp/src/linear_programming/utilities/problem_checking.cu +++ b/cpp/src/linear_programming/utilities/problem_checking.cu @@ -340,7 +340,7 @@ bool problem_checking_t::has_crossing_bounds( #define INSTANTIATE(F_TYPE) template class problem_checking_t; -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/math_optimization/solution_writer.cu b/cpp/src/math_optimization/solution_writer.cu index 01e9c1838..3cac57c59 100644 --- a/cpp/src/math_optimization/solution_writer.cu +++ b/cpp/src/math_optimization/solution_writer.cu @@ -9,15 +9,18 @@ #include #include "solution_writer.hpp" +#include + #include namespace cuopt::linear_programming { +template void solution_writer_t::write_solution_to_sol_file(const std::string& filename, const std::string& status, - const double objective_value, + const f_t objective_value, const std::vector& variable_names, - const std::vector& variable_values) + const std::vector& variable_values) { raft::common::nvtx::range fun_scope("write final solution to .sol file"); std::ofstream file(filename.data()); @@ -27,7 +30,7 @@ void solution_writer_t::write_solution_to_sol_file(const std::string& filename, return; } - file.precision(std::numeric_limits::max_digits10 + 1); + file.precision(std::numeric_limits::max_digits10 + 1); file << "# Status: " << status << std::endl; @@ -39,4 +42,22 @@ void solution_writer_t::write_solution_to_sol_file(const std::string& filename, } } +#if PDLP_INSTANTIATE_FLOAT +template void solution_writer_t::write_solution_to_sol_file( + const std::string& filename, + const std::string& status, + const float objective_value, + const std::vector& variable_names, + const std::vector& variable_values); +#endif + +#if MIP_INSTANTIATE_DOUBLE +template void solution_writer_t::write_solution_to_sol_file( + const std::string& filename, + const std::string& status, + const double objective_value, + const std::vector& variable_names, + const std::vector& variable_values); +#endif + } // namespace cuopt::linear_programming diff --git a/cpp/src/math_optimization/solution_writer.hpp b/cpp/src/math_optimization/solution_writer.hpp index 0890bf260..e187f6431 100644 --- a/cpp/src/math_optimization/solution_writer.hpp +++ b/cpp/src/math_optimization/solution_writer.hpp @@ -23,10 +23,11 @@ namespace cuopt::linear_programming { */ class solution_writer_t { public: + template static void write_solution_to_sol_file(const std::string& sol_file_path, const std::string& status, - const double objective_value, + const f_t objective_value, const std::vector& variable_names, - const std::vector& variable_values); + const std::vector& variable_values); }; } // namespace cuopt::linear_programming diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 41c186e19..b21fa64b9 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -58,21 +58,21 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings // clang-format off // Float parameters float_parameters = { - {CUOPT_TIME_LIMIT, &mip_settings.time_limit, 0.0, std::numeric_limits::infinity(), std::numeric_limits::infinity()}, - {CUOPT_TIME_LIMIT, &pdlp_settings.time_limit, 0.0, std::numeric_limits::infinity(), std::numeric_limits::infinity()}, - {CUOPT_ABSOLUTE_DUAL_TOLERANCE, &pdlp_settings.tolerances.absolute_dual_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_RELATIVE_DUAL_TOLERANCE, &pdlp_settings.tolerances.relative_dual_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.absolute_primal_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_RELATIVE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.relative_primal_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_ABSOLUTE_GAP_TOLERANCE, &pdlp_settings.tolerances.absolute_gap_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_RELATIVE_GAP_TOLERANCE, &pdlp_settings.tolerances.relative_gap_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_MIP_ABSOLUTE_TOLERANCE, &mip_settings.tolerances.absolute_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_MIP_RELATIVE_TOLERANCE, &mip_settings.tolerances.relative_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_MIP_INTEGRALITY_TOLERANCE, &mip_settings.tolerances.integrality_tolerance, 0.0, 1e-1, 1e-5}, - {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, 0.0, CUOPT_INFINITY, 1e-10}, - {CUOPT_MIP_RELATIVE_GAP, &mip_settings.tolerances.relative_mip_gap, 0.0, 1e-1, 1e-4}, - {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, 0.0, 1e-1, 1e-10}, - {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, 0.0, 1e-1, 1e-10} + {CUOPT_TIME_LIMIT, &mip_settings.time_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, + {CUOPT_TIME_LIMIT, &pdlp_settings.time_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, + {CUOPT_ABSOLUTE_DUAL_TOLERANCE, &pdlp_settings.tolerances.absolute_dual_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_RELATIVE_DUAL_TOLERANCE, &pdlp_settings.tolerances.relative_dual_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.absolute_primal_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_RELATIVE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.relative_primal_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_ABSOLUTE_GAP_TOLERANCE, &pdlp_settings.tolerances.absolute_gap_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_RELATIVE_GAP_TOLERANCE, &pdlp_settings.tolerances.relative_gap_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_MIP_ABSOLUTE_TOLERANCE, &mip_settings.tolerances.absolute_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_MIP_RELATIVE_TOLERANCE, &mip_settings.tolerances.relative_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_MIP_INTEGRALITY_TOLERANCE, &mip_settings.tolerances.integrality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-5)}, + {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, f_t(0.0), std::numeric_limits::infinity(), f_t(1e-10)}, + {CUOPT_MIP_RELATIVE_GAP, &mip_settings.tolerances.relative_mip_gap, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-10)}, + {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-10)} }; // Int parameters diff --git a/cpp/src/mip/diversity/lns/rins.cu b/cpp/src/mip/diversity/lns/rins.cu index 7456b59ed..ab5d17b60 100644 --- a/cpp/src/mip/diversity/lns/rins.cu +++ b/cpp/src/mip/diversity/lns/rins.cu @@ -180,7 +180,7 @@ void rins_t::run_rins() total_calls++; node_count_at_last_rins = node_count.load(); - time_limit = std::min(time_limit, dm.timer.remaining_time()); + time_limit = std::min(time_limit, static_cast(dm.timer.remaining_time())); CUOPT_LOG_DEBUG("Running RINS on solution with objective %g, fixing %d/%d", best_sol.get_user_objective(), vars_to_fix.size(), @@ -287,22 +287,22 @@ void rins_t::run_rins() if (branch_and_bound_status == dual_simplex::mip_status_t::OPTIMAL) { CUOPT_LOG_DEBUG("RINS submip optimal"); // do goldilocks update - fixrate = std::max(fixrate - 0.05, settings.min_fixrate); - time_limit = std::max(time_limit - 2, settings.min_time_limit); + fixrate = std::max(fixrate - f_t(0.05), static_cast(settings.min_fixrate)); + time_limit = std::max(time_limit - f_t(2), static_cast(settings.min_time_limit)); } else if (branch_and_bound_status == dual_simplex::mip_status_t::TIME_LIMIT) { CUOPT_LOG_DEBUG("RINS submip time limit"); // do goldilocks update - fixrate = std::min(fixrate + 0.05, settings.max_fixrate); - time_limit = std::min(time_limit + 2, settings.max_time_limit); + fixrate = std::min(fixrate + f_t(0.05), static_cast(settings.max_fixrate)); + time_limit = std::min(time_limit + f_t(2), static_cast(settings.max_time_limit)); } else if (branch_and_bound_status == dual_simplex::mip_status_t::INFEASIBLE) { CUOPT_LOG_DEBUG("RINS submip infeasible"); // do goldilocks update, decreasing fixrate - fixrate = std::max(fixrate - 0.05, settings.min_fixrate); + fixrate = std::max(fixrate - f_t(0.05), static_cast(settings.min_fixrate)); } else { CUOPT_LOG_DEBUG("RINS solution not found"); // do goldilocks update - fixrate = std::min(fixrate + 0.05, settings.max_fixrate); - time_limit = std::min(time_limit + 2, settings.max_time_limit); + fixrate = std::min(fixrate + f_t(0.05), static_cast(settings.max_fixrate)); + time_limit = std::min(time_limit + f_t(2), static_cast(settings.max_time_limit)); } cpu_fj_thread.stop_cpu_solver(); diff --git a/cpp/src/mip/local_search/rounding/simple_rounding.cu b/cpp/src/mip/local_search/rounding/simple_rounding.cu index 48b525dbb..dd67f1a45 100644 --- a/cpp/src/mip/local_search/rounding/simple_rounding.cu +++ b/cpp/src/mip/local_search/rounding/simple_rounding.cu @@ -179,7 +179,7 @@ void invoke_correct_integers(solution_t& solution, f_t tol) template void invoke_correct_integers(solution_t & solution, \ F_TYPE tol); -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/mip/mip_constants.hpp b/cpp/src/mip/mip_constants.hpp index 66f5ebd27..cf04df9b0 100644 --- a/cpp/src/mip/mip_constants.hpp +++ b/cpp/src/mip/mip_constants.hpp @@ -11,3 +11,5 @@ #define MIP_INSTANTIATE_FLOAT CUOPT_INSTANTIATE_FLOAT #define MIP_INSTANTIATE_DOUBLE CUOPT_INSTANTIATE_DOUBLE + +#define PDLP_INSTANTIATE_FLOAT 1 diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index 8feaee523..74a6144a7 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -2070,7 +2070,7 @@ void problem_t::update_variable_bounds(const std::vector& var_ind RAFT_CHECK_CUDA(handle_ptr->get_stream()); } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class problem_t; #endif diff --git a/cpp/src/mip/solution/solution.cu b/cpp/src/mip/solution/solution.cu index 9e9a2d75f..399ac6b6a 100644 --- a/cpp/src/mip/solution/solution.cu +++ b/cpp/src/mip/solution/solution.cu @@ -657,7 +657,7 @@ mip_solution_t solution_t::get_solution(bool output_feasible } } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class solution_t; #endif diff --git a/cpp/src/mip/solver_solution.cu b/cpp/src/mip/solver_solution.cu index 2ce6d5700..c3fde8f3c 100644 --- a/cpp/src/mip/solver_solution.cu +++ b/cpp/src/mip/solver_solution.cu @@ -208,8 +208,8 @@ void mip_solution_t::write_to_sol_file(std::string_view filename, status = "Infeasible"; } - double objective_value = get_objective_value(); - auto& var_names = get_variable_names(); + f_t objective_value = get_objective_value(); + auto& var_names = get_variable_names(); std::vector solution; solution.resize(solution_.size()); raft::copy(solution.data(), solution_.data(), solution_.size(), stream_view.value()); @@ -233,7 +233,7 @@ void mip_solution_t::log_summary() const CUOPT_LOG_INFO("Total Solve Time: %f", get_total_solve_time()); } -#if MIP_INSTANTIATE_FLOAT +#if PDLP_INSTANTIATE_FLOAT template class mip_solution_t; #endif diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 994fa89fe..17b9f61d0 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -20,8 +20,10 @@ #include #include #include + #include #include +#include #include #include @@ -45,10 +47,13 @@ namespace cuopt::linear_programming::test { -constexpr double afiro_primal_objective = -464; - +constexpr double afiro_primal_objective = -464.0; +#if PDLP_INSTANTIATE_FLOAT +constexpr float afiro_primal_objective_f32 = -464.0f; +#endif // Accept a 1% error -static bool is_incorrect_objective(double reference, double objective) +template +static bool is_incorrect_objective(f_t reference, f_t objective) { if (reference == 0) { return std::abs(objective) > 0.01; } if (objective == 0) { return std::abs(reference) > 0.01; } @@ -1867,6 +1872,110 @@ TEST(pdlp_class, some_climber_hit_iteration_limit) } } +#if PDLP_INSTANTIATE_FLOAT +TEST(pdlp_class, run_float32) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective_f32, + solution.get_additional_termination_information().primal_objective)); +} + +TEST(pdlp_class, float32_dual_simplex_throws_validation_error) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::DualSimplex; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); +} + +TEST(pdlp_class, float32_barrier_throws_validation_error) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::Barrier; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); +} + +TEST(pdlp_class, float32_concurrent_throws_validation_error) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::Concurrent; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); +} + +TEST(pdlp_class, float32_presolve_throws_validation_error) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.presolve = true; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); +} + +TEST(pdlp_class, float32_crossover_throws_validation_error) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.crossover = true; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); +} +#endif + } // namespace cuopt::linear_programming::test CUOPT_TEST_PROGRAM_MAIN() From b9ac3630a9490285a81952d1af59ab69dbbc626c Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Wed, 4 Feb 2026 11:11:30 +0100 Subject: [PATCH 02/10] updade run_pdlp to allow for fp32 --- .../linear_programming/cuopt/run_pdlp.cu | 78 ++++++++++++------- 1 file changed, 48 insertions(+), 30 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu index 229c72a49..78b4f42e9 100644 --- a/benchmarks/linear_programming/cuopt/run_pdlp.cu +++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu @@ -77,6 +77,11 @@ static void parse_arguments(argparse::ArgumentParser& program) .choices(0, 1); program.add_argument("--solution-path").help("Path where solution file will be generated"); + + program.add_argument("--pdlp-fp32") + .help("Use FP32 (float) precision instead of FP64 (double). Only PDLP method without presolve and crossover is supported.") + .default_value(false) + .implicit_value(true); } static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode( @@ -94,15 +99,16 @@ static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode( return cuopt::linear_programming::pdlp_solver_mode_t::Stable3; } -static cuopt::linear_programming::pdlp_solver_settings_t create_solver_settings( +template +static cuopt::linear_programming::pdlp_solver_settings_t create_solver_settings( const argparse::ArgumentParser& program) { - cuopt::linear_programming::pdlp_solver_settings_t settings = - cuopt::linear_programming::pdlp_solver_settings_t{}; + cuopt::linear_programming::pdlp_solver_settings_t settings = + cuopt::linear_programming::pdlp_solver_settings_t{}; - settings.time_limit = program.get("--time-limit"); + settings.time_limit = static_cast(program.get("--time-limit")); settings.iteration_limit = program.get("--iteration-limit"); - settings.set_optimality_tolerance(program.get("--optimality-tolerance")); + settings.set_optimality_tolerance(static_cast(program.get("--optimality-tolerance"))); settings.pdlp_solver_mode = string_to_pdlp_solver_mode(program.get("--pdlp-solver-mode")); settings.method = static_cast(program.get("--method")); @@ -112,23 +118,12 @@ static cuopt::linear_programming::pdlp_solver_settings_t create_sol return settings; } -int main(int argc, char* argv[]) +template +static int run_solver(const argparse::ArgumentParser& program, const raft::handle_t& handle_) { - // Parse binary arguments - argparse::ArgumentParser program("solve_LP"); - parse_arguments(program); - - try { - program.parse_args(argc, argv); - } catch (const std::runtime_error& err) { - std::cerr << err.what() << std::endl; - std::cerr << program; - return 1; - } - // Initialize solver settings from binary arguments - cuopt::linear_programming::pdlp_solver_settings_t settings = - create_solver_settings(program); + cuopt::linear_programming::pdlp_solver_settings_t settings = + create_solver_settings(program); bool use_pdlp_solver_mode = true; if (program.is_used("--pdlp-hyper-params-path")) { @@ -137,20 +132,13 @@ int main(int argc, char* argv[]) use_pdlp_solver_mode = false; } - // Setup up RMM memory pool - auto memory_resource = make_pool(); - rmm::mr::set_current_device_resource(memory_resource.get()); - - // Initialize raft handle and running stream - const raft::handle_t handle_{}; - // Parse MPS file - cuopt::mps_parser::mps_data_model_t op_problem = - cuopt::mps_parser::parse_mps(program.get("--path")); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(program.get("--path")); // Solve LP problem bool problem_checking = true; - cuopt::linear_programming::optimization_problem_solution_t solution = + cuopt::linear_programming::optimization_problem_solution_t solution = cuopt::linear_programming::solve_lp( &handle_, op_problem, settings, problem_checking, use_pdlp_solver_mode); @@ -160,3 +148,33 @@ int main(int argc, char* argv[]) return 0; } + +int main(int argc, char* argv[]) +{ + // Parse binary arguments + argparse::ArgumentParser program("solve_LP"); + parse_arguments(program); + + try { + program.parse_args(argc, argv); + } catch (const std::runtime_error& err) { + std::cerr << err.what() << std::endl; + std::cerr << program; + return 1; + } + + // Setup up RMM memory pool + auto memory_resource = make_pool(); + rmm::mr::set_current_device_resource(memory_resource.get()); + + // Initialize raft handle and running stream + const raft::handle_t handle_{}; + + // Run solver with appropriate precision + bool use_fp32 = program.get("--pdlp-fp32"); + if (use_fp32) { + return run_solver(program, handle_); + } else { + return run_solver(program, handle_); + } +} From c4e778efba8b54a15c5f6bbc2bcdb322077e9e60 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Wed, 11 Feb 2026 12:02:55 +0100 Subject: [PATCH 03/10] support fp32 with presolve --- .../linear_programming/cuopt/run_pdlp.cu | 20 +- cpp/src/linear_programming/solve.cu | 67 +++-- cpp/src/mip/presolve/gf2_presolve.cpp | 2 +- cpp/src/mip/presolve/third_party_presolve.cpp | 256 +++++++++++++----- cpp/src/mip/problem/presolve_data.cu | 2 +- cpp/tests/linear_programming/pdlp_test.cu | 33 ++- 6 files changed, 264 insertions(+), 116 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu index 78b4f42e9..f8b08c7e3 100644 --- a/benchmarks/linear_programming/cuopt/run_pdlp.cu +++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu @@ -70,11 +70,10 @@ static void parse_arguments(argparse::ArgumentParser& program) "Path to PDLP hyper-params file to configure PDLP solver. Has priority over PDLP solver " "modes."); - program.add_argument("--presolve") - .help("enable/disable presolve (default: true for MIP problems, false for LP problems)") - .default_value(0) - .scan<'i', int>() - .choices(0, 1); + program.add_argument("--presolver") + .help("Presolver to use. Possible values: None, Papilo, PSLP, Default") + .default_value("Default") + .choices("None", "Papilo", "PSLP", "Default"); program.add_argument("--solution-path").help("Path where solution file will be generated"); @@ -99,6 +98,15 @@ static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode( return cuopt::linear_programming::pdlp_solver_mode_t::Stable3; } +static cuopt::linear_programming::presolver_t string_to_presolver(const std::string& presolver) +{ + if (presolver == "None") return cuopt::linear_programming::presolver_t::None; + if (presolver == "Papilo") return cuopt::linear_programming::presolver_t::Papilo; + if (presolver == "PSLP") return cuopt::linear_programming::presolver_t::PSLP; + if (presolver == "Default") return cuopt::linear_programming::presolver_t::Default; + return cuopt::linear_programming::presolver_t::Default; +} + template static cuopt::linear_programming::pdlp_solver_settings_t create_solver_settings( const argparse::ArgumentParser& program) @@ -113,7 +121,7 @@ static cuopt::linear_programming::pdlp_solver_settings_t create_solver string_to_pdlp_solver_mode(program.get("--pdlp-solver-mode")); settings.method = static_cast(program.get("--method")); settings.crossover = program.get("--crossover"); - settings.presolve = program.get("--presolve"); + settings.presolver = string_to_presolver(program.get("--presolver")); return settings; } diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index d8b9fbe61..f141091bf 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -1126,6 +1126,7 @@ optimization_problem_solution_t solve_lp_with_method( } } else { // Float precision only supports PDLP without presolve/crossover + // TODO when running with cuopt_cli this doesn't show, should we just use CUOPT_LOG_INFO instead? cuopt_expects(settings.method == method_t::PDLP, error_type_t::ValidationError, "Float precision only supports PDLP method. DualSimplex, Barrier, and Concurrent " @@ -1300,41 +1301,39 @@ optimization_problem_solution_t solve_lp( auto solution = solve_lp_with_method(problem, settings, lp_timer, is_batch_mode); - if constexpr (std::is_same_v) { - if (run_presolve) { - auto primal_solution = cuopt::device_copy(solution.get_primal_solution(), - op_problem.get_handle_ptr()->get_stream()); - auto dual_solution = - cuopt::device_copy(solution.get_dual_solution(), op_problem.get_handle_ptr()->get_stream()); - auto reduced_costs = - cuopt::device_copy(solution.get_reduced_cost(), op_problem.get_handle_ptr()->get_stream()); - bool status_to_skip = false; - - presolver->undo(primal_solution, - dual_solution, - reduced_costs, - cuopt::linear_programming::problem_category_t::LP, - status_to_skip, - settings.dual_postsolve, - op_problem.get_handle_ptr()->get_stream()); + if (run_presolve) { + auto primal_solution = cuopt::device_copy(solution.get_primal_solution(), + op_problem.get_handle_ptr()->get_stream()); + auto dual_solution = + cuopt::device_copy(solution.get_dual_solution(), op_problem.get_handle_ptr()->get_stream()); + auto reduced_costs = + cuopt::device_copy(solution.get_reduced_cost(), op_problem.get_handle_ptr()->get_stream()); + bool status_to_skip = false; + + presolver->undo(primal_solution, + dual_solution, + reduced_costs, + cuopt::linear_programming::problem_category_t::LP, + status_to_skip, + settings.dual_postsolve, + op_problem.get_handle_ptr()->get_stream()); - std::vector< - typename optimization_problem_solution_t::additional_termination_information_t> - term_vec = solution.get_additional_termination_informations(); - std::vector status_vec = solution.get_terminations_status(); - - // Create a new solution with the full problem solution - solution = - optimization_problem_solution_t(primal_solution, - dual_solution, - reduced_costs, - std::move(solution.get_pdlp_warm_start_data()), - op_problem.get_objective_name(), - op_problem.get_variable_names(), - op_problem.get_row_names(), - std::move(term_vec), - std::move(status_vec)); - } + std::vector< + typename optimization_problem_solution_t::additional_termination_information_t> + term_vec = solution.get_additional_termination_informations(); + std::vector status_vec = solution.get_terminations_status(); + + // Create a new solution with the full problem solution + solution = + optimization_problem_solution_t(primal_solution, + dual_solution, + reduced_costs, + std::move(solution.get_pdlp_warm_start_data()), + op_problem.get_objective_name(), + op_problem.get_variable_names(), + op_problem.get_row_names(), + std::move(term_vec), + std::move(status_vec)); } if (settings.sol_file != "") { diff --git a/cpp/src/mip/presolve/gf2_presolve.cpp b/cpp/src/mip/presolve/gf2_presolve.cpp index b23526e15..af8ef4b77 100644 --- a/cpp/src/mip/presolve/gf2_presolve.cpp +++ b/cpp/src/mip/presolve/gf2_presolve.cpp @@ -247,7 +247,7 @@ papilo::PresolveStatus GF2Presolve::execute(const papilo::Problem& pro #define INSTANTIATE(F_TYPE) template class GF2Presolve; -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/mip/presolve/third_party_presolve.cpp b/cpp/src/mip/presolve/third_party_presolve.cpp index 3d0c17fde..ab38be653 100644 --- a/cpp/src/mip/presolve/third_party_presolve.cpp +++ b/cpp/src/mip/presolve/third_party_presolve.cpp @@ -30,6 +30,21 @@ namespace cuopt::linear_programming::detail { +// Helper to convert vector from one type to another (only when types differ) +template +std::vector convert_vector(const std::vector& src) +{ + if constexpr (std::is_same_v) { + return src; // No conversion needed + } else { + std::vector dst(src.size()); + for (size_t i = 0; i < src.size(); ++i) { + dst[i] = static_cast(src[i]); + } + return dst; + } +} + template papilo::Problem build_papilo_problem(const optimization_problem_t& op_problem, problem_category_t category, @@ -220,62 +235,70 @@ PSLPContext build_and_run_pslp_presolver(const optimization_problem_t& const auto& constr_ub = op_problem.get_constraint_upper_bounds(); const auto& var_types = op_problem.get_variable_types(); - // Copy data to host - std::vector h_coefficients(coefficients.size()); + // Copy data to host (using f_t type) + std::vector h_coefficients_ft(coefficients.size()); auto stream_view = op_problem.get_handle_ptr()->get_stream(); - raft::copy(h_coefficients.data(), coefficients.data(), coefficients.size(), stream_view); + raft::copy(h_coefficients_ft.data(), coefficients.data(), coefficients.size(), stream_view); std::vector h_offsets(offsets.size()); raft::copy(h_offsets.data(), offsets.data(), offsets.size(), stream_view); std::vector h_variables(variables.size()); raft::copy(h_variables.data(), variables.data(), variables.size(), stream_view); - std::vector h_obj_coeffs(obj_coeffs.size()); - raft::copy(h_obj_coeffs.data(), obj_coeffs.data(), obj_coeffs.size(), stream_view); - std::vector h_var_lb(var_lb.size()); - raft::copy(h_var_lb.data(), var_lb.data(), var_lb.size(), stream_view); - std::vector h_var_ub(var_ub.size()); - raft::copy(h_var_ub.data(), var_ub.data(), var_ub.size(), stream_view); - std::vector h_bounds(bounds.size()); - raft::copy(h_bounds.data(), bounds.data(), bounds.size(), stream_view); + std::vector h_obj_coeffs_ft(obj_coeffs.size()); + raft::copy(h_obj_coeffs_ft.data(), obj_coeffs.data(), obj_coeffs.size(), stream_view); + std::vector h_var_lb_ft(var_lb.size()); + raft::copy(h_var_lb_ft.data(), var_lb.data(), var_lb.size(), stream_view); + std::vector h_var_ub_ft(var_ub.size()); + raft::copy(h_var_ub_ft.data(), var_ub.data(), var_ub.size(), stream_view); + std::vector h_bounds_ft(bounds.size()); + raft::copy(h_bounds_ft.data(), bounds.data(), bounds.size(), stream_view); std::vector h_row_types(row_types.size()); raft::copy(h_row_types.data(), row_types.data(), row_types.size(), stream_view); - std::vector h_constr_lb(constr_lb.size()); - raft::copy(h_constr_lb.data(), constr_lb.data(), constr_lb.size(), stream_view); - std::vector h_constr_ub(constr_ub.size()); - raft::copy(h_constr_ub.data(), constr_ub.data(), constr_ub.size(), stream_view); + std::vector h_constr_lb_ft(constr_lb.size()); + raft::copy(h_constr_lb_ft.data(), constr_lb.data(), constr_lb.size(), stream_view); + std::vector h_constr_ub_ft(constr_ub.size()); + raft::copy(h_constr_ub_ft.data(), constr_ub.data(), constr_ub.size(), stream_view); std::vector h_var_types(var_types.size()); raft::copy(h_var_types.data(), var_types.data(), var_types.size(), stream_view); stream_view.synchronize(); if (maximize) { - for (size_t i = 0; i < h_obj_coeffs.size(); ++i) { - h_obj_coeffs[i] = -h_obj_coeffs[i]; + for (size_t i = 0; i < h_obj_coeffs_ft.size(); ++i) { + h_obj_coeffs_ft[i] = -h_obj_coeffs_ft[i]; } } - auto constr_bounds_empty = h_constr_lb.empty() && h_constr_ub.empty(); + auto constr_bounds_empty = h_constr_lb_ft.empty() && h_constr_ub_ft.empty(); if (constr_bounds_empty) { for (size_t i = 0; i < h_row_types.size(); ++i) { if (h_row_types[i] == 'L') { - h_constr_lb.push_back(-std::numeric_limits::infinity()); - h_constr_ub.push_back(h_bounds[i]); + h_constr_lb_ft.push_back(-std::numeric_limits::infinity()); + h_constr_ub_ft.push_back(h_bounds_ft[i]); } else if (h_row_types[i] == 'G') { - h_constr_lb.push_back(h_bounds[i]); - h_constr_ub.push_back(std::numeric_limits::infinity()); + h_constr_lb_ft.push_back(h_bounds_ft[i]); + h_constr_ub_ft.push_back(std::numeric_limits::infinity()); } else if (h_row_types[i] == 'E') { - h_constr_lb.push_back(h_bounds[i]); - h_constr_ub.push_back(h_bounds[i]); + h_constr_lb_ft.push_back(h_bounds_ft[i]); + h_constr_ub_ft.push_back(h_bounds_ft[i]); } } } // handle empty variable bounds - if (h_var_lb.empty()) { - h_var_lb = std::vector(num_cols, -std::numeric_limits::infinity()); + if (h_var_lb_ft.empty()) { + h_var_lb_ft = std::vector(num_cols, -std::numeric_limits::infinity()); } - if (h_var_ub.empty()) { - h_var_ub = std::vector(num_cols, std::numeric_limits::infinity()); + if (h_var_ub_ft.empty()) { + h_var_ub_ft = std::vector(num_cols, std::numeric_limits::infinity()); } + // Convert to double for PSLP API if necessary (PSLP only accepts double*) + std::vector h_coefficients = convert_vector(h_coefficients_ft); + std::vector h_obj_coeffs = convert_vector(h_obj_coeffs_ft); + std::vector h_var_lb = convert_vector(h_var_lb_ft); + std::vector h_var_ub = convert_vector(h_var_ub_ft); + std::vector h_constr_lb = convert_vector(h_constr_lb_ft); + std::vector h_constr_ub = convert_vector(h_constr_ub_ft); + // Call PSLP presolver ctx.settings = default_settings(); ctx.settings->verbose = false; @@ -331,7 +354,7 @@ optimization_problem_t build_optimization_problem_from_pslp( // PSLP does not allow setting the objective offset, so we add the original objective offset to // the reduced objective offset obj_offset += original_obj_offset; - op_problem.set_objective_offset(obj_offset); + op_problem.set_objective_offset(static_cast(obj_offset)); op_problem.set_maximize(maximize); op_problem.set_problem_category(problem_category_t::LP); @@ -343,21 +366,65 @@ optimization_problem_t build_optimization_problem_from_pslp( return op_problem; } - op_problem.set_csr_constraint_matrix( - reduced_prob->Ax, nnz, reduced_prob->Ai, nnz, reduced_prob->Ap, n_rows + 1); + if constexpr (std::is_same_v) { + // PSLP uses double internally, so we can use the data directly + op_problem.set_csr_constraint_matrix( + reduced_prob->Ax, nnz, reduced_prob->Ai, nnz, reduced_prob->Ap, n_rows + 1); - std::vector h_obj_coeffs(n_cols); - std::copy(reduced_prob->c, reduced_prob->c + n_cols, h_obj_coeffs.begin()); - if (maximize) { - for (size_t i = 0; i < n_cols; ++i) { - h_obj_coeffs[i] = -h_obj_coeffs[i]; + std::vector h_obj_coeffs(n_cols); + std::copy(reduced_prob->c, reduced_prob->c + n_cols, h_obj_coeffs.begin()); + if (maximize) { + for (size_t i = 0; i < n_cols; ++i) { + h_obj_coeffs[i] = -h_obj_coeffs[i]; + } + } + op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); + op_problem.set_constraint_lower_bounds(reduced_prob->lhs, n_rows); + op_problem.set_constraint_upper_bounds(reduced_prob->rhs, n_rows); + op_problem.set_variable_lower_bounds(reduced_prob->lbs, n_cols); + op_problem.set_variable_upper_bounds(reduced_prob->ubs, n_cols); + } else { + // Convert PSLP double arrays to f_t + // Constraint matrix values (Ax) + std::vector h_Ax(nnz); + for (int i = 0; i < nnz; ++i) { + h_Ax[i] = static_cast(reduced_prob->Ax[i]); + } + op_problem.set_csr_constraint_matrix( + h_Ax.data(), nnz, reduced_prob->Ai, nnz, reduced_prob->Ap, n_rows + 1); + + // Objective coefficients + std::vector h_obj_coeffs(n_cols); + for (int i = 0; i < n_cols; ++i) { + h_obj_coeffs[i] = static_cast(reduced_prob->c[i]); + } + if (maximize) { + for (int i = 0; i < n_cols; ++i) { + h_obj_coeffs[i] = -h_obj_coeffs[i]; + } + } + op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); + + // Constraint bounds + std::vector h_constr_lb(n_rows); + std::vector h_constr_ub(n_rows); + for (int i = 0; i < n_rows; ++i) { + h_constr_lb[i] = static_cast(reduced_prob->lhs[i]); + h_constr_ub[i] = static_cast(reduced_prob->rhs[i]); } + op_problem.set_constraint_lower_bounds(h_constr_lb.data(), n_rows); + op_problem.set_constraint_upper_bounds(h_constr_ub.data(), n_rows); + + // Variable bounds + std::vector h_var_lb(n_cols); + std::vector h_var_ub(n_cols); + for (int i = 0; i < n_cols; ++i) { + h_var_lb[i] = static_cast(reduced_prob->lbs[i]); + h_var_ub[i] = static_cast(reduced_prob->ubs[i]); + } + op_problem.set_variable_lower_bounds(h_var_lb.data(), n_cols); + op_problem.set_variable_upper_bounds(h_var_ub.data(), n_cols); } - op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); - op_problem.set_constraint_lower_bounds(reduced_prob->lhs, n_rows); - op_problem.set_constraint_upper_bounds(reduced_prob->rhs, n_rows); - op_problem.set_variable_lower_bounds(reduced_prob->lbs, n_cols); - op_problem.set_variable_upper_bounds(reduced_prob->ubs, n_cols); return op_problem; } @@ -396,6 +463,7 @@ optimization_problem_t build_optimization_problem( obj.coefficients[i] = -obj.coefficients[i]; } } + op_problem.set_objective_coefficients(obj.coefficients.data(), obj.coefficients.size()); auto& constraint_matrix = papilo_problem.getConstraintMatrix(); @@ -430,8 +498,9 @@ optimization_problem_t build_optimization_problem( i_t nnz = constraint_matrix.getNnz(); assert(offsets[nrows] == nnz); - const int* cols = constraint_matrix.getConstraintMatrix().getColumns(); - const f_t* coeffs = constraint_matrix.getConstraintMatrix().getValues(); + const int* cols = constraint_matrix.getConstraintMatrix().getColumns(); + const f_t* coeffs = constraint_matrix.getConstraintMatrix().getValues(); + op_problem.set_csr_constraint_matrix( &(coeffs[start]), nnz, &(cols[start]), nnz, offsets.data(), nrows + 1); @@ -497,7 +566,8 @@ void set_presolve_methods(papilo::Presolve& presolver, if (category == problem_category_t::MIP) { // cuOpt custom GF2 presolver - presolver.addPresolveMethod(uptr(new cuopt::linear_programming::detail::GF2Presolve())); + presolver.addPresolveMethod( + uptr(new cuopt::linear_programming::detail::GF2Presolve())); } // fast presolvers presolver.addPresolveMethod(uptr(new papilo::SingletonCols())); @@ -535,7 +605,7 @@ void set_presolve_options(papilo::Presolve& presolver, problem_category_t category, f_t absolute_tolerance, f_t relative_tolerance, - double time_limit, + f_t time_limit, bool dual_postsolve, i_t num_cpu_threads) { @@ -625,7 +695,7 @@ std::optional> third_party_presolve_t papilo_presolver; - set_presolve_methods(papilo_presolver, category, dual_postsolve); + set_presolve_methods(papilo_presolver, category, dual_postsolve); set_presolve_options(papilo_presolver, category, absolute_tolerance, @@ -633,7 +703,7 @@ std::optional> third_party_presolve_t( + set_presolve_parameters( papilo_presolver, category, op_problem.get_n_constraints(), op_problem.get_n_variables()); // Disable papilo logs @@ -697,12 +767,15 @@ void third_party_presolve_t::undo(rmm::device_uvector& primal_sol } if (status_to_skip) { return; } + std::vector primal_sol_vec_h(primal_solution.size()); raft::copy(primal_sol_vec_h.data(), primal_solution.data(), primal_solution.size(), stream_view); std::vector dual_sol_vec_h(dual_solution.size()); raft::copy(dual_sol_vec_h.data(), dual_solution.data(), dual_solution.size(), stream_view); std::vector reduced_costs_vec_h(reduced_costs.size()); - raft::copy(reduced_costs_vec_h.data(), reduced_costs.data(), reduced_costs.size(), stream_view); + raft::copy( + reduced_costs_vec_h.data(), reduced_costs.data(), reduced_costs.size(), stream_view); + papilo::Solution reduced_sol(primal_sol_vec_h); if (dual_postsolve) { reduced_sol.dual = dual_sol_vec_h; @@ -734,26 +807,71 @@ void third_party_presolve_t::undo_pslp(rmm::device_uvector& prima rmm::device_uvector& reduced_costs, rmm::cuda_stream_view stream_view) { - std::vector h_primal_solution(primal_solution.size()); - std::vector h_dual_solution(dual_solution.size()); - std::vector h_reduced_costs(reduced_costs.size()); - raft::copy(h_primal_solution.data(), primal_solution.data(), primal_solution.size(), stream_view); - raft::copy(h_dual_solution.data(), dual_solution.data(), dual_solution.size(), stream_view); - raft::copy(h_reduced_costs.data(), reduced_costs.data(), reduced_costs.size(), stream_view); - - postsolve( - pslp_presolver_, h_primal_solution.data(), h_dual_solution.data(), h_reduced_costs.data()); - - auto uncrushed_sol = pslp_presolver_->sol; - int n_cols = uncrushed_sol->dim_x; - int n_rows = uncrushed_sol->dim_y; - - primal_solution.resize(n_cols, stream_view); - dual_solution.resize(n_rows, stream_view); - reduced_costs.resize(n_cols, stream_view); - raft::copy(primal_solution.data(), uncrushed_sol->x, n_cols, stream_view); - raft::copy(dual_solution.data(), uncrushed_sol->y, n_rows, stream_view); - raft::copy(reduced_costs.data(), uncrushed_sol->z, n_cols, stream_view); + if constexpr (std::is_same_v) { + // PSLP uses double internally, so we can use the data directly + std::vector h_primal_solution(primal_solution.size()); + std::vector h_dual_solution(dual_solution.size()); + std::vector h_reduced_costs(reduced_costs.size()); + raft::copy( + h_primal_solution.data(), primal_solution.data(), primal_solution.size(), stream_view); + raft::copy(h_dual_solution.data(), dual_solution.data(), dual_solution.size(), stream_view); + raft::copy(h_reduced_costs.data(), reduced_costs.data(), reduced_costs.size(), stream_view); + stream_view.synchronize(); + + postsolve( + pslp_presolver_, h_primal_solution.data(), h_dual_solution.data(), h_reduced_costs.data()); + + auto uncrushed_sol = pslp_presolver_->sol; + int n_cols = uncrushed_sol->dim_x; + int n_rows = uncrushed_sol->dim_y; + + primal_solution.resize(n_cols, stream_view); + dual_solution.resize(n_rows, stream_view); + reduced_costs.resize(n_cols, stream_view); + raft::copy(primal_solution.data(), uncrushed_sol->x, n_cols, stream_view); + raft::copy(dual_solution.data(), uncrushed_sol->y, n_rows, stream_view); + raft::copy(reduced_costs.data(), uncrushed_sol->z, n_cols, stream_view); + } else { + // Convert f_t to double for PSLP postsolve API + std::vector h_primal_solution_ft(primal_solution.size()); + std::vector h_dual_solution_ft(dual_solution.size()); + std::vector h_reduced_costs_ft(reduced_costs.size()); + raft::copy( + h_primal_solution_ft.data(), primal_solution.data(), primal_solution.size(), stream_view); + raft::copy(h_dual_solution_ft.data(), dual_solution.data(), dual_solution.size(), stream_view); + raft::copy(h_reduced_costs_ft.data(), reduced_costs.data(), reduced_costs.size(), stream_view); + stream_view.synchronize(); + + std::vector h_primal_solution = convert_vector(h_primal_solution_ft); + std::vector h_dual_solution = convert_vector(h_dual_solution_ft); + std::vector h_reduced_costs = convert_vector(h_reduced_costs_ft); + + postsolve( + pslp_presolver_, h_primal_solution.data(), h_dual_solution.data(), h_reduced_costs.data()); + + auto uncrushed_sol = pslp_presolver_->sol; + int n_cols = uncrushed_sol->dim_x; + int n_rows = uncrushed_sol->dim_y; + + // Convert double results back to f_t and copy to device + std::vector h_primal_out(n_cols); + std::vector h_dual_out(n_rows); + std::vector h_reduced_costs_out(n_cols); + for (int i = 0; i < n_cols; ++i) { + h_primal_out[i] = static_cast(uncrushed_sol->x[i]); + h_reduced_costs_out[i] = static_cast(uncrushed_sol->z[i]); + } + for (int i = 0; i < n_rows; ++i) { + h_dual_out[i] = static_cast(uncrushed_sol->y[i]); + } + + primal_solution.resize(n_cols, stream_view); + dual_solution.resize(n_rows, stream_view); + reduced_costs.resize(n_cols, stream_view); + raft::copy(primal_solution.data(), h_primal_out.data(), n_cols, stream_view); + raft::copy(dual_solution.data(), h_dual_out.data(), n_rows, stream_view); + raft::copy(reduced_costs.data(), h_reduced_costs_out.data(), n_cols, stream_view); + } stream_view.synchronize(); } @@ -795,7 +913,7 @@ void papilo_postsolve_deleter::operator()(papilo::PostsolveStorage* pt delete ptr; } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template struct papilo_postsolve_deleter; template class third_party_presolve_t; #endif diff --git a/cpp/src/mip/problem/presolve_data.cu b/cpp/src/mip/problem/presolve_data.cu index d09754bc2..5cd9befdf 100644 --- a/cpp/src/mip/problem/presolve_data.cu +++ b/cpp/src/mip/problem/presolve_data.cu @@ -245,7 +245,7 @@ void presolve_data_t::papilo_uncrush_assignment( problem.handle_ptr->sync_stream(); } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class presolve_data_t; #endif diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 8fb7b579f..0bec43cd6 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -1962,7 +1962,7 @@ TEST(pdlp_class, float32_concurrent_throws_validation_error) EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); } -TEST(pdlp_class, float32_presolve_throws_validation_error) +TEST(pdlp_class, float32_papilo_presolve_works) { const raft::handle_t handle_{}; @@ -1970,13 +1970,36 @@ TEST(pdlp_class, float32_presolve_throws_validation_error) cuopt::mps_parser::mps_data_model_t op_problem = cuopt::mps_parser::parse_mps(path, true); - auto solver_settings = pdlp_solver_settings_t{}; - solver_settings.method = cuopt::linear_programming::method_t::PDLP; - solver_settings.presolve = true; + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.presolver = cuopt::linear_programming::presolver_t::Papilo; optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); - EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); + EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective_f32, + solution.get_additional_termination_information().primal_objective)); +} + +TEST(pdlp_class, float32_pslp_presolve_works) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.presolver = cuopt::linear_programming::presolver_t::PSLP; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective_f32, + solution.get_additional_termination_information().primal_objective)); } TEST(pdlp_class, float32_crossover_throws_validation_error) From 6a726685e113bd97b3a26137e93f0ec9ef7e57fe Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Wed, 11 Feb 2026 16:08:23 +0100 Subject: [PATCH 04/10] implement and toggle mixed precision --- cpp/src/linear_programming/cusparse_view.cu | 220 +++++++++++++++++- cpp/src/linear_programming/cusparse_view.hpp | 49 ++++ cpp/src/linear_programming/pdhg.cu | 108 ++++++--- cpp/src/linear_programming/pdlp.cu | 3 + cpp/src/linear_programming/pdlp_constants.hpp | 7 + 5 files changed, 350 insertions(+), 37 deletions(-) diff --git a/cpp/src/linear_programming/cusparse_view.cu b/cpp/src/linear_programming/cusparse_view.cu index 02332da03..35e5bbf97 100644 --- a/cpp/src/linear_programming/cusparse_view.cu +++ b/cpp/src/linear_programming/cusparse_view.cu @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -21,6 +22,14 @@ #include #include +#include +#include + +// Functor for double-to-float conversion on GPU +struct double_to_float_functor { + __host__ __device__ float operator()(double val) const { return static_cast(val); } +}; + namespace cuopt::linear_programming::detail { // cusparse_sp_mat_descr_wrapper_t implementation @@ -304,7 +313,12 @@ cusparse_view_t::cusparse_view_t( A_{op_problem_scaled.coefficients}, A_offsets_{op_problem_scaled.offsets}, A_indices_{op_problem_scaled.variables}, - climber_strategies_(climber_strategies) + climber_strategies_(climber_strategies), + A_float_{0, handle_ptr->get_stream()}, + A_T_float_{0, handle_ptr->get_stream()}, + buffer_non_transpose_mixed_{0, handle_ptr->get_stream()}, + buffer_transpose_mixed_{0, handle_ptr->get_stream()}, + mixed_precision_enabled_{false} { raft::common::nvtx::range fun_scope("Initializing cuSparse view"); @@ -583,6 +597,108 @@ cusparse_view_t::cusparse_view_t( handle_ptr->get_stream()); } #endif + + // Initialize mixed precision SpMV support + // Only when f_t = double and enable_mixed_precision_spmv is true + if constexpr (std::is_same_v && enable_mixed_precision_spmv) { + mixed_precision_enabled_ = true; + + // Create FP32 copies of A and A_T matrix values + A_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); + A_T_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); + + // Convert A values from double to float + thrust::transform(thrust::cuda::par.on(handle_ptr->get_stream()), + op_problem_scaled.coefficients.data(), + op_problem_scaled.coefficients.data() + op_problem_scaled.nnz, + A_float_.data(), + double_to_float_functor{}); + + // Convert A_T values from double to float + thrust::transform(thrust::cuda::par.on(handle_ptr->get_stream()), + A_T_.data(), + A_T_.data() + op_problem_scaled.nnz, + A_T_float_.data(), + double_to_float_functor{}); + + // Create FP32 matrix descriptors for mixed precision SpMV + RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_mixed_, + op_problem_scaled.n_constraints, + op_problem_scaled.n_variables, + op_problem_scaled.nnz, + const_cast(op_problem_scaled.offsets.data()), + const_cast(op_problem_scaled.variables.data()), + A_float_.data(), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F)); + + RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_T_mixed_, + op_problem_scaled.n_variables, + op_problem_scaled.n_constraints, + op_problem_scaled.nnz, + const_cast(A_T_offsets_.data()), + const_cast(A_T_indices_.data()), + A_T_float_.data(), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F)); + + // Compute buffer sizes for mixed precision SpMV + const rmm::device_scalar alpha_d{1.0, handle_ptr->get_stream()}; + const rmm::device_scalar beta_d{0.0, handle_ptr->get_stream()}; + + size_t buffer_size_non_transpose_mixed = + mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_mixed_, + c, + beta_d.data(), + dual_solution, + CUSPARSE_SPMV_CSR_ALG2, + handle_ptr->get_stream()); + buffer_non_transpose_mixed_.resize(buffer_size_non_transpose_mixed, handle_ptr->get_stream()); + + size_t buffer_size_transpose_mixed = + mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_T_mixed_, + dual_solution, + beta_d.data(), + c, + CUSPARSE_SPMV_CSR_ALG2, + handle_ptr->get_stream()); + buffer_transpose_mixed_.resize(buffer_size_transpose_mixed, handle_ptr->get_stream()); + +#if CUDA_VER_12_4_UP + // Preprocess mixed precision SpMV + mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_mixed_, + c, + beta_d.data(), + dual_solution, + CUSPARSE_SPMV_CSR_ALG2, + buffer_non_transpose_mixed_.data(), + handle_ptr->get_stream()); + + mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_T_mixed_, + dual_solution, + beta_d.data(), + c, + CUSPARSE_SPMV_CSR_ALG2, + buffer_transpose_mixed_.data(), + handle_ptr->get_stream()); +#endif + } } // Used by pdlp object for current and average termination condition @@ -625,7 +741,12 @@ cusparse_view_t::cusparse_view_t( A_{op_problem.coefficients}, A_offsets_{op_problem.offsets}, A_indices_{op_problem.variables}, - climber_strategies_(climber_strategies) + climber_strategies_(climber_strategies), + A_float_{0, handle_ptr->get_stream()}, + A_T_float_{0, handle_ptr->get_stream()}, + buffer_non_transpose_mixed_{0, handle_ptr->get_stream()}, + buffer_transpose_mixed_{0, handle_ptr->get_stream()}, + mixed_precision_enabled_{false} { #ifdef PDLP_DEBUG_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); @@ -832,7 +953,12 @@ cusparse_view_t::cusparse_view_t( A_{existing_cusparse_view.A_}, A_offsets_{existing_cusparse_view.A_offsets_}, A_indices_{existing_cusparse_view.A_indices_}, - climber_strategies_(existing_cusparse_view.climber_strategies_) + climber_strategies_(existing_cusparse_view.climber_strategies_), + A_float_{0, handle_ptr->get_stream()}, + A_T_float_{0, handle_ptr->get_stream()}, + buffer_non_transpose_mixed_{0, handle_ptr->get_stream()}, + buffer_transpose_mixed_{0, handle_ptr->get_stream()}, + mixed_precision_enabled_{false} { #ifdef PDLP_DEBUG_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); @@ -942,10 +1068,96 @@ cusparse_view_t::cusparse_view_t( A_(dummy_float), A_offsets_(dummy_int), A_indices_(dummy_int), - climber_strategies_(climber_strategies) + climber_strategies_(climber_strategies), + A_float_{0, handle_ptr->get_stream()}, + A_T_float_{0, handle_ptr->get_stream()}, + buffer_non_transpose_mixed_{0, handle_ptr->get_stream()}, + buffer_transpose_mixed_{0, handle_ptr->get_stream()}, + mixed_precision_enabled_{false} +{ +} + +// Update FP32 matrix copies after scaling (must be called after scale_problem()) +template +void cusparse_view_t::update_mixed_precision_matrices() +{ + if constexpr (std::is_same_v && enable_mixed_precision_spmv) { + if (!mixed_precision_enabled_) { return; } + + // The A_ and A_T_ references point to the scaled matrix data + // Update the FP32 copies with the scaled values + thrust::transform(thrust::cuda::par.on(handle_ptr_->get_stream()), + A_.data(), + A_.data() + A_.size(), + A_float_.data(), + double_to_float_functor{}); + + thrust::transform(thrust::cuda::par.on(handle_ptr_->get_stream()), + A_T_.data(), + A_T_.data() + A_T_.size(), + A_T_float_.data(), + double_to_float_functor{}); + + handle_ptr_->get_stream().synchronize(); + } +} + +// Mixed precision SpMV implementation: FP32 matrix with FP64 vectors and FP64 compute type +size_t mixed_precision_spmv_buffersize(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + cudaStream_t stream) { + size_t bufferSize = 0; + RAFT_CUSPARSE_TRY(cusparseSetStream(handle, stream)); + RAFT_CUSPARSE_TRY(cusparseSpMV_bufferSize( + handle, opA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, alg, &bufferSize)); + return bufferSize; } +void mixed_precision_spmv(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + void* externalBuffer, + cudaStream_t stream) +{ + RAFT_CUSPARSE_TRY(cusparseSetStream(handle, stream)); + RAFT_CUSPARSE_TRY( + cusparseSpMV(handle, opA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, alg, externalBuffer)); +} + +#if CUDA_VER_12_4_UP +void mixed_precision_spmv_preprocess(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + void* externalBuffer, + cudaStream_t stream) +{ + static const auto func = + dynamic_load_runtime::function("cusparseSpMV_preprocess"); + if (func.has_value()) { + RAFT_CUSPARSE_TRY(cusparseSetStream(handle, stream)); + RAFT_CUSPARSE_TRY( + (*func)(handle, opA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, alg, externalBuffer)); + } +} +#endif + #if PDLP_INSTANTIATE_FLOAT template class cusparse_sp_mat_descr_wrapper_t; template class cusparse_dn_vec_descr_wrapper_t; diff --git a/cpp/src/linear_programming/cusparse_view.hpp b/cpp/src/linear_programming/cusparse_view.hpp index 699c3aa6c..ffd4ebd7f 100644 --- a/cpp/src/linear_programming/cusparse_view.hpp +++ b/cpp/src/linear_programming/cusparse_view.hpp @@ -194,8 +194,57 @@ class cusparse_view_t { const rmm::device_uvector& A_indices_; const std::vector& climber_strategies_; + + // Mixed precision SpMV support (FP32 matrix with FP64 vectors/compute) + // Only used when enable_mixed_precision_spmv is true and f_t = double + rmm::device_uvector A_float_; // FP32 copy of A values + rmm::device_uvector A_T_float_; // FP32 copy of A_T values + cusparseSpMatDescr_t A_mixed_{nullptr}; // FP32 matrix descriptor for A + cusparseSpMatDescr_t A_T_mixed_{nullptr}; // FP32 matrix descriptor for A_T + rmm::device_uvector buffer_non_transpose_mixed_; // SpMV buffer for mixed precision A + rmm::device_uvector buffer_transpose_mixed_; // SpMV buffer for mixed precision A_T + bool mixed_precision_enabled_{false}; + + // Update FP32 matrix copies after scaling (must be called after scale_problem()) + void update_mixed_precision_matrices(); }; +// Mixed precision SpMV: FP32 matrix with FP64 vectors and FP64 compute type +// This is used for PDHG iterations when enable_mixed_precision_spmv is true +void mixed_precision_spmv(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + void* externalBuffer, + cudaStream_t stream); + +size_t mixed_precision_spmv_buffersize(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + cudaStream_t stream); + +#if CUDA_VER_12_4_UP +void mixed_precision_spmv_preprocess(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + void* externalBuffer, + cudaStream_t stream); +#endif + #if CUDA_VER_12_4_UP template < typename T, diff --git a/cpp/src/linear_programming/pdhg.cu b/cpp/src/linear_programming/pdhg.cu index f094e37dd..6341494c0 100644 --- a/cpp/src/linear_programming/pdhg.cu +++ b/cpp/src/linear_programming/pdhg.cu @@ -249,17 +249,31 @@ void pdhg_solver_t::compute_next_dual_solution(rmm::device_uvectorget_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), // 1 - cusparse_view_.A, - cusparse_view_.tmp_primal, - reusable_device_scalar_value_0_.data(), // 1 - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_non_transpose.data(), - stream_view_)); + if constexpr (std::is_same_v && enable_mixed_precision_spmv) { + // Mixed precision SpMV: FP32 matrix with FP64 vectors + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), // 1 + cusparse_view_.A_mixed_, + cusparse_view_.tmp_primal, + reusable_device_scalar_value_0_.data(), // 0 + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_non_transpose_mixed_.data(), + stream_view_); + } else { + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), // 1 + cusparse_view_.A, + cusparse_view_.tmp_primal, + reusable_device_scalar_value_0_.data(), // 0 + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_non_transpose.data(), + stream_view_)); + } // y - (sigma*dual_gradient) // max(min(0, sigma*constraint_upper+primal_product), sigma*constraint_lower+primal_product) @@ -287,17 +301,31 @@ void pdhg_solver_t::compute_At_y() // A_t @ y if (!batch_mode_) { - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T, - cusparse_view_.dual_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_transpose.data(), - stream_view_)); + if constexpr (std::is_same_v && enable_mixed_precision_spmv) { + // Mixed precision SpMV: FP32 matrix with FP64 vectors + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T_mixed_, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_transpose_mixed_.data(), + stream_view_); + } else { + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_transpose.data(), + stream_view_)); + } } else { RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm( handle_ptr_->get_cusparse_handle(), @@ -319,17 +347,31 @@ void pdhg_solver_t::compute_A_x() { // A @ x if (!batch_mode_) { - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A, - cusparse_view_.reflected_primal_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_non_transpose.data(), - stream_view_)); + if constexpr (std::is_same_v && enable_mixed_precision_spmv) { + // Mixed precision SpMV: FP32 matrix with FP64 vectors + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_mixed_, + cusparse_view_.reflected_primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_non_transpose_mixed_.data(), + stream_view_); + } else { + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A, + cusparse_view_.reflected_primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_non_transpose.data(), + stream_view_)); + } } else { RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm( handle_ptr_->get_cusparse_handle(), diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu index f68fa3e33..aa0f63d18 100644 --- a/cpp/src/linear_programming/pdlp.cu +++ b/cpp/src/linear_programming/pdlp.cu @@ -2086,6 +2086,9 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co initial_scaling_strategy_.scale_problem(); + // Update FP32 matrix copies for mixed precision SpMV after scaling + pdhg_solver_.get_cusparse_view().update_mixed_precision_matrices(); + if (!settings_.hyper_params.compute_initial_step_size_before_scaling && !settings_.get_initial_step_size().has_value()) compute_initial_step_size(); diff --git a/cpp/src/linear_programming/pdlp_constants.hpp b/cpp/src/linear_programming/pdlp_constants.hpp index cf17cc985..598b79ea6 100644 --- a/cpp/src/linear_programming/pdlp_constants.hpp +++ b/cpp/src/linear_programming/pdlp_constants.hpp @@ -71,4 +71,11 @@ template <> inline constexpr double safe_guard_for_extreme_values_in_primal_weight_computation = 1.0e-10; +// Mixed precision SpMV configuration for PDLP Stable3 mode +// When enabled, the constraint matrix A (and its transpose) are stored in FP32 +// while vectors and compute types remain in FP64 during PDHG iterations. +// This is NOT used during convergence checking which stays in full FP64. +// This only applies when f_t = double (FP64 mode). +inline constexpr bool enable_mixed_precision_spmv = true; + } // namespace cuopt::linear_programming::detail From ea37dd237d9245dca5a8b08a87de8ee6d10472f3 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 27 Feb 2026 14:33:40 +0100 Subject: [PATCH 05/10] cleanup and doc --- .../linear_programming/cuopt/run_pdlp.cu | 9 +- .../pdlp/solver_settings.hpp | 9 + cpp/src/pdlp/cpu_pdlp_warm_start_data.cu | 12 +- cpp/src/pdlp/cusparse_view.cu | 192 +++++++++--------- cpp/src/pdlp/cusparse_view.hpp | 6 +- cpp/src/pdlp/pdhg.cu | 94 +++++---- cpp/src/pdlp/pdhg.hpp | 3 +- cpp/src/pdlp/pdlp.cu | 3 +- cpp/src/pdlp/pdlp_constants.hpp | 7 - cpp/src/pdlp/solution_conversion.cu | 44 ++-- cpp/tests/linear_programming/pdlp_test.cu | 37 +++- docs/cuopt/source/lp-qp-features.rst | 14 ++ docs/cuopt/source/lp-qp-milp-settings.rst | 24 +++ 13 files changed, 279 insertions(+), 175 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu index a2e51cfd5..6e793997f 100644 --- a/benchmarks/linear_programming/cuopt/run_pdlp.cu +++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu @@ -78,7 +78,12 @@ static void parse_arguments(argparse::ArgumentParser& program) program.add_argument("--solution-path").help("Path where solution file will be generated"); program.add_argument("--pdlp-fp32") - .help("Use FP32 (float) precision instead of FP64 (double). Only PDLP method without presolve and crossover is supported.") + .help("Use FP32 (float) precision instead of FP64 (double). Only supported for PDLP method without crossover.") + .default_value(false) + .implicit_value(true); + + program.add_argument("--mixed-precision-spmv") + .help("Enable mixed precision SpMV (FP32 matrix, FP64 vectors) during PDHG iterations. Only supported for PDLP method in FP64.") .default_value(false) .implicit_value(true); } @@ -122,6 +127,8 @@ static cuopt::linear_programming::pdlp_solver_settings_t create_solver settings.method = static_cast(program.get("--method")); settings.crossover = program.get("--crossover"); settings.presolver = string_to_presolver(program.get("--presolver")); + settings.mixed_precision_spmv = + program.get("--mixed-precision-spmv"); return settings; } diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index 61906c286..59236d353 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -239,6 +239,15 @@ class pdlp_solver_settings_t { i_t ordering{-1}; i_t barrier_dual_initial_point{-1}; bool eliminate_dense_columns{true}; + /** + * @brief Enable mixed precision SpMV during PDHG iterations (FP64 mode only). + * + * When true, the constraint matrix A and its transpose are stored in FP32 while + * vectors and compute type remain in FP64, reducing memory bandwidth during SpMV. + * Convergence checking and restarts always use the full FP64 matrix, so this does + * not reduce overall memory usage. Has no effect in FP32 mode. + */ + bool mixed_precision_spmv{false}; bool save_best_primal_so_far{false}; bool first_primal_feasible{false}; presolver_t presolver{presolver_t::Default}; diff --git a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu index f9a73dff0..dd196ab66 100644 --- a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu +++ b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu @@ -108,14 +108,14 @@ pdlp_warm_start_data_t convert_to_gpu_warmstart( return gpu_data; } -// Explicit template instantiations +#if MIP_INSTANTIATE_DOUBLE template cpu_pdlp_warm_start_data_t convert_to_cpu_warmstart( const pdlp_warm_start_data_t&, rmm::cuda_stream_view); - -template pdlp_warm_start_data_t convert_to_gpu_warmstart( - const cpu_pdlp_warm_start_data_t&, rmm::cuda_stream_view); - -#if MIP_INSTANTIATE_FLOAT + template pdlp_warm_start_data_t convert_to_gpu_warmstart( + const cpu_pdlp_warm_start_data_t&, rmm::cuda_stream_view); +#endif + +#if PDLP_INSTANTIATE_FLOAT template cpu_pdlp_warm_start_data_t convert_to_cpu_warmstart( const pdlp_warm_start_data_t&, rmm::cuda_stream_view); diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index fa652f929..80f94f783 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -21,10 +21,8 @@ #include #include -#include -#include +#include -// Functor for double-to-float conversion on GPU struct double_to_float_functor { __host__ __device__ float operator()(double val) const { return static_cast(val); } }; @@ -285,7 +283,8 @@ cusparse_view_t::cusparse_view_t( rmm::device_uvector& _potential_next_dual_solution, rmm::device_uvector& _reflected_primal_solution, const std::vector& climber_strategies, - const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params) + const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, + bool enable_mixed_precision_spmv) : batch_mode_(climber_strategies.size() > 1), handle_ptr_(handle_ptr), A{}, @@ -597,60 +596,79 @@ cusparse_view_t::cusparse_view_t( } #endif - // Initialize mixed precision SpMV support - // Only when f_t = double and enable_mixed_precision_spmv is true - if constexpr (std::is_same_v && enable_mixed_precision_spmv) { - mixed_precision_enabled_ = true; - - // Create FP32 copies of A and A_T matrix values - A_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); - A_T_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); - - // Convert A values from double to float - thrust::transform(thrust::cuda::par.on(handle_ptr->get_stream()), - op_problem_scaled.coefficients.data(), - op_problem_scaled.coefficients.data() + op_problem_scaled.nnz, - A_float_.data(), - double_to_float_functor{}); - - // Convert A_T values from double to float - thrust::transform(thrust::cuda::par.on(handle_ptr->get_stream()), - A_T_.data(), - A_T_.data() + op_problem_scaled.nnz, - A_T_float_.data(), - double_to_float_functor{}); - - // Create FP32 matrix descriptors for mixed precision SpMV - RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_mixed_, - op_problem_scaled.n_constraints, - op_problem_scaled.n_variables, - op_problem_scaled.nnz, - const_cast(op_problem_scaled.offsets.data()), - const_cast(op_problem_scaled.variables.data()), - A_float_.data(), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F)); - - RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_T_mixed_, - op_problem_scaled.n_variables, - op_problem_scaled.n_constraints, - op_problem_scaled.nnz, - const_cast(A_T_offsets_.data()), - const_cast(A_T_indices_.data()), - A_T_float_.data(), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F)); - - // Compute buffer sizes for mixed precision SpMV - const rmm::device_scalar alpha_d{1.0, handle_ptr->get_stream()}; - const rmm::device_scalar beta_d{0.0, handle_ptr->get_stream()}; - - size_t buffer_size_non_transpose_mixed = - mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + if constexpr (std::is_same_v) { + if (enable_mixed_precision_spmv) { + mixed_precision_enabled_ = true; + + A_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); + A_T_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); + + cub::DeviceTransform::Transform(op_problem_scaled.coefficients.data(), + A_float_.data(), + op_problem_scaled.nnz, + double_to_float_functor{}, + handle_ptr->get_stream().value()); + + cub::DeviceTransform::Transform(A_T_.data(), + A_T_float_.data(), + op_problem_scaled.nnz, + double_to_float_functor{}, + handle_ptr->get_stream().value()); + + RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_mixed_, + op_problem_scaled.n_constraints, + op_problem_scaled.n_variables, + op_problem_scaled.nnz, + const_cast(op_problem_scaled.offsets.data()), + const_cast(op_problem_scaled.variables.data()), + A_float_.data(), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F)); + + RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_T_mixed_, + op_problem_scaled.n_variables, + op_problem_scaled.n_constraints, + op_problem_scaled.nnz, + const_cast(A_T_offsets_.data()), + const_cast(A_T_indices_.data()), + A_T_float_.data(), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_32F)); + + const rmm::device_scalar alpha_d{1.0, handle_ptr->get_stream()}; + const rmm::device_scalar beta_d{0.0, handle_ptr->get_stream()}; + + size_t buffer_size_non_transpose_mixed = + mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_mixed_, + c, + beta_d.data(), + dual_solution, + CUSPARSE_SPMV_CSR_ALG2, + handle_ptr->get_stream()); + buffer_non_transpose_mixed_.resize(buffer_size_non_transpose_mixed, + handle_ptr->get_stream()); + + size_t buffer_size_transpose_mixed = + mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_T_mixed_, + dual_solution, + beta_d.data(), + c, + CUSPARSE_SPMV_CSR_ALG2, + handle_ptr->get_stream()); + buffer_transpose_mixed_.resize(buffer_size_transpose_mixed, handle_ptr->get_stream()); + +#if CUDA_VER_12_4_UP + mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.data(), A_mixed_, @@ -658,11 +676,10 @@ cusparse_view_t::cusparse_view_t( beta_d.data(), dual_solution, CUSPARSE_SPMV_CSR_ALG2, + buffer_non_transpose_mixed_.data(), handle_ptr->get_stream()); - buffer_non_transpose_mixed_.resize(buffer_size_non_transpose_mixed, handle_ptr->get_stream()); - size_t buffer_size_transpose_mixed = - mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, alpha_d.data(), A_T_mixed_, @@ -670,33 +687,10 @@ cusparse_view_t::cusparse_view_t( beta_d.data(), c, CUSPARSE_SPMV_CSR_ALG2, + buffer_transpose_mixed_.data(), handle_ptr->get_stream()); - buffer_transpose_mixed_.resize(buffer_size_transpose_mixed, handle_ptr->get_stream()); - -#if CUDA_VER_12_4_UP - // Preprocess mixed precision SpMV - mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - alpha_d.data(), - A_mixed_, - c, - beta_d.data(), - dual_solution, - CUSPARSE_SPMV_CSR_ALG2, - buffer_non_transpose_mixed_.data(), - handle_ptr->get_stream()); - - mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - alpha_d.data(), - A_T_mixed_, - dual_solution, - beta_d.data(), - c, - CUSPARSE_SPMV_CSR_ALG2, - buffer_transpose_mixed_.data(), - handle_ptr->get_stream()); #endif + } } } @@ -1080,22 +1074,20 @@ cusparse_view_t::cusparse_view_t( template void cusparse_view_t::update_mixed_precision_matrices() { - if constexpr (std::is_same_v && enable_mixed_precision_spmv) { + if constexpr (std::is_same_v) { if (!mixed_precision_enabled_) { return; } - // The A_ and A_T_ references point to the scaled matrix data - // Update the FP32 copies with the scaled values - thrust::transform(thrust::cuda::par.on(handle_ptr_->get_stream()), - A_.data(), - A_.data() + A_.size(), - A_float_.data(), - double_to_float_functor{}); - - thrust::transform(thrust::cuda::par.on(handle_ptr_->get_stream()), - A_T_.data(), - A_T_.data() + A_T_.size(), - A_T_float_.data(), - double_to_float_functor{}); + cub::DeviceTransform::Transform(A_.data(), + A_float_.data(), + A_.size(), + double_to_float_functor{}, + handle_ptr_->get_stream().value()); + + cub::DeviceTransform::Transform(A_T_.data(), + A_T_float_.data(), + A_T_.size(), + double_to_float_functor{}, + handle_ptr_->get_stream().value()); handle_ptr_->get_stream().synchronize(); } diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index e97531860..4ada26d32 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -90,7 +90,8 @@ class cusparse_view_t { rmm::device_uvector& _potential_next_dual_solution, rmm::device_uvector& _reflected_primal_solution, const std::vector& climber_strategies, - const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params); + const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, + bool enable_mixed_precision_spmv); cusparse_view_t(raft::handle_t const* handle_ptr, const problem_t& op_problem, @@ -196,7 +197,7 @@ class cusparse_view_t { const std::vector& climber_strategies_; // Mixed precision SpMV support (FP32 matrix with FP64 vectors/compute) - // Only used when enable_mixed_precision_spmv is true and f_t = double + // Only used when mixed_precision_enabled_ is true and f_t = double rmm::device_uvector A_float_; // FP32 copy of A values rmm::device_uvector A_T_float_; // FP32 copy of A_T values cusparseSpMatDescr_t A_mixed_{nullptr}; // FP32 matrix descriptor for A @@ -210,7 +211,6 @@ class cusparse_view_t { }; // Mixed precision SpMV: FP32 matrix with FP64 vectors and FP64 compute type -// This is used for PDHG iterations when enable_mixed_precision_spmv is true void mixed_precision_spmv(cusparseHandle_t handle, cusparseOperation_t opA, const double* alpha, diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index 6c83b171c..a606eea8a 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -41,7 +41,8 @@ pdhg_solver_t::pdhg_solver_t( bool is_legacy_batch_mode, // Batch mode with streams const std::vector& climber_strategies, const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, - const std::vector>& new_bounds) + const std::vector>& new_bounds, + bool enable_mixed_precision_spmv) : batch_mode_(climber_strategies.size() > 1), handle_ptr_(handle_ptr), stream_view_(handle_ptr_->get_stream()), @@ -77,7 +78,8 @@ pdhg_solver_t::pdhg_solver_t( potential_next_dual_solution_, reflected_primal_, climber_strategies, - hyper_params}, + hyper_params, + enable_mixed_precision_spmv}, reusable_device_scalar_value_1_{1.0, stream_view_}, reusable_device_scalar_value_0_{0.0, stream_view_}, reusable_device_scalar_value_neg_1_{f_t(-1.0), stream_view_}, @@ -249,26 +251,28 @@ void pdhg_solver_t::compute_next_dual_solution(rmm::device_uvector && enable_mixed_precision_spmv) { - // Mixed precision SpMV: FP32 matrix with FP64 vectors - mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), // 1 - cusparse_view_.A_mixed_, - cusparse_view_.tmp_primal, - reusable_device_scalar_value_0_.data(), // 0 - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - cusparse_view_.buffer_non_transpose_mixed_.data(), - stream_view_); - } else { + if constexpr (std::is_same_v) { + if (cusparse_view_.mixed_precision_enabled_) { + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_mixed_, + cusparse_view_.tmp_primal, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_non_transpose_mixed_.data(), + stream_view_); + } + } + if (!cusparse_view_.mixed_precision_enabled_) { RAFT_CUSPARSE_TRY( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), // 1 + reusable_device_scalar_value_1_.data(), cusparse_view_.A, cusparse_view_.tmp_primal, - reusable_device_scalar_value_0_.data(), // 0 + reusable_device_scalar_value_0_.data(), cusparse_view_.dual_gradient, CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view_.buffer_non_transpose.data(), @@ -301,19 +305,21 @@ void pdhg_solver_t::compute_At_y() // A_t @ y if (!batch_mode_) { - if constexpr (std::is_same_v && enable_mixed_precision_spmv) { - // Mixed precision SpMV: FP32 matrix with FP64 vectors - mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T_mixed_, - cusparse_view_.dual_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, - CUSPARSE_SPMV_CSR_ALG2, - cusparse_view_.buffer_transpose_mixed_.data(), - stream_view_); - } else { + if constexpr (std::is_same_v) { + if (cusparse_view_.mixed_precision_enabled_) { + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T_mixed_, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_transpose_mixed_.data(), + stream_view_); + } + } + if (!cusparse_view_.mixed_precision_enabled_) { RAFT_CUSPARSE_TRY( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, @@ -347,19 +353,21 @@ void pdhg_solver_t::compute_A_x() { // A @ x if (!batch_mode_) { - if constexpr (std::is_same_v && enable_mixed_precision_spmv) { - // Mixed precision SpMV: FP32 matrix with FP64 vectors - mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A_mixed_, - cusparse_view_.reflected_primal_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - cusparse_view_.buffer_non_transpose_mixed_.data(), - stream_view_); - } else { + if constexpr (std::is_same_v) { + if (cusparse_view_.mixed_precision_enabled_) { + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_mixed_, + cusparse_view_.reflected_primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_non_transpose_mixed_.data(), + stream_view_); + } + } + if (!cusparse_view_.mixed_precision_enabled_) { RAFT_CUSPARSE_TRY( raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, diff --git a/cpp/src/pdlp/pdhg.hpp b/cpp/src/pdlp/pdhg.hpp index 8ff45ac0c..32722eae4 100644 --- a/cpp/src/pdlp/pdhg.hpp +++ b/cpp/src/pdlp/pdhg.hpp @@ -29,7 +29,8 @@ class pdhg_solver_t { bool is_legacy_batch_mode, const std::vector& climber_strategies, const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, - const std::vector>& new_bounds); + const std::vector>& new_bounds, + bool enable_mixed_precision_spmv = true); saddle_point_state_t& get_saddle_point_state(); cusparse_view_t& get_cusparse_view(); diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index a23e57f39..118f2a803 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -141,7 +141,8 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, is_legacy_batch_mode, climber_strategies_, settings_.hyper_params, - settings_.new_bounds}, + settings_.new_bounds, + settings_.mixed_precision_spmv}, initial_scaling_strategy_{handle_ptr_, op_problem_scaled_, settings_.hyper_params.default_l_inf_ruiz_iterations, diff --git a/cpp/src/pdlp/pdlp_constants.hpp b/cpp/src/pdlp/pdlp_constants.hpp index 598b79ea6..cf17cc985 100644 --- a/cpp/src/pdlp/pdlp_constants.hpp +++ b/cpp/src/pdlp/pdlp_constants.hpp @@ -71,11 +71,4 @@ template <> inline constexpr double safe_guard_for_extreme_values_in_primal_weight_computation = 1.0e-10; -// Mixed precision SpMV configuration for PDLP Stable3 mode -// When enabled, the constraint matrix A (and its transpose) are stored in FP32 -// while vectors and compute types remain in FP64 during PDHG iterations. -// This is NOT used during convergence checking which stays in full FP64. -// This only applies when f_t = double (FP64 mode). -inline constexpr bool enable_mixed_precision_spmv = true; - } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/pdlp/solution_conversion.cu b/cpp/src/pdlp/solution_conversion.cu index ea3c68126..a0d4f88ae 100644 --- a/cpp/src/pdlp/solution_conversion.cu +++ b/cpp/src/pdlp/solution_conversion.cu @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -134,6 +135,18 @@ cuopt::cython::mip_ret_t gpu_mip_solution_t::to_mip_ret_t() // CPU LP Solution Conversion // =========================== +namespace { +template +cuopt::cython::cpu_buffer to_cpu_buffer(std::vector& src) +{ + if constexpr (std::is_same_v) { + return std::move(src); + } else { + return cuopt::cython::cpu_buffer(src.begin(), src.end()); + } +} +} // namespace + template cuopt::cython::linear_programming_ret_t cpu_lp_solution_t::to_cpu_linear_programming_ret_t() @@ -142,22 +155,22 @@ cpu_lp_solution_t::to_cpu_linear_programming_ret_t() cuopt::cython::linear_programming_ret_t ret; cpu_solutions_t cpu; - cpu.primal_solution_ = std::move(primal_solution_); - cpu.dual_solution_ = std::move(dual_solution_); - cpu.reduced_cost_ = std::move(reduced_cost_); + cpu.primal_solution_ = to_cpu_buffer(primal_solution_); + cpu.dual_solution_ = to_cpu_buffer(dual_solution_); + cpu.reduced_cost_ = to_cpu_buffer(reduced_cost_); if (!pdlp_warm_start_data_.current_primal_solution_.empty()) { - cpu.current_primal_solution_ = std::move(pdlp_warm_start_data_.current_primal_solution_); - cpu.current_dual_solution_ = std::move(pdlp_warm_start_data_.current_dual_solution_); - cpu.initial_primal_average_ = std::move(pdlp_warm_start_data_.initial_primal_average_); - cpu.initial_dual_average_ = std::move(pdlp_warm_start_data_.initial_dual_average_); - cpu.current_ATY_ = std::move(pdlp_warm_start_data_.current_ATY_); - cpu.sum_primal_solutions_ = std::move(pdlp_warm_start_data_.sum_primal_solutions_); - cpu.sum_dual_solutions_ = std::move(pdlp_warm_start_data_.sum_dual_solutions_); + cpu.current_primal_solution_ = to_cpu_buffer(pdlp_warm_start_data_.current_primal_solution_); + cpu.current_dual_solution_ = to_cpu_buffer(pdlp_warm_start_data_.current_dual_solution_); + cpu.initial_primal_average_ = to_cpu_buffer(pdlp_warm_start_data_.initial_primal_average_); + cpu.initial_dual_average_ = to_cpu_buffer(pdlp_warm_start_data_.initial_dual_average_); + cpu.current_ATY_ = to_cpu_buffer(pdlp_warm_start_data_.current_ATY_); + cpu.sum_primal_solutions_ = to_cpu_buffer(pdlp_warm_start_data_.sum_primal_solutions_); + cpu.sum_dual_solutions_ = to_cpu_buffer(pdlp_warm_start_data_.sum_dual_solutions_); cpu.last_restart_duality_gap_primal_solution_ = - std::move(pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_); + to_cpu_buffer(pdlp_warm_start_data_.last_restart_duality_gap_primal_solution_); cpu.last_restart_duality_gap_dual_solution_ = - std::move(pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_); + to_cpu_buffer(pdlp_warm_start_data_.last_restart_duality_gap_dual_solution_); ret.initial_primal_weight_ = pdlp_warm_start_data_.initial_primal_weight_; ret.initial_step_size_ = pdlp_warm_start_data_.initial_step_size_; @@ -222,4 +235,11 @@ template cuopt::cython::linear_programming_ret_t cpu_lp_solution_t::to_cpu_linear_programming_ret_t(); template cuopt::cython::mip_ret_t cpu_mip_solution_t::to_cpu_mip_ret_t(); +#if PDLP_INSTANTIATE_FLOAT +template cuopt::cython::linear_programming_ret_t +gpu_lp_solution_t::to_linear_programming_ret_t(); +template cuopt::cython::linear_programming_ret_t +cpu_lp_solution_t::to_cpu_linear_programming_ret_t(); +#endif + } // namespace cuopt::linear_programming diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 3736a796c..8f47cef65 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -77,6 +77,41 @@ TEST(pdlp_class, run_double) afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } +TEST(pdlp_class, mixed_precision_spmv) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto settings_mixed = pdlp_solver_settings_t{}; + settings_mixed.method = cuopt::linear_programming::method_t::PDLP; + settings_mixed.mixed_precision_spmv = true; + + optimization_problem_solution_t solution_mixed = + solve_lp(&handle_, op_problem, settings_mixed); + EXPECT_EQ((int)solution_mixed.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, + solution_mixed.get_additional_termination_information().primal_objective)); + + auto settings_full = pdlp_solver_settings_t{}; + settings_full.method = cuopt::linear_programming::method_t::PDLP; + settings_full.mixed_precision_spmv = false; + + optimization_problem_solution_t solution_full = + solve_lp(&handle_, op_problem, settings_full); + EXPECT_EQ((int)solution_full.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, + solution_full.get_additional_termination_information().primal_objective)); + + EXPECT_NEAR(solution_mixed.get_additional_termination_information().primal_objective, + solution_full.get_additional_termination_information().primal_objective, + 1e-2); +} + TEST(pdlp_class, run_double_very_low_accuracy) { const raft::handle_t handle_{}; diff --git a/docs/cuopt/source/lp-qp-features.rst b/docs/cuopt/source/lp-qp-features.rst index 4bd178ed5..949599876 100644 --- a/docs/cuopt/source/lp-qp-features.rst +++ b/docs/cuopt/source/lp-qp-features.rst @@ -157,6 +157,20 @@ Batch Mode Users can submit a set of problems which will be solved in a batch. Problems will be solved at the same time in parallel to fully utilize the GPU. Checkout :ref:`self-hosted client ` example in thin client. +FP32 Precision Mode +------------------- + +By default, PDLP operates in FP64 (double) precision. Users can switch to FP32 (float) precision for the entire solve. FP32 uses half the memory of FP64 and allows PDHG iterations to be on average twice as fast, but it may require more iterations to converge due to reduced numerical accuracy. FP32 mode is only supported with the PDLP method (not concurrent) and without crossover. + +.. note:: The default precision is FP64 (double). + +Mixed Precision SpMV +-------------------- + +When running PDLP in FP64 mode, users can enable mixed precision sparse matrix-vector products (SpMV) during PDHG iterations. In this mode, the constraint matrix and its transpose are stored in FP32 while vectors and the compute type remain in FP64. This allows SpMV operations to be faster thanks to reduced memory bandwidth requirements, while maintaining FP64 accuracy in the accumulation. This will make PDHG iterations faster while limiting the potential negative impact on convergence (compared to running in FP32 mode). Convergence checking and restart logic always use the full FP64 matrix, so this mode does not reduce memory usage since both the FP32 and FP64 copies of the matrix are kept in memory. Mixed precision SpMV only applies in FP64 mode and has no effect when running in FP32. + +.. note:: The default value is false. + Multi-GPU Mode -------------- diff --git a/docs/cuopt/source/lp-qp-milp-settings.rst b/docs/cuopt/source/lp-qp-milp-settings.rst index bd1372f70..de3de5c85 100644 --- a/docs/cuopt/source/lp-qp-milp-settings.rst +++ b/docs/cuopt/source/lp-qp-milp-settings.rst @@ -192,6 +192,30 @@ Per Constraint Residual .. note:: The default value is false. +FP32 Precision +^^^^^^^^^^^^^^ + +``CUOPT_PDLP_FP32`` controls whether PDLP should run in FP32 (float) precision instead of FP64 (double). +FP32 uses half the memory of FP64 and allows PDHG iterations to be on average twice as fast, +but it may require more iterations to converge due to reduced numerical accuracy. +FP32 mode is only supported with the PDLP method (not concurrent) and without crossover. + +.. note:: The default precision is FP64 (double). + +Mixed Precision SpMV +^^^^^^^^^^^^^^^^^^^^ + +``CUOPT_MIXED_PRECISION_SPMV`` controls whether PDLP should use mixed precision sparse matrix-vector +products (SpMV) during PDHG iterations. When enabled, the constraint matrix and its transpose are stored +in FP32 while vectors and the compute type remain in FP64. This allows SpMV operations to be faster +thanks to reduced memory bandwidth requirements, while maintaining FP64 accuracy in the accumulation. +This will make PDHG iterations faster while limiting the potential negative impact on convergence +(compared to running in FP32 mode). Convergence checking and restart logic always use the full FP64 +matrix, so this mode does not reduce memory usage since both the FP32 and FP64 copies of the matrix +are kept in memory. Mixed precision SpMV only applies in FP64 mode and has no effect when running in FP32. + +.. note:: The default value is false. + Barrier Solver Settings ^^^^^^^^^^^^^^^^^^^^^^^^ From f8f673a02a5cf5b4aea7bdca07115ec5e12b97d9 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 27 Feb 2026 14:50:26 +0100 Subject: [PATCH 06/10] update doc --- docs/cuopt/source/lp-qp-milp-settings.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/cuopt/source/lp-qp-milp-settings.rst b/docs/cuopt/source/lp-qp-milp-settings.rst index de3de5c85..4a8c4c44a 100644 --- a/docs/cuopt/source/lp-qp-milp-settings.rst +++ b/docs/cuopt/source/lp-qp-milp-settings.rst @@ -198,6 +198,7 @@ FP32 Precision ``CUOPT_PDLP_FP32`` controls whether PDLP should run in FP32 (float) precision instead of FP64 (double). FP32 uses half the memory of FP64 and allows PDHG iterations to be on average twice as fast, but it may require more iterations to converge due to reduced numerical accuracy. +For an alternative that maintains FP64 accuracy while improving performance, see :ref:`Mixed Precision SpMV`. FP32 mode is only supported with the PDLP method (not concurrent) and without crossover. .. note:: The default precision is FP64 (double). From 64c54571de322714c729e54c8cad11a463602783 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 27 Feb 2026 15:02:02 +0100 Subject: [PATCH 07/10] style cleanup --- .../linear_programming/cuopt/run_pdlp.cu | 18 ++- cpp/src/math_optimization/solution_writer.hpp | 2 +- cpp/src/mip_heuristics/mip_constants.hpp | 2 +- .../presolve/third_party_presolve.cpp | 24 ++- cpp/src/pdlp/cpu_pdlp_warm_start_data.cu | 6 +- cpp/src/pdlp/cusparse_view.cu | 3 +- cpp/src/pdlp/cusparse_view.hpp | 8 +- cpp/src/pdlp/pdlp.cu | 148 +++++++++--------- cpp/src/pdlp/solve.cu | 105 +++++++------ cpp/tests/linear_programming/pdlp_test.cu | 32 ++-- 10 files changed, 177 insertions(+), 171 deletions(-) diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu index 6e793997f..2166a6474 100644 --- a/benchmarks/linear_programming/cuopt/run_pdlp.cu +++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu @@ -78,12 +78,16 @@ static void parse_arguments(argparse::ArgumentParser& program) program.add_argument("--solution-path").help("Path where solution file will be generated"); program.add_argument("--pdlp-fp32") - .help("Use FP32 (float) precision instead of FP64 (double). Only supported for PDLP method without crossover.") + .help( + "Use FP32 (float) precision instead of FP64 (double). Only supported for PDLP method without " + "crossover.") .default_value(false) .implicit_value(true); program.add_argument("--mixed-precision-spmv") - .help("Enable mixed precision SpMV (FP32 matrix, FP64 vectors) during PDHG iterations. Only supported for PDLP method in FP64.") + .help( + "Enable mixed precision SpMV (FP32 matrix, FP64 vectors) during PDHG iterations. Only " + "supported for PDLP method in FP64.") .default_value(false) .implicit_value(true); } @@ -121,14 +125,14 @@ static cuopt::linear_programming::pdlp_solver_settings_t create_solver settings.time_limit = static_cast(program.get("--time-limit")); settings.iteration_limit = program.get("--iteration-limit"); - settings.set_optimality_tolerance(static_cast(program.get("--optimality-tolerance"))); + settings.set_optimality_tolerance( + static_cast(program.get("--optimality-tolerance"))); settings.pdlp_solver_mode = string_to_pdlp_solver_mode(program.get("--pdlp-solver-mode")); settings.method = static_cast(program.get("--method")); - settings.crossover = program.get("--crossover"); - settings.presolver = string_to_presolver(program.get("--presolver")); - settings.mixed_precision_spmv = - program.get("--mixed-precision-spmv"); + settings.crossover = program.get("--crossover"); + settings.presolver = string_to_presolver(program.get("--presolver")); + settings.mixed_precision_spmv = program.get("--mixed-precision-spmv"); return settings; } diff --git a/cpp/src/math_optimization/solution_writer.hpp b/cpp/src/math_optimization/solution_writer.hpp index e187f6431..0ac1b6446 100644 --- a/cpp/src/math_optimization/solution_writer.hpp +++ b/cpp/src/math_optimization/solution_writer.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ diff --git a/cpp/src/mip_heuristics/mip_constants.hpp b/cpp/src/mip_heuristics/mip_constants.hpp index cf04df9b0..47d3d22de 100644 --- a/cpp/src/mip_heuristics/mip_constants.hpp +++ b/cpp/src/mip_heuristics/mip_constants.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ diff --git a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp index cdc7da4ec..bee0291b7 100644 --- a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp +++ b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp @@ -371,14 +371,14 @@ optimization_problem_t build_optimization_problem_from_pslp( op_problem.set_csr_constraint_matrix( reduced_prob->Ax, nnz, reduced_prob->Ai, nnz, reduced_prob->Ap, n_rows + 1); - std::vector h_obj_coeffs(n_cols); - std::copy(reduced_prob->c, reduced_prob->c + n_cols, h_obj_coeffs.begin()); - if (maximize) { - for (size_t i = 0; i < n_cols; ++i) { - h_obj_coeffs[i] = -h_obj_coeffs[i]; - } + std::vector h_obj_coeffs(n_cols); + std::copy(reduced_prob->c, reduced_prob->c + n_cols, h_obj_coeffs.begin()); + if (maximize) { + for (size_t i = 0; i < n_cols; ++i) { + h_obj_coeffs[i] = -h_obj_coeffs[i]; } - op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); + } + op_problem.set_objective_coefficients(h_obj_coeffs.data(), n_cols); op_problem.set_constraint_lower_bounds(reduced_prob->lhs, n_rows); op_problem.set_constraint_upper_bounds(reduced_prob->rhs, n_rows); op_problem.set_variable_lower_bounds(reduced_prob->lbs, n_cols); @@ -498,8 +498,8 @@ optimization_problem_t build_optimization_problem( i_t nnz = constraint_matrix.getNnz(); assert(offsets[nrows] == nnz); - const int* cols = constraint_matrix.getConstraintMatrix().getColumns(); - const f_t* coeffs = constraint_matrix.getConstraintMatrix().getValues(); + const int* cols = constraint_matrix.getConstraintMatrix().getColumns(); + const f_t* coeffs = constraint_matrix.getConstraintMatrix().getValues(); op_problem.set_csr_constraint_matrix( &(coeffs[start]), nnz, &(cols[start]), nnz, offsets.data(), nrows + 1); @@ -566,8 +566,7 @@ void set_presolve_methods(papilo::Presolve& presolver, if (category == problem_category_t::MIP) { // cuOpt custom GF2 presolver - presolver.addPresolveMethod( - uptr(new cuopt::linear_programming::detail::GF2Presolve())); + presolver.addPresolveMethod(uptr(new cuopt::linear_programming::detail::GF2Presolve())); } // fast presolvers presolver.addPresolveMethod(uptr(new papilo::SingletonCols())); @@ -773,8 +772,7 @@ void third_party_presolve_t::undo(rmm::device_uvector& primal_sol std::vector dual_sol_vec_h(dual_solution.size()); raft::copy(dual_sol_vec_h.data(), dual_solution.data(), dual_solution.size(), stream_view); std::vector reduced_costs_vec_h(reduced_costs.size()); - raft::copy( - reduced_costs_vec_h.data(), reduced_costs.data(), reduced_costs.size(), stream_view); + raft::copy(reduced_costs_vec_h.data(), reduced_costs.data(), reduced_costs.size(), stream_view); papilo::Solution reduced_sol(primal_sol_vec_h); if (dual_postsolve) { diff --git a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu index dd196ab66..603805605 100644 --- a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu +++ b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu @@ -111,10 +111,10 @@ pdlp_warm_start_data_t convert_to_gpu_warmstart( #if MIP_INSTANTIATE_DOUBLE template cpu_pdlp_warm_start_data_t convert_to_cpu_warmstart( const pdlp_warm_start_data_t&, rmm::cuda_stream_view); - template pdlp_warm_start_data_t convert_to_gpu_warmstart( - const cpu_pdlp_warm_start_data_t&, rmm::cuda_stream_view); +template pdlp_warm_start_data_t convert_to_gpu_warmstart( + const cpu_pdlp_warm_start_data_t&, rmm::cuda_stream_view); #endif - + #if PDLP_INSTANTIATE_FLOAT template cpu_pdlp_warm_start_data_t convert_to_cpu_warmstart( const pdlp_warm_start_data_t&, rmm::cuda_stream_view); diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 80f94f783..00903c986 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -652,8 +652,7 @@ cusparse_view_t::cusparse_view_t( dual_solution, CUSPARSE_SPMV_CSR_ALG2, handle_ptr->get_stream()); - buffer_non_transpose_mixed_.resize(buffer_size_non_transpose_mixed, - handle_ptr->get_stream()); + buffer_non_transpose_mixed_.resize(buffer_size_non_transpose_mixed, handle_ptr->get_stream()); size_t buffer_size_transpose_mixed = mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index 4ada26d32..2eb3358fe 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -198,10 +198,10 @@ class cusparse_view_t { // Mixed precision SpMV support (FP32 matrix with FP64 vectors/compute) // Only used when mixed_precision_enabled_ is true and f_t = double - rmm::device_uvector A_float_; // FP32 copy of A values - rmm::device_uvector A_T_float_; // FP32 copy of A_T values - cusparseSpMatDescr_t A_mixed_{nullptr}; // FP32 matrix descriptor for A - cusparseSpMatDescr_t A_T_mixed_{nullptr}; // FP32 matrix descriptor for A_T + rmm::device_uvector A_float_; // FP32 copy of A values + rmm::device_uvector A_T_float_; // FP32 copy of A_T values + cusparseSpMatDescr_t A_mixed_{nullptr}; // FP32 matrix descriptor for A + cusparseSpMatDescr_t A_T_mixed_{nullptr}; // FP32 matrix descriptor for A_T rmm::device_uvector buffer_non_transpose_mixed_; // SpMV buffer for mixed precision A rmm::device_uvector buffer_transpose_mixed_; // SpMV buffer for mixed precision A_T bool mixed_precision_enabled_{false}; diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index 118f2a803..20fc4d1ce 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -1978,49 +1978,49 @@ void pdlp_solver_t::transpose_primal_dual_to_row( rmm::device_uvector dual_slack_transposed( is_dual_slack_empty ? 0 : primal_size_h_ * climber_strategies_.size(), stream_view_); -RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_)); + RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_)); CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - climber_strategies_.size(), - primal_size_h_, - reusable_device_scalar_value_1_.data(), - primal_to_transpose.data(), - primal_size_h_, - reusable_device_scalar_value_0_.data(), - nullptr, - climber_strategies_.size(), - primal_transposed.data(), - climber_strategies_.size())); + CUBLAS_OP_T, + CUBLAS_OP_N, + climber_strategies_.size(), + primal_size_h_, + reusable_device_scalar_value_1_.data(), + primal_to_transpose.data(), + primal_size_h_, + reusable_device_scalar_value_0_.data(), + nullptr, + climber_strategies_.size(), + primal_transposed.data(), + climber_strategies_.size())); if (!is_dual_slack_empty) { CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - climber_strategies_.size(), - primal_size_h_, - reusable_device_scalar_value_1_.data(), - dual_slack_to_transpose.data(), - primal_size_h_, - reusable_device_scalar_value_0_.data(), - nullptr, - climber_strategies_.size(), - dual_slack_transposed.data(), - climber_strategies_.size())); + CUBLAS_OP_T, + CUBLAS_OP_N, + climber_strategies_.size(), + primal_size_h_, + reusable_device_scalar_value_1_.data(), + dual_slack_to_transpose.data(), + primal_size_h_, + reusable_device_scalar_value_0_.data(), + nullptr, + climber_strategies_.size(), + dual_slack_transposed.data(), + climber_strategies_.size())); } CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - climber_strategies_.size(), - dual_size_h_, - reusable_device_scalar_value_1_.data(), - dual_to_transpose.data(), - dual_size_h_, - reusable_device_scalar_value_0_.data(), - nullptr, - climber_strategies_.size(), - dual_transposed.data(), - climber_strategies_.size())); + CUBLAS_OP_T, + CUBLAS_OP_N, + climber_strategies_.size(), + dual_size_h_, + reusable_device_scalar_value_1_.data(), + dual_to_transpose.data(), + dual_size_h_, + reusable_device_scalar_value_0_.data(), + nullptr, + climber_strategies_.size(), + dual_transposed.data(), + climber_strategies_.size())); // Copy that holds the tranpose to the original vector raft::copy(primal_to_transpose.data(), @@ -2055,50 +2055,50 @@ void pdlp_solver_t::transpose_primal_dual_back_to_col( rmm::device_uvector dual_slack_transposed( is_dual_slack_empty ? 0 : primal_size_h_ * climber_strategies_.size(), stream_view_); - RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_)); + RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_)); CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - primal_size_h_, - climber_strategies_.size(), - reusable_device_scalar_value_1_.data(), - primal_to_transpose.data(), - climber_strategies_.size(), - reusable_device_scalar_value_0_.data(), - nullptr, - primal_size_h_, - primal_transposed.data(), - primal_size_h_)); + CUBLAS_OP_T, + CUBLAS_OP_N, + primal_size_h_, + climber_strategies_.size(), + reusable_device_scalar_value_1_.data(), + primal_to_transpose.data(), + climber_strategies_.size(), + reusable_device_scalar_value_0_.data(), + nullptr, + primal_size_h_, + primal_transposed.data(), + primal_size_h_)); if (!is_dual_slack_empty) { CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - primal_size_h_, - climber_strategies_.size(), - reusable_device_scalar_value_1_.data(), - dual_slack_to_transpose.data(), - climber_strategies_.size(), - reusable_device_scalar_value_0_.data(), - nullptr, - primal_size_h_, - dual_slack_transposed.data(), - primal_size_h_)); + CUBLAS_OP_T, + CUBLAS_OP_N, + primal_size_h_, + climber_strategies_.size(), + reusable_device_scalar_value_1_.data(), + dual_slack_to_transpose.data(), + climber_strategies_.size(), + reusable_device_scalar_value_0_.data(), + nullptr, + primal_size_h_, + dual_slack_transposed.data(), + primal_size_h_)); } CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - dual_size_h_, - climber_strategies_.size(), - reusable_device_scalar_value_1_.data(), - dual_to_transpose.data(), - climber_strategies_.size(), - reusable_device_scalar_value_0_.data(), - nullptr, - dual_size_h_, - dual_transposed.data(), - dual_size_h_)); + CUBLAS_OP_T, + CUBLAS_OP_N, + dual_size_h_, + climber_strategies_.size(), + reusable_device_scalar_value_1_.data(), + dual_to_transpose.data(), + climber_strategies_.size(), + reusable_device_scalar_value_0_.data(), + nullptr, + dual_size_h_, + dual_transposed.data(), + dual_size_h_)); // Copy that holds the tranpose to the original vector raft::copy(primal_to_transpose.data(), diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 850a19c6d..f7f7a5d63 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -606,57 +606,63 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& if constexpr (!std::is_same_v) { cuopt_expects(!settings.crossover, - error_type_t::ValidationError, - "PDLP with crossover is not supported for float precision. Set crossover=false or use double precision."); - } else { - const bool do_crossover = settings.crossover; - i_t crossover_info = 0; + error_type_t::ValidationError, + "PDLP with crossover is not supported for float precision. Set crossover=false " + "or use double precision."); + } else { + const bool do_crossover = settings.crossover; + i_t crossover_info = 0; if (do_crossover && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { crossover_info = -1; - dual_simplex::lp_problem_t lp(problem.handle_ptr, 1, 1, 1); - dual_simplex::lp_solution_t initial_solution(1, 1); - translate_to_crossover_problem(problem, sol, lp, initial_solution); - dual_simplex::simplex_solver_settings_t dual_simplex_settings; - dual_simplex_settings.time_limit = settings.time_limit; - dual_simplex_settings.iteration_limit = settings.iteration_limit; - dual_simplex_settings.concurrent_halt = settings.concurrent_halt; - dual_simplex::lp_solution_t vertex_solution(lp.num_rows, lp.num_cols); - std::vector vstatus(lp.num_cols); - dual_simplex::crossover_status_t crossover_status = dual_simplex::crossover( - lp, dual_simplex_settings, initial_solution, timer.get_tic_start(), vertex_solution, vstatus); - pdlp_termination_status_t termination_status = pdlp_termination_status_t::TimeLimit; - auto to_termination_status = [](dual_simplex::crossover_status_t status) { - switch (status) { - case dual_simplex::crossover_status_t::OPTIMAL: return pdlp_termination_status_t::Optimal; - case dual_simplex::crossover_status_t::PRIMAL_FEASIBLE: - return pdlp_termination_status_t::PrimalFeasible; - case dual_simplex::crossover_status_t::DUAL_FEASIBLE: - return pdlp_termination_status_t::NumericalError; - case dual_simplex::crossover_status_t::NUMERICAL_ISSUES: - return pdlp_termination_status_t::NumericalError; - case dual_simplex::crossover_status_t::CONCURRENT_LIMIT: - return pdlp_termination_status_t::ConcurrentLimit; - case dual_simplex::crossover_status_t::TIME_LIMIT: - return pdlp_termination_status_t::TimeLimit; - default: return pdlp_termination_status_t::NumericalError; - } - }; - termination_status = to_termination_status(crossover_status); - if (crossover_status == dual_simplex::crossover_status_t::OPTIMAL) { crossover_info = 0; } - rmm::device_uvector final_primal_solution = - cuopt::device_copy(vertex_solution.x, problem.handle_ptr->get_stream()); - rmm::device_uvector final_dual_solution = - cuopt::device_copy(vertex_solution.y, problem.handle_ptr->get_stream()); - rmm::device_uvector final_reduced_cost = - cuopt::device_copy(vertex_solution.z, problem.handle_ptr->get_stream()); - problem.handle_ptr->sync_stream(); - // Negate dual variables and reduced costs for maximization problems - if (problem.maximize) { - adjust_dual_solution_and_reduced_cost( - final_dual_solution, final_reduced_cost, problem.handle_ptr->get_stream()); + dual_simplex::lp_problem_t lp(problem.handle_ptr, 1, 1, 1); + dual_simplex::lp_solution_t initial_solution(1, 1); + translate_to_crossover_problem(problem, sol, lp, initial_solution); + dual_simplex::simplex_solver_settings_t dual_simplex_settings; + dual_simplex_settings.time_limit = settings.time_limit; + dual_simplex_settings.iteration_limit = settings.iteration_limit; + dual_simplex_settings.concurrent_halt = settings.concurrent_halt; + dual_simplex::lp_solution_t vertex_solution(lp.num_rows, lp.num_cols); + std::vector vstatus(lp.num_cols); + dual_simplex::crossover_status_t crossover_status = + dual_simplex::crossover(lp, + dual_simplex_settings, + initial_solution, + timer.get_tic_start(), + vertex_solution, + vstatus); + pdlp_termination_status_t termination_status = pdlp_termination_status_t::TimeLimit; + auto to_termination_status = [](dual_simplex::crossover_status_t status) { + switch (status) { + case dual_simplex::crossover_status_t::OPTIMAL: return pdlp_termination_status_t::Optimal; + case dual_simplex::crossover_status_t::PRIMAL_FEASIBLE: + return pdlp_termination_status_t::PrimalFeasible; + case dual_simplex::crossover_status_t::DUAL_FEASIBLE: + return pdlp_termination_status_t::NumericalError; + case dual_simplex::crossover_status_t::NUMERICAL_ISSUES: + return pdlp_termination_status_t::NumericalError; + case dual_simplex::crossover_status_t::CONCURRENT_LIMIT: + return pdlp_termination_status_t::ConcurrentLimit; + case dual_simplex::crossover_status_t::TIME_LIMIT: + return pdlp_termination_status_t::TimeLimit; + default: return pdlp_termination_status_t::NumericalError; + } + }; + termination_status = to_termination_status(crossover_status); + if (crossover_status == dual_simplex::crossover_status_t::OPTIMAL) { crossover_info = 0; } + rmm::device_uvector final_primal_solution = + cuopt::device_copy(vertex_solution.x, problem.handle_ptr->get_stream()); + rmm::device_uvector final_dual_solution = + cuopt::device_copy(vertex_solution.y, problem.handle_ptr->get_stream()); + rmm::device_uvector final_reduced_cost = + cuopt::device_copy(vertex_solution.z, problem.handle_ptr->get_stream()); problem.handle_ptr->sync_stream(); - } + // Negate dual variables and reduced costs for maximization problems + if (problem.maximize) { + adjust_dual_solution_and_reduced_cost( + final_dual_solution, final_reduced_cost, problem.handle_ptr->get_stream()); + problem.handle_ptr->sync_stream(); + } // Should be filled with more information from dual simplex std::vector< @@ -684,7 +690,7 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& crossover_info == 0 && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { // We finished. Tell dual simplex to stop if it is still running. CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "PDLP finished. Telling others to stop"); - *settings.concurrent_halt = 1; + *settings.concurrent_halt = 1; } } return sol; @@ -1133,12 +1139,11 @@ optimization_problem_solution_t solve_lp_with_method( } } else { // Float precision only supports PDLP without presolve/crossover - // TODO when running with cuopt_cli this doesn't show, should we just use CUOPT_LOG_INFO instead? cuopt_expects(settings.method == method_t::PDLP, error_type_t::ValidationError, "Float precision only supports PDLP method. DualSimplex, Barrier, and Concurrent " "require double precision."); - return run_pdlp(problem, settings, timer, is_batch_mode); + return run_pdlp(problem, settings, timer, is_batch_mode); } } diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 8f47cef65..0eb9fb60a 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -20,9 +20,9 @@ #include #include #include +#include #include #include -#include #include #include @@ -85,8 +85,8 @@ TEST(pdlp_class, mixed_precision_spmv) cuopt::mps_parser::mps_data_model_t op_problem = cuopt::mps_parser::parse_mps(path, true); - auto settings_mixed = pdlp_solver_settings_t{}; - settings_mixed.method = cuopt::linear_programming::method_t::PDLP; + auto settings_mixed = pdlp_solver_settings_t{}; + settings_mixed.method = cuopt::linear_programming::method_t::PDLP; settings_mixed.mixed_precision_spmv = true; optimization_problem_solution_t solution_mixed = @@ -96,8 +96,8 @@ TEST(pdlp_class, mixed_precision_spmv) afiro_primal_objective, solution_mixed.get_additional_termination_information().primal_objective)); - auto settings_full = pdlp_solver_settings_t{}; - settings_full.method = cuopt::linear_programming::method_t::PDLP; + auto settings_full = pdlp_solver_settings_t{}; + settings_full.method = cuopt::linear_programming::method_t::PDLP; settings_full.mixed_precision_spmv = false; optimization_problem_solution_t solution_full = @@ -1943,9 +1943,9 @@ TEST(pdlp_class, run_float32) solve_lp(&handle_, op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); - EXPECT_FALSE(is_incorrect_objective( - afiro_primal_objective_f32, - solution.get_additional_termination_information().primal_objective)); + EXPECT_FALSE( + is_incorrect_objective(afiro_primal_objective_f32, + solution.get_additional_termination_information().primal_objective)); } TEST(pdlp_class, float32_dual_simplex_throws_validation_error) @@ -2011,9 +2011,9 @@ TEST(pdlp_class, float32_papilo_presolve_works) optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); - EXPECT_FALSE(is_incorrect_objective( - afiro_primal_objective_f32, - solution.get_additional_termination_information().primal_objective)); + EXPECT_FALSE( + is_incorrect_objective(afiro_primal_objective_f32, + solution.get_additional_termination_information().primal_objective)); } TEST(pdlp_class, float32_pslp_presolve_works) @@ -2031,9 +2031,9 @@ TEST(pdlp_class, float32_pslp_presolve_works) optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); - EXPECT_FALSE(is_incorrect_objective( - afiro_primal_objective_f32, - solution.get_additional_termination_information().primal_objective)); + EXPECT_FALSE( + is_incorrect_objective(afiro_primal_objective_f32, + solution.get_additional_termination_information().primal_objective)); } TEST(pdlp_class, float32_crossover_throws_validation_error) @@ -2044,8 +2044,8 @@ TEST(pdlp_class, float32_crossover_throws_validation_error) cuopt::mps_parser::mps_data_model_t op_problem = cuopt::mps_parser::parse_mps(path, true); - auto solver_settings = pdlp_solver_settings_t{}; - solver_settings.method = cuopt::linear_programming::method_t::PDLP; + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; solver_settings.crossover = true; optimization_problem_solution_t solution = From 342ba3210174e60234b669fd1ee807f2a61bcc3a Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 27 Feb 2026 15:41:24 +0100 Subject: [PATCH 08/10] use || for all pdlp float instanciation --- cpp/src/dual_simplex/sparse_matrix.cpp | 2 +- cpp/src/math_optimization/solution_writer.cu | 2 +- .../mip_heuristics/local_search/rounding/simple_rounding.cu | 2 +- cpp/src/mip_heuristics/problem/problem.cu | 2 +- cpp/src/mip_heuristics/solution/solution.cu | 2 +- cpp/src/mip_heuristics/solver_solution.cu | 2 +- cpp/src/pdlp/cpu_pdlp_warm_start_data.cu | 2 +- cpp/src/pdlp/cusparse_view.cu | 4 ++-- cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu | 2 +- .../optimal_batch_size_handler/optimal_batch_size_handler.cu | 2 +- cpp/src/pdlp/optimization_problem.cu | 2 +- cpp/src/pdlp/pdhg.cu | 2 +- cpp/src/pdlp/pdlp.cu | 2 +- cpp/src/pdlp/pdlp_warm_start_data.cu | 2 +- .../pdlp/restart_strategy/localized_duality_gap_container.cu | 2 +- cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu | 2 +- cpp/src/pdlp/restart_strategy/weighted_average_solution.cu | 2 +- cpp/src/pdlp/saddle_point.cu | 2 +- cpp/src/pdlp/solution_conversion.cu | 2 +- cpp/src/pdlp/solve.cu | 2 +- cpp/src/pdlp/solver_settings.cu | 2 +- cpp/src/pdlp/solver_solution.cu | 2 +- .../pdlp/step_size_strategy/adaptive_step_size_strategy.cu | 2 +- cpp/src/pdlp/termination_strategy/convergence_information.cu | 2 +- .../pdlp/termination_strategy/infeasibility_information.cu | 2 +- cpp/src/pdlp/termination_strategy/termination_strategy.cu | 2 +- cpp/src/pdlp/utilities/problem_checking.cu | 2 +- cpp/tests/linear_programming/pdlp_test.cu | 4 ++-- 28 files changed, 30 insertions(+), 30 deletions(-) diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 343cf4149..399114c28 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -933,7 +933,7 @@ f_t sparse_dot(const std::vector& xind, return dot; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT // Minimal float instantiation for LP usage template class csc_matrix_t; template class csr_matrix_t; diff --git a/cpp/src/math_optimization/solution_writer.cu b/cpp/src/math_optimization/solution_writer.cu index f98eb28bf..880127546 100644 --- a/cpp/src/math_optimization/solution_writer.cu +++ b/cpp/src/math_optimization/solution_writer.cu @@ -42,7 +42,7 @@ void solution_writer_t::write_solution_to_sol_file(const std::string& filename, } } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template void solution_writer_t::write_solution_to_sol_file( const std::string& filename, const std::string& status, diff --git a/cpp/src/mip_heuristics/local_search/rounding/simple_rounding.cu b/cpp/src/mip_heuristics/local_search/rounding/simple_rounding.cu index ee62ea906..4f3a015a6 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/simple_rounding.cu +++ b/cpp/src/mip_heuristics/local_search/rounding/simple_rounding.cu @@ -179,7 +179,7 @@ void invoke_correct_integers(solution_t& solution, f_t tol) template void invoke_correct_integers(solution_t & solution, \ F_TYPE tol); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu index 87a668333..7990974d9 100644 --- a/cpp/src/mip_heuristics/problem/problem.cu +++ b/cpp/src/mip_heuristics/problem/problem.cu @@ -2292,7 +2292,7 @@ void problem_t::update_variable_bounds(const std::vector& var_ind RAFT_CHECK_CUDA(handle_ptr->get_stream()); } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class problem_t; #endif diff --git a/cpp/src/mip_heuristics/solution/solution.cu b/cpp/src/mip_heuristics/solution/solution.cu index ccfc842b8..531d54372 100644 --- a/cpp/src/mip_heuristics/solution/solution.cu +++ b/cpp/src/mip_heuristics/solution/solution.cu @@ -660,7 +660,7 @@ mip_solution_t solution_t::get_solution(bool output_feasible } } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class solution_t; #endif diff --git a/cpp/src/mip_heuristics/solver_solution.cu b/cpp/src/mip_heuristics/solver_solution.cu index 57d697ae2..e497a21c8 100644 --- a/cpp/src/mip_heuristics/solver_solution.cu +++ b/cpp/src/mip_heuristics/solver_solution.cu @@ -234,7 +234,7 @@ void mip_solution_t::log_summary() const CUOPT_LOG_INFO("Total Solve Time: %f", get_total_solve_time()); } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class mip_solution_t; #endif diff --git a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu index 603805605..b078bc477 100644 --- a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu +++ b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu @@ -115,7 +115,7 @@ template pdlp_warm_start_data_t convert_to_gpu_warmstart( const cpu_pdlp_warm_start_data_t&, rmm::cuda_stream_view); #endif -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template cpu_pdlp_warm_start_data_t convert_to_cpu_warmstart( const pdlp_warm_start_data_t&, rmm::cuda_stream_view); diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 00903c986..6e167b592 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -1148,7 +1148,7 @@ void mixed_precision_spmv_preprocess(cusparseHandle_t handle, } #endif -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class cusparse_sp_mat_descr_wrapper_t; template class cusparse_dn_vec_descr_wrapper_t; template class cusparse_dn_mat_descr_wrapper_t; @@ -1162,7 +1162,7 @@ template class cusparse_view_t; #endif #if CUDA_VER_12_4_UP -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template void my_cusparsespmm_preprocess(cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, diff --git a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu index 5562bfa80..b618550f6 100644 --- a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu +++ b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu @@ -858,7 +858,7 @@ pdlp_initial_scaling_strategy_t::view() int* A_T_offsets, \ int* A_T_indices); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu b/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu index 2fef13eb5..cbfb03618 100644 --- a/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu +++ b/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu @@ -434,7 +434,7 @@ int optimal_batch_size_handler(const optimization_problem_t& op_proble return 0; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template int optimal_batch_size_handler( const optimization_problem_t& op_problem, int max_batch_size); #endif diff --git a/cpp/src/pdlp/optimization_problem.cu b/cpp/src/pdlp/optimization_problem.cu index 8537c3114..302dd9cf1 100644 --- a/cpp/src/pdlp/optimization_problem.cu +++ b/cpp/src/pdlp/optimization_problem.cu @@ -1063,7 +1063,7 @@ bool optimization_problem_t::has_quadratic_objective() const return !Q_values_.empty(); } // NOTE: Explicitly instantiate all types here in order to avoid linker error -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class optimization_problem_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index a606eea8a..74df7fee0 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -1246,7 +1246,7 @@ rmm::device_uvector& pdhg_solver_t::get_dual_solution() return current_saddle_point_state_.get_dual_solution(); } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class pdhg_solver_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index 20fc4d1ce..81f7e3815 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -2971,7 +2971,7 @@ pdlp_solver_t::get_current_termination_strategy() return current_termination_strategy_; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class pdlp_solver_t; template __global__ void compute_weights_initial_primal_weight_from_squared_norms( diff --git a/cpp/src/pdlp/pdlp_warm_start_data.cu b/cpp/src/pdlp/pdlp_warm_start_data.cu index 9c293093a..80abf015d 100644 --- a/cpp/src/pdlp/pdlp_warm_start_data.cu +++ b/cpp/src/pdlp/pdlp_warm_start_data.cu @@ -178,7 +178,7 @@ void pdlp_warm_start_data_t::check_sizes() "All dual vectors should be of same size"); } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class pdlp_warm_start_data_t; #endif diff --git a/cpp/src/pdlp/restart_strategy/localized_duality_gap_container.cu b/cpp/src/pdlp/restart_strategy/localized_duality_gap_container.cu index b25fd3494..bb79e5b6e 100644 --- a/cpp/src/pdlp/restart_strategy/localized_duality_gap_container.cu +++ b/cpp/src/pdlp/restart_strategy/localized_duality_gap_container.cu @@ -144,7 +144,7 @@ localized_duality_gap_container_t::view() return v; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template struct localized_duality_gap_container_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu index 7656c7f2f..149e99a43 100644 --- a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu @@ -2523,7 +2523,7 @@ bool pdlp_restart_strategy_t::get_last_restart_was_average() const const typename localized_duality_gap_container_t::view_t duality_gap_view, \ F_TYPE* primal_product); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/restart_strategy/weighted_average_solution.cu b/cpp/src/pdlp/restart_strategy/weighted_average_solution.cu index 5eb54eb21..70a448a9d 100644 --- a/cpp/src/pdlp/restart_strategy/weighted_average_solution.cu +++ b/cpp/src/pdlp/restart_strategy/weighted_average_solution.cu @@ -139,7 +139,7 @@ i_t weighted_average_solution_t::get_iterations_since_last_restart() c return iterations_since_last_restart_; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template __global__ void add_weight_sums(const float* primal_weight, const float* dual_weight, float* sum_primal_solution_weights, diff --git a/cpp/src/pdlp/saddle_point.cu b/cpp/src/pdlp/saddle_point.cu index 7cf653dba..157e7fa38 100644 --- a/cpp/src/pdlp/saddle_point.cu +++ b/cpp/src/pdlp/saddle_point.cu @@ -166,7 +166,7 @@ rmm::device_uvector& saddle_point_state_t::get_next_AtY() return next_AtY_; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class saddle_point_state_t; #endif diff --git a/cpp/src/pdlp/solution_conversion.cu b/cpp/src/pdlp/solution_conversion.cu index a0d4f88ae..873a34064 100644 --- a/cpp/src/pdlp/solution_conversion.cu +++ b/cpp/src/pdlp/solution_conversion.cu @@ -235,7 +235,7 @@ template cuopt::cython::linear_programming_ret_t cpu_lp_solution_t::to_cpu_linear_programming_ret_t(); template cuopt::cython::mip_ret_t cpu_mip_solution_t::to_cpu_mip_ret_t(); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template cuopt::cython::linear_programming_ret_t gpu_lp_solution_t::to_linear_programming_ret_t(); template cuopt::cython::linear_programming_ret_t diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index f7f7a5d63..cf44fc538 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -1568,7 +1568,7 @@ std::unique_ptr> solve_lp( const cuopt::mps_parser::mps_data_model_t& data_model); \ template void set_pdlp_solver_mode(pdlp_solver_settings_t& settings); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/solver_settings.cu b/cpp/src/pdlp/solver_settings.cu index 683f5121a..7acfc7481 100644 --- a/cpp/src/pdlp/solver_settings.cu +++ b/cpp/src/pdlp/solver_settings.cu @@ -382,7 +382,7 @@ pdlp_solver_settings_t::get_pdlp_warm_start_data_view() const noexcept return pdlp_warm_start_data_view_; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class pdlp_solver_settings_t; #endif diff --git a/cpp/src/pdlp/solver_solution.cu b/cpp/src/pdlp/solver_solution.cu index e516b95d7..10e6a8059 100644 --- a/cpp/src/pdlp/solver_solution.cu +++ b/cpp/src/pdlp/solver_solution.cu @@ -448,7 +448,7 @@ void optimization_problem_solution_t::write_to_sol_file( std::string(filename), status, objective_value, var_names_, solution); } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class optimization_problem_solution_t; #endif diff --git a/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu b/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu index bd6e8c63c..d17a88dd2 100644 --- a/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu +++ b/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu @@ -578,7 +578,7 @@ adaptive_step_size_strategy_t::view() F_TYPE * dual_step_size, \ int* pdhg_iteration); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/termination_strategy/convergence_information.cu b/cpp/src/pdlp/termination_strategy/convergence_information.cu index 1bf6fead1..ab0c921cc 100644 --- a/cpp/src/pdlp/termination_strategy/convergence_information.cu +++ b/cpp/src/pdlp/termination_strategy/convergence_information.cu @@ -996,7 +996,7 @@ convergence_information_t::to_primal_quality_adapter( primal_objective_.element(0, stream_view_)}; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class convergence_information_t; template __global__ void compute_remaining_stats_kernel( diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu index bc87207e8..dbb35b732 100644 --- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu +++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu @@ -745,7 +745,7 @@ typename infeasibility_information_t::view_t infeasibility_information return v; } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class infeasibility_information_t; template __global__ void compute_remaining_stats_kernel( diff --git a/cpp/src/pdlp/termination_strategy/termination_strategy.cu b/cpp/src/pdlp/termination_strategy/termination_strategy.cu index d682151db..7179df6a4 100644 --- a/cpp/src/pdlp/termination_strategy/termination_strategy.cu +++ b/cpp/src/pdlp/termination_strategy/termination_strategy.cu @@ -681,7 +681,7 @@ void pdlp_termination_strategy_t::print_termination_criteria(i_t itera bool per_constraint_residual, \ int batch_size); -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/utilities/problem_checking.cu b/cpp/src/pdlp/utilities/problem_checking.cu index a3e088d2a..b10850de2 100644 --- a/cpp/src/pdlp/utilities/problem_checking.cu +++ b/cpp/src/pdlp/utilities/problem_checking.cu @@ -340,7 +340,7 @@ bool problem_checking_t::has_crossing_bounds( #define INSTANTIATE(F_TYPE) template class problem_checking_t; -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 0eb9fb60a..a46649be7 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -47,7 +47,7 @@ namespace cuopt::linear_programming::test { constexpr double afiro_primal_objective = -464.0; -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT constexpr float afiro_primal_objective_f32 = -464.0f; #endif // Accept a 1% error @@ -1927,7 +1927,7 @@ TEST(pdlp_class, some_climber_hit_iteration_limit) } } -#if PDLP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT TEST(pdlp_class, run_float32) { const raft::handle_t handle_{}; From 366fd6a96807725b67a8399b8065ffa1ae7d7360 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 27 Feb 2026 16:03:32 +0100 Subject: [PATCH 09/10] address PR comments --- cpp/src/math_optimization/solver_settings.cu | 6 +- cpp/src/pdlp/cusparse_view.cu | 84 +++++++++----------- cpp/src/pdlp/cusparse_view.hpp | 4 +- cpp/src/pdlp/pdhg.hpp | 2 +- cpp/src/pdlp/solve.cu | 22 +++-- 5 files changed, 59 insertions(+), 59 deletions(-) diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 438390ebd..696f9d34d 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -69,10 +69,10 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_MIP_ABSOLUTE_TOLERANCE, &mip_settings.tolerances.absolute_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, {CUOPT_MIP_RELATIVE_TOLERANCE, &mip_settings.tolerances.relative_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, {CUOPT_MIP_INTEGRALITY_TOLERANCE, &mip_settings.tolerances.integrality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-5)}, - {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, f_t(0.0), std::numeric_limits::infinity(), f_t(1e-10)}, + {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, f_t(0.0), std::numeric_limits::infinity(), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_MIP_RELATIVE_GAP, &mip_settings.tolerances.relative_mip_gap, f_t(0.0), f_t(1e-1), f_t(1e-4)}, - {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-10)}, - {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-10)}, + {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, + {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, {CUOPT_MIP_CUT_CHANGE_THRESHOLD, &mip_settings.cut_change_threshold, f_t(0.0), std::numeric_limits::infinity(), f_t(1e-3)}, {CUOPT_MIP_CUT_MIN_ORTHOGONALITY, &mip_settings.cut_min_orthogonality, f_t(0.0), f_t(1.0), f_t(0.5)} }; diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index 6e167b592..23ff3711d 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -597,47 +597,37 @@ cusparse_view_t::cusparse_view_t( #endif if constexpr (std::is_same_v) { - if (enable_mixed_precision_spmv) { + if (enable_mixed_precision_spmv && !batch_mode_) { mixed_precision_enabled_ = true; A_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); A_T_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); - cub::DeviceTransform::Transform(op_problem_scaled.coefficients.data(), - A_float_.data(), - op_problem_scaled.nnz, - double_to_float_functor{}, - handle_ptr->get_stream().value()); - - cub::DeviceTransform::Transform(A_T_.data(), - A_T_float_.data(), - op_problem_scaled.nnz, - double_to_float_functor{}, - handle_ptr->get_stream().value()); - - RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_mixed_, - op_problem_scaled.n_constraints, - op_problem_scaled.n_variables, - op_problem_scaled.nnz, - const_cast(op_problem_scaled.offsets.data()), - const_cast(op_problem_scaled.variables.data()), - A_float_.data(), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F)); - - RAFT_CUSPARSE_TRY(cusparseCreateCsr(&A_T_mixed_, - op_problem_scaled.n_variables, - op_problem_scaled.n_constraints, - op_problem_scaled.nnz, - const_cast(A_T_offsets_.data()), - const_cast(A_T_indices_.data()), - A_T_float_.data(), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_32F)); + RAFT_CUDA_TRY(cub::DeviceTransform::Transform(op_problem_scaled.coefficients.data(), + A_float_.data(), + op_problem_scaled.nnz, + double_to_float_functor{}, + handle_ptr->get_stream().value())); + + RAFT_CUDA_TRY(cub::DeviceTransform::Transform(A_T_.data(), + A_T_float_.data(), + op_problem_scaled.nnz, + double_to_float_functor{}, + handle_ptr->get_stream().value())); + + A_mixed_.create(op_problem_scaled.n_constraints, + op_problem_scaled.n_variables, + op_problem_scaled.nnz, + const_cast(op_problem_scaled.offsets.data()), + const_cast(op_problem_scaled.variables.data()), + A_float_.data()); + + A_T_mixed_.create(op_problem_scaled.n_variables, + op_problem_scaled.n_constraints, + op_problem_scaled.nnz, + const_cast(A_T_offsets_.data()), + const_cast(A_T_indices_.data()), + A_T_float_.data()); const rmm::device_scalar alpha_d{1.0, handle_ptr->get_stream()}; const rmm::device_scalar beta_d{0.0, handle_ptr->get_stream()}; @@ -1076,17 +1066,17 @@ void cusparse_view_t::update_mixed_precision_matrices() if constexpr (std::is_same_v) { if (!mixed_precision_enabled_) { return; } - cub::DeviceTransform::Transform(A_.data(), - A_float_.data(), - A_.size(), - double_to_float_functor{}, - handle_ptr_->get_stream().value()); - - cub::DeviceTransform::Transform(A_T_.data(), - A_T_float_.data(), - A_T_.size(), - double_to_float_functor{}, - handle_ptr_->get_stream().value()); + RAFT_CUDA_TRY(cub::DeviceTransform::Transform(A_.data(), + A_float_.data(), + A_.size(), + double_to_float_functor{}, + handle_ptr_->get_stream().value())); + + RAFT_CUDA_TRY(cub::DeviceTransform::Transform(A_T_.data(), + A_T_float_.data(), + A_T_.size(), + double_to_float_functor{}, + handle_ptr_->get_stream().value())); handle_ptr_->get_stream().synchronize(); } diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index 2eb3358fe..e83915e37 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -200,8 +200,8 @@ class cusparse_view_t { // Only used when mixed_precision_enabled_ is true and f_t = double rmm::device_uvector A_float_; // FP32 copy of A values rmm::device_uvector A_T_float_; // FP32 copy of A_T values - cusparseSpMatDescr_t A_mixed_{nullptr}; // FP32 matrix descriptor for A - cusparseSpMatDescr_t A_T_mixed_{nullptr}; // FP32 matrix descriptor for A_T + cusparse_sp_mat_descr_wrapper_t A_mixed_; // FP32 matrix descriptor for A + cusparse_sp_mat_descr_wrapper_t A_T_mixed_; // FP32 matrix descriptor for A_T rmm::device_uvector buffer_non_transpose_mixed_; // SpMV buffer for mixed precision A rmm::device_uvector buffer_transpose_mixed_; // SpMV buffer for mixed precision A_T bool mixed_precision_enabled_{false}; diff --git a/cpp/src/pdlp/pdhg.hpp b/cpp/src/pdlp/pdhg.hpp index 32722eae4..0a64e49ef 100644 --- a/cpp/src/pdlp/pdhg.hpp +++ b/cpp/src/pdlp/pdhg.hpp @@ -30,7 +30,7 @@ class pdhg_solver_t { const std::vector& climber_strategies, const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, const std::vector>& new_bounds, - bool enable_mixed_precision_spmv = true); + bool enable_mixed_precision_spmv = false); saddle_point_state_t& get_saddle_point_state(); cusparse_view_t& get_cusparse_view(); diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index cf44fc538..68526be45 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -583,6 +583,21 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& const timer_t& timer, bool is_batch_mode) { + if constexpr (!std::is_same_v) { + cuopt_expects(!settings.crossover, + error_type_t::ValidationError, + "PDLP with crossover is not supported for float precision. Set crossover=false " + "or use double precision."); + cuopt_expects(!is_batch_mode, + error_type_t::ValidationError, + "PDLP batch mode is not supported for float precision. Use double precision."); + } + cuopt_expects( + !is_batch_mode || !settings.mixed_precision_spmv, + error_type_t::ValidationError, + "Mixed-precision SpMV is not supported in batch mode. Set mixed_precision_spmv=false " + "or disable batch mode."); + auto start_solver = std::chrono::high_resolution_clock::now(); timer_t timer_pdlp(timer.remaining_time()); auto sol = run_pdlp_solver(problem, settings, timer, is_batch_mode); @@ -604,12 +619,7 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& sol.get_solve_time()); } - if constexpr (!std::is_same_v) { - cuopt_expects(!settings.crossover, - error_type_t::ValidationError, - "PDLP with crossover is not supported for float precision. Set crossover=false " - "or use double precision."); - } else { + if constexpr (std::is_same_v) { const bool do_crossover = settings.crossover; i_t crossover_info = 0; if (do_crossover && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { From abf13f3e65b9e8870092401eed7d320ac6cb0289 Mon Sep 17 00:00:00 2001 From: Nicolas Blin Date: Fri, 27 Feb 2026 17:14:19 +0100 Subject: [PATCH 10/10] add forgotten parameter --- cpp/src/math_optimization/solver_settings.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index 696f9d34d..a211b418b 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -60,6 +60,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings float_parameters = { {CUOPT_TIME_LIMIT, &mip_settings.time_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, {CUOPT_TIME_LIMIT, &pdlp_settings.time_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, + {CUOPT_WORK_LIMIT, &mip_settings.work_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, {CUOPT_ABSOLUTE_DUAL_TOLERANCE, &pdlp_settings.tolerances.absolute_dual_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, {CUOPT_RELATIVE_DUAL_TOLERANCE, &pdlp_settings.tolerances.relative_dual_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, {CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.absolute_primal_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)},