From 0e15e12ea98d9231b6f4ae2109f477d0f8fd70eb Mon Sep 17 00:00:00 2001 From: John Ralls Date: Fri, 7 Mar 2025 13:19:10 -0800 Subject: [PATCH] Improve GncNumeric(std::string) to handle unicode spaces. --- libgnucash/engine/gnc-numeric.cpp | 10 +++++++--- libgnucash/engine/test/gtest-gnc-numeric.cpp | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/libgnucash/engine/gnc-numeric.cpp b/libgnucash/engine/gnc-numeric.cpp index 62aacfaa60..629aca0a8c 100644 --- a/libgnucash/engine/gnc-numeric.cpp +++ b/libgnucash/engine/gnc-numeric.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -118,7 +119,9 @@ GncNumeric::GncNumeric(double d) : m_num(0), m_den(1) } using boost::regex; +using boost::u32regex; using boost::regex_search; +using boost::u32regex_search; using boost::smatch; @@ -251,7 +254,7 @@ GncNumeric::GncNumeric(const std::string &str, bool autoround) { static const std::string opt_signed_int("(-?[0-9]*)"); static const std::string opt_signed_separated_int("(-?[0-9]{1,3})"); static const std::string unsigned_int("([0-9]+)"); - static const std::string eu_separated_int("(?:[\\s'.]([0-9]{3}))?"); + static const std::string eu_separated_int("(?:[[:space:]'.]([0-9]{3}))?"); static const std::string en_separated_int("(?:\\,([0-9]{3}))?"); static const std::string eu_decimal_part("(?:\\,([0-9]+))?"); static const std::string en_decimal_part("(?:\\.([0-9]+))?"); @@ -272,7 +275,8 @@ GncNumeric::GncNumeric(const std::string &str, bool autoround) { static const regex hex_over_num(begin + hex_frag + slash + unsigned_int + end); static const regex num_over_hex(begin + opt_signed_int + slash + hex_frag + end); static const regex decimal(begin + opt_signed_int + "[.,]" + unsigned_int + end); - static const regex sep_decimal(begin + begin_group + eu_sep_decimal + or_op + en_sep_decimal + end_group + end); + static const u32regex sep_decimal = + boost::make_u32regex(begin + begin_group + eu_sep_decimal + or_op + en_sep_decimal + end_group + end); static const regex scientific("(?:(-?[0-9]+[.,]?)|(-?[0-9]*)[.,]([0-9]+))[Ee](-?[0-9]+)"); static const regex has_hex_prefix(".*0[xX]$"); smatch m, x; @@ -345,7 +349,7 @@ GncNumeric::GncNumeric(const std::string &str, bool autoround) { m_den = denom; return; } - if (regex_search(str, m, sep_decimal)) + if (u32regex_search(str, m, sep_decimal)) { /* There's a bit of magic here because of the complexity of * the regex. It supports two formats, one for locales that diff --git a/libgnucash/engine/test/gtest-gnc-numeric.cpp b/libgnucash/engine/test/gtest-gnc-numeric.cpp index 58bd1162ca..aca62033c7 100644 --- a/libgnucash/engine/test/gtest-gnc-numeric.cpp +++ b/libgnucash/engine/test/gtest-gnc-numeric.cpp @@ -156,8 +156,8 @@ TEST(gncnumeric_constructors, test_string_constructor) GncNumeric neg_continental_decimal("-123,456"); EXPECT_EQ(-123456, neg_continental_decimal.num()); EXPECT_EQ(1000, neg_continental_decimal.denom()); - ASSERT_NO_THROW(GncNumeric swiss_thousep_decimal("123 456 789,123")); - GncNumeric swiss_thousep_decimal("123 456 789,123"); + ASSERT_NO_THROW(GncNumeric swiss_thousep_decimal("123""\xe2\x80\xaf""456""\xe2\x80\xaf""789,123")); + GncNumeric swiss_thousep_decimal("123""\xe2\x80\xaf""456""\xe2\x80\xaf""789,123"); EXPECT_EQ(123456789123, swiss_thousep_decimal.num()); EXPECT_EQ(1000, swiss_thousep_decimal.denom()); GncNumeric from_scientific("1.234e4");