From e98d3534ced52b80994cb9e19b425c6662cfbcaa Mon Sep 17 00:00:00 2001 From: John Ralls Date: Fri, 23 Jun 2023 14:39:37 -0700 Subject: [PATCH] Bug 798901 - Wrong value for very small prices from Finance::Quote. Implement parsing number strings in scientific notation, avoiding interpreting hexadecimal integers that way, "e" being a hex digit. --- libgnucash/engine/gnc-numeric.cpp | 177 +++++++++++++------ libgnucash/engine/test/gtest-gnc-numeric.cpp | 12 ++ 2 files changed, 136 insertions(+), 53 deletions(-) diff --git a/libgnucash/engine/gnc-numeric.cpp b/libgnucash/engine/gnc-numeric.cpp index 1002ae83c2..6507e957a2 100644 --- a/libgnucash/engine/gnc-numeric.cpp +++ b/libgnucash/engine/gnc-numeric.cpp @@ -34,7 +34,9 @@ #include #include +#include #include +#include "gnc-int128.hpp" #include "qof.h" #include "gnc-numeric.hpp" @@ -113,14 +115,103 @@ GncNumeric::GncNumeric(double d) : m_num(0), m_den(1) } using boost::regex; -using boost::smatch; using boost::regex_search; -GncNumeric::GncNumeric(const std::string& str, bool autoround) +using boost::smatch; + + +static std::pair +reduce_number_pair(std::pairnum_pair, + const std::string& num_str, bool autoround) { + auto [n, d] = num_pair; + if (!autoround && n.isBig()) { + std::ostringstream errmsg; + errmsg << "Decimal string " << num_str + << "can't be represented in a GncNumeric without rounding."; + throw std::overflow_error(errmsg.str()); + } + while (n.isBig() && d > 0) { + n >>= 1; + d >>= 1; + } + if (n.isBig()) // Shouldn't happen, of course + { + std::ostringstream errmsg; + errmsg << "Decimal string " << num_str + << " can't be represented in a GncNumeric, even after reducing " + "denom to " + << d; + throw std::overflow_error(errmsg.str()); + } + return std::make_pair(static_cast(n), static_cast(d)); +} + +static std::pair +numeric_from_decimal_match(const std::string& integer, const std::string& decimal) +{ + auto neg = (!integer.empty() && integer[0] == '-'); + GncInt128 high((neg && integer.length() > 1) || (!neg && !integer.empty()) + ? stoll(integer) + : 0); + GncInt128 low{ decimal.empty() ? 0 : stoll(decimal)}; + auto exp10 = decimal.length(); + int64_t d = powten(exp10); + GncInt128 n = high * d + (neg ? -low : low); + while (exp10 > max_leg_digits) { + /* If the arg to powten is bigger than max_leg_digits + it returns 10**max_leg_digits so reduce exp10 by + that amount */ + n = n / powten(exp10 - max_leg_digits); + exp10 -= max_leg_digits; + } + return std::make_pair(n, d); +} + +static std::pair +numeric_from_scientific_match(smatch &m) +{ + int exp{m[4].matched ? stoi(m[4].str()) : 0}; + auto neg_exp{exp < 0}; + exp = neg_exp ? -exp : exp; + if (exp >= max_leg_digits) + { + std::ostringstream errmsg; + errmsg << "Exponent " << m[3].str() << " in match " << m[0].str() + << " exceeds range that GnuCash can parse."; + throw std::overflow_error(errmsg.str()); + } + + GncInt128 num, denom; + auto mult = powten(exp); + + if (m[1].matched) + { + denom = neg_exp ? mult : 1; + num = neg_exp ? stoll(m[1].str()) : mult * stoll(m[1].str()); + } + else + { + auto [d_num, d_denom] = numeric_from_decimal_match(m[2].str(), m[3].str()); + + if (neg_exp || d_denom > mult) + { + num = d_num; + denom = neg_exp ? d_denom * mult : d_denom / mult; + } + else + { + num = d_num * mult / d_denom; + denom = 1; + } + } + return std::make_pair(num, denom); +} + +GncNumeric::GncNumeric(const std::string &str, bool autoround) { static const std::string numer_frag("(-?[0-9]*)"); static const std::string denom_frag("([0-9]+)"); - static const std::string hex_frag("(0x[a-f0-9]+)"); - static const std::string slash( "[ \\t]*/[ \\t]*"); + static const std::string hex_frag("(0[xX][A-Fa-f0-9]+)"); + static const std::string slash("[ \\t]*/[ \\t]*"); /* The llvm standard C++ library refused to recognize the - in the * numer_frag pattern with the default ECMAScript syntax so we use the * awk syntax. @@ -132,33 +223,35 @@ GncNumeric::GncNumeric(const std::string& str, bool autoround) static const regex hex_over_num(hex_frag + slash + denom_frag); static const regex num_over_hex(numer_frag + slash + hex_frag); static const regex decimal(numer_frag + "[.,]" + denom_frag); - smatch m; -/* The order of testing the regexes is from the more restrictve to the less - * restrictive, as less-restrictive ones will match patterns that would also - * match the more-restrictive and so invoke the wrong construction. - */ + static const regex scientific("(?:(-?[0-9]+[.,]?)|(-?[0-9]*)[.,]([0-9]+))[Ee](-?[0-9]+)"); + static const regex has_hex_prefix(".*0[xX]$"); + smatch m, x; + /* The order of testing the regexes is from the more restrictve to the less + * restrictive, as less-restrictive ones will match patterns that would also + * match the more-restrictive and so invoke the wrong construction. + */ if (str.empty()) - throw std::invalid_argument("Can't construct a GncNumeric from an empty string."); + throw std::invalid_argument( + "Can't construct a GncNumeric from an empty string."); if (regex_search(str, m, hex_rational)) { GncNumeric n(stoll(m[1].str(), nullptr, 16), stoll(m[2].str(), nullptr, 16)); + m_num = n.num(); m_den = n.denom(); return; } if (regex_search(str, m, hex_over_num)) { - GncNumeric n(stoll(m[1].str(), nullptr, 16), - stoll(m[2].str())); + GncNumeric n(stoll(m[1].str(), nullptr, 16), stoll(m[2].str())); m_num = n.num(); m_den = n.denom(); return; } if (regex_search(str, m, num_over_hex)) { - GncNumeric n(stoll(m[1].str()), - stoll(m[2].str(), nullptr, 16)); + GncNumeric n(stoll(m[1].str()), stoll(m[2].str(), nullptr, 16)); m_num = n.num(); m_den = n.denom(); return; @@ -170,51 +263,29 @@ GncNumeric::GncNumeric(const std::string& str, bool autoround) m_den = n.denom(); return; } + if (regex_search(str, m, scientific) && ! regex_match(m.prefix().str(), x, has_hex_prefix)) + { + auto [num, denom] = + reduce_number_pair(numeric_from_scientific_match(m), + str, autoround); + m_num = num; + m_den = denom; + return; + } if (regex_search(str, m, decimal)) { - auto neg = (m[1].length() && m[1].str()[0] == '-'); - GncInt128 high((neg && m[1].length() > 1) || (!neg && m[1].length()) ? - stoll(m[1].str()) : 0); - GncInt128 low(stoll(m[2].str())); - auto exp10 = m[2].str().length(); - int64_t d = powten(exp10); - GncInt128 n = high * d + (neg ? -low : low); - while (exp10 > max_leg_digits) - { - /* If the arg to powten is bigger than max_leg_digits - it returns 10**max_leg_digits so reduce exp10 by - that amount */ - n = n / powten(exp10 - max_leg_digits); - exp10 -= max_leg_digits; - } - - if (!autoround && n.isBig()) - { - std::ostringstream errmsg; - errmsg << "Decimal string " << m[1].str() << "." << m[2].str() - << "can't be represented in a GncNumeric without rounding."; - throw std::overflow_error(errmsg.str()); - } - while (n.isBig() && d > 0) - { - n >>= 1; - d >>= 1; - } - if (n.isBig()) //Shouldn't happen, of course - { - std::ostringstream errmsg; - errmsg << "Decimal string " << m[1].str() << "." << m[2].str() - << " can't be represented in a GncNumeric, even after reducing denom to " << d; - throw std::overflow_error(errmsg.str()); - } - GncNumeric gncn(static_cast(n), d); - m_num = gncn.num(); - m_den = gncn.denom(); + std::string integer{m[1].matched ? m[1].str() : ""}; + std::string decimal{m[2].matched ? m[2].str() : ""}; + auto [num, denom] = + reduce_number_pair(numeric_from_decimal_match(integer, decimal), + str, autoround); + m_num = num; + m_den = denom; return; } if (regex_search(str, m, hex)) { - GncNumeric n(stoll(m[1].str(), nullptr, 16),INT64_C(1)); + GncNumeric n(stoll(m[1].str(), nullptr, 16), INT64_C(1)); m_num = n.num(); m_den = n.denom(); return; diff --git a/libgnucash/engine/test/gtest-gnc-numeric.cpp b/libgnucash/engine/test/gtest-gnc-numeric.cpp index 1eaf5b328c..8194359d1c 100644 --- a/libgnucash/engine/test/gtest-gnc-numeric.cpp +++ b/libgnucash/engine/test/gtest-gnc-numeric.cpp @@ -148,6 +148,18 @@ TEST(gncnumeric_constructors, test_string_constructor) GncNumeric neg_continental_decimal("-123,456"); EXPECT_EQ(-123456, neg_continental_decimal.num()); EXPECT_EQ(1000, neg_continental_decimal.denom()); + GncNumeric from_scientific("1.234e4"); + EXPECT_EQ(12340, from_scientific.num()); + EXPECT_EQ(1, from_scientific.denom()); + GncNumeric from_no_int_scientific(".234e4"); + EXPECT_EQ(2340, from_no_int_scientific.num()); + EXPECT_EQ(1, from_no_int_scientific.denom()); + GncNumeric from_int_only_scientific("1234e2"); + EXPECT_EQ(123400, from_int_only_scientific.num()); + EXPECT_EQ(1, from_int_only_scientific.denom()); + GncNumeric from_neg_sci("1.234e-2"); + EXPECT_EQ(1234, from_neg_sci.num()); + EXPECT_EQ(100000, from_neg_sci.denom()); ASSERT_NO_THROW(GncNumeric hex_rational("0x1e240/0x1c8")); GncNumeric hex_rational("0x1e240/0x1c8"); EXPECT_EQ(123456, hex_rational.num());