Improve GncNumeric(std::string) to handle Unicode spaces.

pull/2061/head
John Ralls 1 year ago
parent 1fa4f71f59
commit 0e15e12ea9

@ -31,6 +31,7 @@
#include <cstdint>
#include <sstream>
#include <boost/regex.hpp>
#include <boost/regex/icu.hpp>
#include <boost/locale/encoding_utf.hpp>
#include <config.h>
@ -118,7 +119,9 @@ GncNumeric::GncNumeric(double d) : m_num(0), m_den(1)
}
using boost::regex;
using boost::u32regex;
using boost::regex_search;
using boost::u32regex_search;
using boost::smatch;
@ -251,7 +254,7 @@ GncNumeric::GncNumeric(const std::string &str, bool autoround) {
static const std::string opt_signed_int("(-?[0-9]*)");
static const std::string opt_signed_separated_int("(-?[0-9]{1,3})");
static const std::string unsigned_int("([0-9]+)");
static const std::string eu_separated_int("(?:[\\s'.]([0-9]{3}))?");
static const std::string eu_separated_int("(?:[[:space:]'.]([0-9]{3}))?");
static const std::string en_separated_int("(?:\\,([0-9]{3}))?");
static const std::string eu_decimal_part("(?:\\,([0-9]+))?");
static const std::string en_decimal_part("(?:\\.([0-9]+))?");
@ -272,7 +275,8 @@ GncNumeric::GncNumeric(const std::string &str, bool autoround) {
static const regex hex_over_num(begin + hex_frag + slash + unsigned_int + end);
static const regex num_over_hex(begin + opt_signed_int + slash + hex_frag + end);
static const regex decimal(begin + opt_signed_int + "[.,]" + unsigned_int + end);
static const regex sep_decimal(begin + begin_group + eu_sep_decimal + or_op + en_sep_decimal + end_group + end);
static const u32regex sep_decimal =
boost::make_u32regex(begin + begin_group + eu_sep_decimal + or_op + en_sep_decimal + end_group + end);
static const regex scientific("(?:(-?[0-9]+[.,]?)|(-?[0-9]*)[.,]([0-9]+))[Ee](-?[0-9]+)");
static const regex has_hex_prefix(".*0[xX]$");
smatch m, x;
@ -345,7 +349,7 @@ GncNumeric::GncNumeric(const std::string &str, bool autoround) {
m_den = denom;
return;
}
if (regex_search(str, m, sep_decimal))
if (u32regex_search(str, m, sep_decimal))
{
/* There's a bit of magic here because of the complexity of
* the regex. It supports two formats, one for locales that

@ -156,8 +156,8 @@ TEST(gncnumeric_constructors, test_string_constructor)
GncNumeric neg_continental_decimal("-123,456");
EXPECT_EQ(-123456, neg_continental_decimal.num());
EXPECT_EQ(1000, neg_continental_decimal.denom());
ASSERT_NO_THROW(GncNumeric swiss_thousep_decimal("123 456 789,123"));
GncNumeric swiss_thousep_decimal("123 456 789,123");
ASSERT_NO_THROW(GncNumeric swiss_thousep_decimal("123""\xe2\x80\xaf""456""\xe2\x80\xaf""789,123"));
GncNumeric swiss_thousep_decimal("123""\xe2\x80\xaf""456""\xe2\x80\xaf""789,123");
EXPECT_EQ(123456789123, swiss_thousep_decimal.num());
EXPECT_EQ(1000, swiss_thousep_decimal.denom());
GncNumeric from_scientific("1.234e4");

Loading…
Cancel
Save