From bddc64c57e116d8738f5e527e486e406002e6e16 Mon Sep 17 00:00:00 2001 From: Sherlock <119709043+agwekixj@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:24:43 -0700 Subject: [PATCH] Bug 799662 - Importing accounts from UTF8 csv file does not work correctly Modifies the imports of Accounts from CSV, Bills & Invoices, and Customers & Vendors, to call g_locale_to_utf8() only if the input is not already valid UTF-8 (as determined by g_utf8_validate()). Also removes any potentially XML-prohibited codepoints from the resulting UTF-8 string. --- .../import-export/bi-import/dialog-bi-import.c | 17 ++++++++++++----- .../import-export/csv-imp/csv-account-import.c | 18 ++++++++++++++++-- .../customer-import/dialog-customer-import.c | 17 ++++++++++++----- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/gnucash/import-export/bi-import/dialog-bi-import.c b/gnucash/import-export/bi-import/dialog-bi-import.c index 5a382028ba..f29743b933 100644 --- a/gnucash/import-export/bi-import/dialog-bi-import.c +++ b/gnucash/import-export/bi-import/dialog-bi-import.c @@ -36,6 +36,7 @@ #include #include +#include "gnc-glib-utils.h" #include "gnc-date.h" #include "gnc-ui.h" #include "gnc-ui-util.h" @@ -170,8 +171,15 @@ gnc_bi_import_read_file (const gchar * filename, const gchar * parser_regexp, if ((l > 0) && (line[l - 1] == '\n')) line[l - 1] = 0; - // convert line from locale into utf8 - line_utf8 = g_locale_to_utf8 (line, -1, NULL, NULL, NULL); + // if the line doesn't conform to UTF-8, try a default character set + // conversion based on locale + if (g_utf8_validate(line, -1, NULL)) + line_utf8 = line; + else + line_utf8 = g_locale_to_utf8 (line, -1, NULL, NULL, NULL); + + // Remove the potential XML-prohibited codepoints from the UTF-8 compliant string + gnc_utf8_strip_invalid(line_utf8); // parse the line match_info = NULL; // it seems, that in contrast to documentation, match_info is not always set -> g_match_info_free will segfault @@ -216,9 +224,8 @@ gnc_bi_import_read_file (const gchar * filename, const gchar * 
parser_regexp, } g_match_info_free (match_info); - match_info = 0; - g_free (line_utf8); - line_utf8 = 0; + if (line_utf8 != line) + g_free (line_utf8); } g_free (line); line = 0; diff --git a/gnucash/import-export/csv-imp/csv-account-import.c b/gnucash/import-export/csv-imp/csv-account-import.c index 867cf8c479..f575fee7dd 100644 --- a/gnucash/import-export/csv-imp/csv-account-import.c +++ b/gnucash/import-export/csv-imp/csv-account-import.c @@ -30,6 +30,7 @@ #include #include +#include "gnc-glib-utils.h" #include "gnc-ui-util.h" #include #include "Account.h" @@ -98,8 +99,21 @@ csv_import_read_file (GtkWindow *window, const gchar *filename, return RESULT_OPEN_FAILED; } - contents = g_locale_to_utf8 (locale_cont, -1, NULL, NULL, NULL); - g_free (locale_cont); + // if the contents don't conform to UTF-8, try a default character set + // conversion based on locale + if (g_utf8_validate(locale_cont, -1, NULL)) + { + contents = locale_cont; + } + else + { + contents = g_locale_to_utf8 (locale_cont, -1, NULL, NULL, NULL); + g_free (locale_cont); + } + + // Remove the potential XML-prohibited codepoints from the UTF-8 compliant content + gnc_utf8_strip_invalid(contents); + // compile the regular expression and check for errors err = NULL; diff --git a/gnucash/import-export/customer-import/dialog-customer-import.c b/gnucash/import-export/customer-import/dialog-customer-import.c index 793ae5ae06..2991be1756 100644 --- a/gnucash/import-export/customer-import/dialog-customer-import.c +++ b/gnucash/import-export/customer-import/dialog-customer-import.c @@ -34,6 +34,7 @@ #include #include +#include "gnc-glib-utils.h" #include "gnc-ui.h" #include "gnc-ui-util.h" #include "gnc-gui-query.h" @@ -135,8 +136,15 @@ gnc_customer_import_read_file (const gchar *filename, const gchar *parser_regexp if ((l > 0) && (line[l - 1] == '\n')) line[l - 1] = 0; - // convert line from locale into utf8 - line_utf8 = g_locale_to_utf8 (line, -1, NULL, NULL, NULL); + // if the line doesn't conform to 
UTF-8, try a default character set + // conversion based on locale + if (g_utf8_validate(line, -1, NULL)) + line_utf8 = line; + else + line_utf8 = g_locale_to_utf8 (line, -1, NULL, NULL, NULL); + + // Remove the potential XML-prohibited codepoints from the UTF-8 compliant string + gnc_utf8_strip_invalid(line_utf8); // parse the line match_info = NULL; // it seems, that in contrast to documentation, match_info is not always set -> g_match_info_free will segfault @@ -176,9 +184,8 @@ gnc_customer_import_read_file (const gchar *filename, const gchar *parser_regexp } g_match_info_free (match_info); - match_info = 0; - g_free (line_utf8); - line_utf8 = 0; + if (line_utf8 != line) + g_free (line_utf8); } g_free (line); line = 0;