From 14e4aca3cdd4e1b39b235262bb1b085dcfd1e2fc Mon Sep 17 00:00:00 2001 From: Jon Schewe Date: Fri, 27 Feb 2026 16:57:21 -0600 Subject: [PATCH 1/3] Enhancing debugging output Enhance the debugging to make it easier to track why some transactions aren't considered possible matches. Uncomment debug statements so that users dont' need to build their own binary to debug. --- gnucash/import-export/import-backend.cpp | 48 ++++++++++++------- gnucash/import-export/import-main-matcher.cpp | 6 +-- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/gnucash/import-export/import-backend.cpp b/gnucash/import-export/import-backend.cpp index 0cbc03282c..66eef88c4a 100644 --- a/gnucash/import-export/import-backend.cpp +++ b/gnucash/import-export/import-backend.cpp @@ -593,9 +593,9 @@ void split_find_match (GNCImportTransInfo * trans_info, /* Amount heuristics */ auto downloaded_split_amount = gnc_numeric_to_double (xaccSplitGetAmount(new_trans_fsplit)); - /*DEBUG(" downloaded_split_amount=%f", downloaded_split_amount);*/ + DEBUG(" downloaded_split_amount=%f", downloaded_split_amount); auto match_split_amount = gnc_numeric_to_double(xaccSplitGetAmount(split)); - /*DEBUG(" match_split_amount=%f", match_split_amount);*/ + DEBUG(" match_split_amount=%f", match_split_amount); if (fabs(downloaded_split_amount - match_split_amount) < 1e-6) /* bug#347791: Double type shouldn't be compared for exact equality, so we're using fabs() instead. */ @@ -605,7 +605,7 @@ void split_find_match (GNCImportTransInfo * trans_info, -- gnc_numeric_equal is an expensive function call */ { prob = prob + 3; - /*DEBUG("heuristics: probability + 3 (amount)");*/ + DEBUG("heuristics: probability + 3 (amount)"); } else if (fabs (downloaded_split_amount - match_split_amount) <= fuzzy_amount_difference) @@ -614,7 +614,7 @@ void split_find_match (GNCImportTransInfo * trans_info, So you withdraw 100$ and get charged 101,25$ in the same transaction */ prob = prob + 2; - /*DEBUG("heuristics: probability + 2 (amount)");*/ + DEBUG("heuristics: probability + 2 (amount)"); } else { @@ -622,7 +622,7 @@ void split_find_match (GNCImportTransInfo * trans_info, threshold, it's very unlikely to be the same transaction so we give it an extra -5 penalty */ prob = prob - 5; - /* DEBUG("heuristics: probability - 1 (amount)"); */ + DEBUG("heuristics: probability - 1 (amount)"); } /* Date heuristics */ @@ -634,23 +634,28 @@ void split_find_match (GNCImportTransInfo * trans_info, differences. Whatever. On the other hand, the difference calculation itself will work regardless of month/year turnarounds. */ - /*DEBUG("diff day %d", datediff_day);*/ + auto download_time_str = qof_print_date(download_time); + auto match_time_str = qof_print_date(match_time); + DEBUG("Date download: %s vs match: %s", download_time_str, match_time_str); + g_free (download_time_str); + g_free (match_time_str); + DEBUG("diff day %lld", datediff_day); if (datediff_day == 0) { prob = prob + 3; - /*DEBUG("heuristics: probability + 3 (date)");*/ + DEBUG("heuristics: probability + 3 (date)"); } else if (datediff_day <= date_threshold) { prob = prob + 2; - /*DEBUG("heuristics: probability + 2 (date)");*/ + DEBUG("heuristics: probability + 2 (date)"); } else if (datediff_day > date_not_threshold) { /* Extra penalty if that split lies awfully far away from the given one. */ prob = prob - 5; - /*DEBUG("heuristics: probability - 5 (date)"); */ + DEBUG("heuristics: probability - 5 (date)"); /* Changed 2005-02-21: Revert the hard-limiting behaviour back to the previous large penalty. (Changed 2004-11-27: The penalty is so high that we can forget about this @@ -684,7 +689,7 @@ void split_find_match (GNCImportTransInfo * trans_info, { /* An exact match of the Check number gives a +4 */ prob += 4; - /*DEBUG("heuristics: probability + 4 (Check number)");*/ + DEBUG("heuristics: probability + 4 (Check number)"); } else if (strlen(new_trans_str) > 0 && strlen(split_str) > 0) { @@ -702,7 +707,7 @@ void split_find_match (GNCImportTransInfo * trans_info, { /* An exact match of memo gives a +2 */ prob = prob + 2; - /* DEBUG("heuristics: probability + 2 (memo)"); */ + DEBUG("heuristics: probability + 2 (memo)"); } else if ((strncasecmp(memo, xaccSplitGetMemo(split), strlen(xaccSplitGetMemo(split)) / 2) == 0)) @@ -712,7 +717,7 @@ void split_find_match (GNCImportTransInfo * trans_info, number some banks seem to include in the memo but someone should write something more sophisticated */ prob = prob + 1; - /*DEBUG("heuristics: probability + 1 (memo)"); */ + DEBUG("heuristics: probability + 1 (memo)"); } } @@ -725,7 +730,7 @@ void split_find_match (GNCImportTransInfo * trans_info, { /*An exact match of Description gives a +2 */ prob = prob + 2; - /*DEBUG("heuristics: probability + 2 (description)");*/ + DEBUG("heuristics: probability + 2 (description)"); } else if ((strncasecmp(descr, xaccTransGetDescription (xaccSplitGetParent(split)), @@ -736,13 +741,15 @@ void split_find_match (GNCImportTransInfo * trans_info, number some banks seem to include in the description but someone should write something more sophisticated */ prob = prob + 1; - /*DEBUG("heuristics: probability + 1 (description)"); */ + DEBUG("heuristics: probability + 1 (description)"); } } /* Is the probability high enough? Otherwise do nothing and return. */ - if (prob < display_threshold) - return; + if (prob < display_threshold) { + DEBUG("below threshold: %d", prob); + return; + } /* The probability is high enough, so allocate an object here. Allocating it only when it's actually being used is @@ -758,6 +765,7 @@ void split_find_match (GNCImportTransInfo * trans_info, /* Append that to the list. Do not use g_list_append because it is slow. The list is sorted afterwards anyway. */ trans_info->match_list = g_list_prepend(trans_info->match_list, match_info); + DEBUG("Added to list of possible matches: %d", prob); } /*********************************************************************** @@ -1052,6 +1060,12 @@ gboolean gnc_import_exists_online_id (Transaction *trans, GHashTable* acct_id_ha } auto online_id_exists = g_hash_table_contains (online_id_hash, source_online_id); + if (online_id_exists) + { + auto date_str = qof_print_date(xaccTransGetDate(trans)); + DEBUG("Transaction with online ID %s already exists, date: %s", source_online_id, date_str); + g_free (date_str); + } g_free (source_online_id); return online_id_exists; } @@ -1141,6 +1155,8 @@ gnc_import_TransInfo_init_matches (GNCImportTransInfo *trans_info, if (trans_info->match_list) { + DEBUG("Number of matches %d", g_list_length(trans_info->match_list)); + trans_info->match_list = g_list_sort(trans_info->match_list, compare_probability); auto best_match = static_cast(g_list_nth_data(trans_info->match_list, 0)); diff --git a/gnucash/import-export/import-main-matcher.cpp b/gnucash/import-export/import-main-matcher.cpp index 49a2036d0f..f8fb60ea05 100644 --- a/gnucash/import-export/import-main-matcher.cpp +++ b/gnucash/import-export/import-main-matcher.cpp @@ -536,7 +536,7 @@ on_matcher_ok_clicked (GtkButton *button, GNCImportMainMatcher *info) { g_assert (info); - /* DEBUG ("Begin") */ + DEBUG ("Begin"); GtkTreeModel *model = gtk_tree_view_get_model (info->view); GtkTreeIter iter; @@ -596,7 +596,7 @@ on_matcher_ok_clicked (GtkButton *button, GNCImportMainMatcher *info) gnc_gen_trans_list_delete (info); - /* DEBUG ("End") */ + DEBUG ("End"); g_list_free_full (accounts_modified, (GDestroyNotify)xaccAccountCommitEdit); /* Allow GUI refresh again upon commit completion. */ @@ -2268,7 +2268,7 @@ gnc_gen_trans_list_add_trans_internal (GNCImportMainMatcher *gui, Transaction *t { /* If it does, abort the process for this transaction, since it is already in the system. */ - DEBUG("%s", "Transaction with same online ID exists, destroying current transaction"); + DEBUG("Transaction with online ID exists, destroying current transaction"); xaccTransDestroy(trans); xaccTransCommitEdit(trans); return; From a44249c17ab6362612074ef9ef6a1485a5352737 Mon Sep 17 00:00:00 2001 From: Jon Schewe Date: Fri, 27 Feb 2026 16:59:52 -0600 Subject: [PATCH 2/3] Add null checking to string fields Check that strings aren't null before using them. Factor out access to strings to avoid extra calls to the xacc* methods. --- gnucash/import-export/import-backend.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/gnucash/import-export/import-backend.cpp b/gnucash/import-export/import-backend.cpp index 66eef88c4a..8f127eac39 100644 --- a/gnucash/import-export/import-backend.cpp +++ b/gnucash/import-export/import-backend.cpp @@ -667,7 +667,9 @@ void split_find_match (GNCImportTransInfo * trans_info, /* Check number heuristics */ auto new_trans_str = gnc_get_num_action(new_trans, new_trans_fsplit); - if (new_trans_str && *new_trans_str) + auto split_str = gnc_get_num_action (xaccSplitGetParent (split), split); + DEBUG("number download: '%s' to match: '%s'", new_trans_str, split_str); + if (new_trans_str && *new_trans_str && split_str && *split_str) { char *endptr; auto conversion_ok = true; @@ -679,7 +681,6 @@ void split_find_match (GNCImportTransInfo * trans_info, numbers on string and string empty */ conversion_ok = !(errno || endptr == new_trans_str); - auto split_str = gnc_get_num_action (xaccSplitGetParent (split), split); errno = 0; auto split_number = strtol(split_str, &endptr, 10); conversion_ok = !(errno || endptr == split_str); @@ -701,9 +702,11 @@ void split_find_match (GNCImportTransInfo * trans_info, /* Memo heuristics */ auto memo = xaccSplitGetMemo(new_trans_fsplit); - if (memo && *memo) + auto match_memo = xaccSplitGetMemo(split); + if (memo && *memo && match_memo && *match_memo) { - if (safe_strcasecmp(memo, xaccSplitGetMemo(split)) == 0) + DEBUG("memo download: '%s' to match: '%s'", memo, match_memo); + if (safe_strcasecmp(memo, match_memo) == 0) { /* An exact match of memo gives a +2 */ prob = prob + 2; @@ -723,10 +726,11 @@ void split_find_match (GNCImportTransInfo * trans_info, /* Description heuristics */ auto descr = xaccTransGetDescription(new_trans); - if (descr && *descr) + auto match_descr = xaccTransGetDescription(xaccSplitGetParent(split)); + if (descr && *descr && match_descr && *match_descr) { - if (safe_strcasecmp(descr, - xaccTransGetDescription(xaccSplitGetParent(split))) == 0) + DEBUG("description: download: '%s' to match: '%s'", descr, match_descr); + if (safe_strcasecmp(descr, match_descr) == 0) { /*An exact match of Description gives a +2 */ prob = prob + 2; From 563c186c99f87e57aaef6089d4ef3671eb753795 Mon Sep 17 00:00:00 2001 From: Jon Schewe Date: Fri, 27 Feb 2026 17:22:39 -0600 Subject: [PATCH 3/3] [Bug 799745] - Import matcher doesn't handle zero length memo and description properly When doing a fuzzy check on strings, check half the length of the longest string and only check if both strings are longer than 1 character. --- gnucash/import-export/import-backend.cpp | 45 +++++++++++++++--------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/gnucash/import-export/import-backend.cpp b/gnucash/import-export/import-backend.cpp index 8f127eac39..2028645597 100644 --- a/gnucash/import-export/import-backend.cpp +++ b/gnucash/import-export/import-backend.cpp @@ -712,15 +712,21 @@ void split_find_match (GNCImportTransInfo * trans_info, prob = prob + 2; DEBUG("heuristics: probability + 2 (memo)"); } - else if ((strncasecmp(memo, xaccSplitGetMemo(split), - strlen(xaccSplitGetMemo(split)) / 2) == 0)) + else { - /* Very primitive fuzzy match worth +1. This matches the - first 50% of the strings to skip annoying transaction - number some banks seem to include in the memo but someone - should write something more sophisticated */ - prob = prob + 1; - DEBUG("heuristics: probability + 1 (memo)"); + const size_t match_memo_len = strlen(match_memo); + const size_t memo_len = strlen(memo); + const size_t max_memo_len = match_memo_len > memo_len ? match_memo_len : memo_len; + if (match_memo_len > 1 && memo_len > 1 + && (strncasecmp(memo, match_memo, max_memo_len / 2) == 0)) + { + /* Very primitive fuzzy match worth +1. This matches up to the + first 50% of the longest string to skip annoying transaction + number some banks seem to include in the memo but someone + should write something more sophisticated */ + prob = prob + 1; + DEBUG("heuristics: probability + 1 (memo)"); + } } } @@ -736,16 +742,21 @@ void split_find_match (GNCImportTransInfo * trans_info, prob = prob + 2; DEBUG("heuristics: probability + 2 (description)"); } - else if ((strncasecmp(descr, - xaccTransGetDescription (xaccSplitGetParent(split)), - strlen(xaccTransGetDescription (new_trans)) / 2) == 0)) + else { - /* Very primitive fuzzy match worth +1. This matches the - first 50% of the strings to skip annoying transaction - number some banks seem to include in the description but someone - should write something more sophisticated */ - prob = prob + 1; - DEBUG("heuristics: probability + 1 (description)"); + const size_t match_descr_len = strlen(match_descr); + const size_t descr_len = strlen(descr); + const size_t max_descr_len = match_descr_len > descr_len ? match_descr_len : descr_len; + if (match_descr_len > 1 && descr_len > 1 + && (strncasecmp(descr, match_descr, max_descr_len / 2) == 0)) + { + /* Very primitive fuzzy match worth +1. This matches up to the + first 50% of the longest string to skip annoying transaction + number some banks seem to include in the description but someone + should write something more sophisticated */ + prob = prob + 1; + DEBUG("heuristics: probability + 1 (description)"); + } } }