From fbf4843f31b69eb588d6b47978aa1b8b2265324d Mon Sep 17 00:00:00 2001 From: lmat Date: Sat, 9 Dec 2017 19:49:03 -0800 Subject: [PATCH] Changed bayes import map design This commit introduces a new feature flag: GNC_FEATURE_GUID_FLAT_BAYESIAN. It signifies that the bayes import map data are stored flat and by guid. Any time bayes import map data are accessed, they are converted if necessary. --- gnucash/gnome-utils/gnc-file.c | 6 - libgnucash/engine/Account.cpp | 304 ++++++++++---------- libgnucash/engine/Account.h | 5 - libgnucash/engine/gnc-features.c | 27 ++ libgnucash/engine/gnc-features.h | 9 +- libgnucash/engine/kvp-frame.cpp | 9 +- libgnucash/engine/qofbook.cpp | 2 +- libgnucash/engine/test/gtest-import-map.cpp | 36 +-- 8 files changed, 207 insertions(+), 191 deletions(-) diff --git a/gnucash/gnome-utils/gnc-file.c b/gnucash/gnome-utils/gnc-file.c index 12d5e59764..aa88b2865a 100644 --- a/gnucash/gnome-utils/gnc-file.c +++ b/gnucash/gnome-utils/gnc-file.c @@ -1022,12 +1022,6 @@ RESTART: gnc_warning_dialog(NULL, "%s", message); g_free ( message ); } - - // Convert imap mappings from account full name to guid strings - qof_event_suspend(); - gnc_account_imap_convert_bayes (gnc_get_current_book()); - qof_event_resume(); - return TRUE; } diff --git a/libgnucash/engine/Account.cpp b/libgnucash/engine/Account.cpp index 67692b3145..f04c43a693 100644 --- a/libgnucash/engine/Account.cpp +++ b/libgnucash/engine/Account.cpp @@ -5270,6 +5270,160 @@ get_first_pass_probabilities(GncImportMatchMap * imap, GList * tokens) return ret; } +static std::string +look_for_old_separator_descendants (Account *root, std::string const & full_name, const gchar *separator) +{ + GList *top_accounts, *ptr; + gint found_len = 0; + gchar found_sep; + top_accounts = gnc_account_get_descendants (root); + PINFO("Incoming full_name is '%s', current separator is '%s'", full_name.c_str (), separator); + /* Go through list of top level accounts */ + for (ptr = top_accounts; ptr; ptr = g_list_next (ptr)) + { + const gchar *name = xaccAccountGetName (static_cast (ptr->data)); + // we are looking for the longest top level account that matches + if (g_str_has_prefix (full_name.c_str (), name)) + { + gint name_len = strlen (name); + const gchar old_sep = full_name[name_len]; + if (!g_ascii_isalnum (old_sep)) // test for non alpha numeric + { + if (name_len > found_len) + { + found_sep = full_name[name_len]; + found_len = name_len; + } + } + } + } + g_list_free (top_accounts); // Free the List + std::string new_name {full_name}; + if (found_len > 1) + std::replace (new_name.begin (), new_name.end (), found_sep, *separator); + PINFO ("Return full_name is '%s'", new_name.c_str ()); + return new_name; +} + +static std::string +get_guid_from_account_name (Account * root, std::string const & name) +{ + auto map_account = gnc_account_lookup_by_full_name (root, name.c_str ()); + if (!map_account) + { + auto temp_account_name = look_for_old_separator_descendants (root, name, + gnc_get_account_separator_string ()); + map_account = gnc_account_lookup_by_full_name (root, temp_account_name.c_str ()); + } + auto temp_guid = gnc::GUID {*xaccAccountGetGUID (map_account)}; + return temp_guid.to_string (); +} + +static FlatKvpEntry +convert_entry (KvpEntry entry, Account* root) +{ + /*We need to make a copy here.*/ + auto account_name = entry.first.back(); + if (!gnc::GUID::is_valid_guid (account_name)) + { + /* Earlier version stored the account name in the import map, and + * there were early beta versions of 2.7 that stored a GUID. + * If there is no GUID, we assume it's an account name. */ + /* Take off the account name and replace it with the GUID */ + entry.first.pop_back(); + auto guid_str = get_guid_from_account_name (root, account_name); + entry.first.emplace_back (guid_str); + } + std::string new_key {std::accumulate (entry.first.begin(), entry.first.end(), std::string {})}; + new_key = IMAP_FRAME_BAYES + new_key; + return {new_key, entry.second}; +} + +static std::vector +get_new_flat_imap (Account * acc) +{ + auto frame = qof_instance_get_slots (QOF_INSTANCE (acc)); + auto slot = frame->get_slot ({IMAP_FRAME_BAYES}); + if (!slot) + return {}; + auto imap_frame = slot->get (); + auto flat_kvp = imap_frame->flatten_kvp (); + auto root = gnc_account_get_root (acc); + std::vector ret; + for (auto const & flat_entry : flat_kvp) + { + auto converted_entry = convert_entry (flat_entry, root); + /*If the entry was invalid, we don't perpetuate it.*/ + if (converted_entry.first.size()) + ret.emplace_back (converted_entry); + } + return ret; +} + +static bool +convert_imap_account_bayes_to_flat (Account *acc) +{ + auto frame = qof_instance_get_slots (QOF_INSTANCE (acc)); + if (!frame->get_keys().size()) + return false; + auto new_imap = get_new_flat_imap(acc); + xaccAccountBeginEdit(acc); + frame->set({IMAP_FRAME_BAYES}, nullptr); + if (!new_imap.size ()) + { + xaccAccountCommitEdit(acc); + return false; + } + std::for_each(new_imap.begin(), new_imap.end(), [&frame] (FlatKvpEntry const & entry) { + frame->set({entry.first.c_str()}, entry.second); + }); + qof_instance_set_dirty (QOF_INSTANCE (acc)); + xaccAccountCommitEdit(acc); + return true; +} + +/* + * Checks for import map data and converts them when found. + */ +static bool +imap_convert_bayes_to_flat (QofBook * book) +{ + auto root = gnc_book_get_root_account (book); + auto accts = gnc_account_get_descendants_sorted (root); + bool ret = false; + for (auto ptr = accts; ptr; ptr = g_list_next (ptr)) + { + Account *acc = static_cast (ptr->data); + if (convert_imap_account_bayes_to_flat (acc)) + { + ret = true; + gnc_features_set_used (book, GNC_FEATURE_GUID_FLAT_BAYESIAN); + } + } + g_list_free (accts); + return ret; +} + +/* + * Here we check to see the state of import map data. + * + * If the GUID_FLAT_BAYESIAN feature flag is set, everything + * should be fine. + * + * If it is not set, there are two possibilities: import data + * are present from a previous version or not. If they are, + * they are converted, and the feature flag set. If there are + * no previous data, nothing is done. + */ +static void +check_import_map_data (QofBook *book) +{ + if (gnc_features_check_used (book, GNC_FEATURE_GUID_FLAT_BAYESIAN)) + return; + /* This function will set GNC_FEATURE_GUID_FLAT_BAYESIAN if necessary.*/ + imap_convert_bayes_to_flat (book); +} + static constexpr double threshold = .90 * probability_factor; /* 90% */ /** Look up an Account in the map */ @@ -5278,6 +5432,7 @@ gnc_account_imap_find_account_bayes (GncImportMatchMap *imap, GList *tokens) { if (!imap) return nullptr; + check_import_map_data (imap->book); auto first_pass = get_first_pass_probabilities(imap, tokens); if (!first_pass.size()) return nullptr; @@ -5330,7 +5485,7 @@ change_imap_entry (GncImportMatchMap *imap, std::string const & path, int64_t to // Add or Update the entry based on guid qof_instance_set_path_kvp (QOF_INSTANCE (imap->acc), &value, {path}); - gnc_features_set_used (imap->book, GNC_FEATURE_GUID_BAYESIAN); + gnc_features_set_used (imap->book, GNC_FEATURE_GUID_FLAT_BAYESIAN); } /** Updates the imap for a given account using a list of tokens */ @@ -5350,6 +5505,7 @@ gnc_account_imap_add_account_bayes (GncImportMatchMap *imap, LEAVE(" "); return; } + check_import_map_data (imap->book); g_return_if_fail (acc != NULL); account_fullname = gnc_account_get_full_name(acc); @@ -5462,6 +5618,7 @@ build_bayes (const char *key, KvpValue * value, GncImapInfo & imapInfo) GList * gnc_account_imap_get_info_bayes (Account *acc) { + check_import_map_data (gnc_account_get_book (acc)); /* A dummy object which is used to hold the specified account, and the list * of data about which we care. */ GncImapInfo imapInfo {acc, nullptr}; @@ -5533,151 +5690,6 @@ gnc_account_delete_map_entry (Account *acc, char *full_category, gboolean empty) g_free (full_category); } -/*******************************************************************************/ - -static std::string -look_for_old_separator_descendants (Account *root, std::string const & full_name, const gchar *separator) -{ - GList *top_accounts, *ptr; - gint found_len = 0; - gchar found_sep; - top_accounts = gnc_account_get_descendants (root); - PINFO("Incoming full_name is '%s', current separator is '%s'", full_name.c_str (), separator); - /* Go through list of top level accounts */ - for (ptr = top_accounts; ptr; ptr = g_list_next (ptr)) - { - const gchar *name = xaccAccountGetName (static_cast (ptr->data)); - // we are looking for the longest top level account that matches - if (g_str_has_prefix (full_name.c_str (), name)) - { - gint name_len = strlen (name); - const gchar old_sep = full_name[name_len]; - if (!g_ascii_isalnum (old_sep)) // test for non alpha numeric - { - if (name_len > found_len) - { - found_sep = full_name[name_len]; - found_len = name_len; - } - } - } - } - g_list_free (top_accounts); // Free the List - std::string new_name {full_name}; - if (found_len > 1) - std::replace (new_name.begin (), new_name.end (), found_sep, *separator); - PINFO("Return full_name is '%s'", new_name); - return new_name; -} - -static std::string -get_guid_from_account_name (Account * root, std::string const & name) -{ - auto map_account = gnc_account_lookup_by_full_name (root, name.c_str ()); - if (!map_account) - { - auto temp_account_name = look_for_old_separator_descendants (root, name, - gnc_get_account_separator_string ()); - map_account = gnc_account_lookup_by_full_name (root, temp_account_name.c_str ()); - } - auto temp_guid = gnc::GUID {*xaccAccountGetGUID (map_account)}; - return temp_guid.to_string (); -} - -static FlatKvpEntry -convert_entry (KvpEntry entry, Account* root) -{ - /*We need to make a copy here.*/ - auto account_name = entry.first.back(); - entry.first.pop_back(); - auto guid_str = get_guid_from_account_name (root, account_name); - entry.first.emplace_back ("/"); - entry.first.emplace_back (guid_str); - std::string new_key {std::accumulate (entry.first.begin(), entry.first.end(), std::string {})}; - new_key = IMAP_FRAME_BAYES + new_key; - return {new_key, entry.second}; -} - -static std::vector -get_new_guid_imap (Account * acc) -{ - auto frame = qof_instance_get_slots (QOF_INSTANCE (acc)); - auto slot = frame->get_slot ({IMAP_FRAME_BAYES}); - if (!slot) - return {}; - auto imap_frame = slot->get (); - auto flat_kvp = imap_frame->flatten_kvp (); - auto root = gnc_account_get_root (acc); - std::vector ret; - for (auto const & flat_entry : flat_kvp) - ret.emplace_back (convert_entry (flat_entry, root)); - return ret; -} - -static bool -convert_imap_account_bayes_to_guid (Account *acc) -{ - auto frame = qof_instance_get_slots (QOF_INSTANCE (acc)); - if (!frame->get_keys().size()) - return false; - auto new_imap = get_new_guid_imap(acc); - xaccAccountBeginEdit(acc); - frame->set({IMAP_FRAME_BAYES}, nullptr); - if (!new_imap.size ()) - { - xaccAccountCommitEdit(acc); - return false; - } - std::for_each(new_imap.begin(), new_imap.end(), [&frame] (FlatKvpEntry const & entry) { - frame->set({entry.first.c_str()}, entry.second); - }); - qof_instance_set_dirty (QOF_INSTANCE (acc)); - xaccAccountCommitEdit(acc); - return true; -} - -char const * run_once_key_to_guid {"changed-bayesian-to-guid"}; - -static void -imap_convert_bayes_to_guid (QofBook * book) -{ - auto root = gnc_book_get_root_account (book); - auto accts = gnc_account_get_descendants_sorted (root); - for (auto ptr = accts; ptr; ptr = g_list_next (ptr)) - { - Account *acc = static_cast (ptr->data); - if (convert_imap_account_bayes_to_guid (acc)) - gnc_features_set_used (book, GNC_FEATURE_GUID_BAYESIAN); - } - g_list_free (accts); -} - -static bool -run_once_key_set (char const * key, QofBook * book) -{ - GValue value G_VALUE_INIT; - qof_instance_get_path_kvp (QOF_INSTANCE(book), &value, {key}); - return G_VALUE_HOLDS_STRING(&value) && strcmp(g_value_get_string(&value), "true"); -} - -static void -set_run_once_key (char const * key, QofBook * book) -{ - GValue value G_VALUE_INIT; - g_value_init(&value, G_TYPE_BOOLEAN); - g_value_set_boolean(&value, TRUE); - qof_instance_set_path_kvp(QOF_INSTANCE (book), &value, {key}); -} - -void -gnc_account_imap_convert_bayes (QofBook *book) -{ - if (run_once_key_set (run_once_key_to_guid, book)) - return; - imap_convert_bayes_to_guid (book); - set_run_once_key (run_once_key_to_guid, book); -} - /* ================================================================ */ /* QofObject function implementation and registration */ diff --git a/libgnucash/engine/Account.h b/libgnucash/engine/Account.h index 2f80632f87..477d406af3 100644 --- a/libgnucash/engine/Account.h +++ b/libgnucash/engine/Account.h @@ -1449,11 +1449,6 @@ gchar *gnc_account_get_map_entry (Account *acc, const char *full_category); */ void gnc_account_delete_map_entry (Account *acc, char *full_category, gboolean empty); -/** Search for Bayesian entries with mappings based on full account name and change - * them to be based on the account guid - */ -void gnc_account_imap_convert_bayes (QofBook *book); - /** @} */ diff --git a/libgnucash/engine/gnc-features.c b/libgnucash/engine/gnc-features.c index f125602b06..0c58badfc8 100644 --- a/libgnucash/engine/gnc-features.c +++ b/libgnucash/engine/gnc-features.c @@ -46,6 +46,7 @@ static gncFeature known_features[] = { GNC_FEATURE_KVP_EXTRA_DATA, "Extra data for addresses, jobs or invoice entries (requires at least GnuCash 2.6.4)" }, { GNC_FEATURE_BOOK_CURRENCY, "User specifies a 'book-currency'; costs of other currencies/commodities tracked in terms of book-currency (requires at least GnuCash 2.7.0)" }, { GNC_FEATURE_GUID_BAYESIAN, "Use account GUID as key for Bayesian data (requires at least GnuCash 2.6.12)" }, + { GNC_FEATURE_GUID_FLAT_BAYESIAN, "Use account GUID as key for bayesian data and store KVP flat" }, { NULL }, }; @@ -148,6 +149,32 @@ void gnc_features_set_used (QofBook *book, const gchar *feature) } qof_book_set_feature (book, feature, description); +} +struct CheckFeature +{ + gchar const * checked_feature; + gboolean found; +}; +static void gnc_features_check_feature_cb (gpointer pkey, gpointer value, + gpointer data) +{ + const gchar *key = (const gchar*)pkey; + struct CheckFeature * check_data = data; + g_assert(data); + if (!g_strcmp0 (key, check_data->checked_feature)) + check_data->found = TRUE; } + +gboolean gnc_features_check_used (QofBook *book, const gchar * feature) +{ + GHashTable *features_used = qof_book_get_features (book); + struct CheckFeature check_data = {feature, FALSE}; + /* Setup the known_features hash table */ + gnc_features_init(); + g_hash_table_foreach (features_used, &gnc_features_check_feature_cb, &check_data); + g_hash_table_unref (features_used); + return check_data.found; +} + diff --git a/libgnucash/engine/gnc-features.h b/libgnucash/engine/gnc-features.h index feefe8d079..2beca420ed 100644 --- a/libgnucash/engine/gnc-features.h +++ b/libgnucash/engine/gnc-features.h @@ -50,6 +50,7 @@ extern "C" { #define GNC_FEATURE_KVP_EXTRA_DATA "Extra data in addresses, jobs or invoice entries" #define GNC_FEATURE_BOOK_CURRENCY "Use a Book-Currency" #define GNC_FEATURE_GUID_BAYESIAN "Account GUID based Bayesian data" +#define GNC_FEATURE_GUID_FLAT_BAYESIAN "Account GUID based bayesian with flat KVP" /** @} */ @@ -68,10 +69,14 @@ gchar *gnc_features_test_unknown (QofBook *book); */ void gnc_features_set_used (QofBook *book, const gchar *feature); +/* + * Returns true if the specified feature is used. + */ +gboolean gnc_features_check_used (QofBook *, char const * feature); + #ifdef __cplusplus } /* extern "C" */ -#endif - +#endif /*__cplusplus*/ #endif /* GNC_FEATURES_H */ /** @} */ /** @} */ diff --git a/libgnucash/engine/kvp-frame.cpp b/libgnucash/engine/kvp-frame.cpp index 6f0fc4ad32..3d09e2fe56 100644 --- a/libgnucash/engine/kvp-frame.cpp +++ b/libgnucash/engine/kvp-frame.cpp @@ -427,16 +427,15 @@ KvpFrame::flatten_kvp_impl(std::vector path, std::vector new_path {path}; + new_path.push_back("/"); if (entry.second->get_type() == KvpValue::Type::FRAME) { - std::vector send_path {path}; - send_path.push_back("/"); - send_path.push_back(entry.first); - entry.second->get()->flatten_kvp_impl(send_path, entries); + new_path.push_back(entry.first); + entry.second->get()->flatten_kvp_impl(new_path, entries); } else { - std::vector new_path {path}; new_path.emplace_back (entry.first); entries.emplace_back (new_path, entry.second); } diff --git a/libgnucash/engine/qofbook.cpp b/libgnucash/engine/qofbook.cpp index 82f998ed79..15d80209ec 100644 --- a/libgnucash/engine/qofbook.cpp +++ b/libgnucash/engine/qofbook.cpp @@ -1076,7 +1076,7 @@ static void add_feature_to_hash (const gchar *key, KvpValue *value, GHashTable * user_data) { gchar *descr = g_strdup(value->get()); - g_hash_table_insert (*(GHashTable**)user_data, (gchar*)key, descr); + g_hash_table_insert (user_data, (gchar*)key, descr); } GHashTable * diff --git a/libgnucash/engine/test/gtest-import-map.cpp b/libgnucash/engine/test/gtest-import-map.cpp index df72445db7..4f6c7cde7c 100644 --- a/libgnucash/engine/test/gtest-import-map.cpp +++ b/libgnucash/engine/test/gtest-import-map.cpp @@ -318,13 +318,8 @@ TEST_F(ImapBayesTest, AddAccountBayes) EXPECT_EQ(2, value->get()); } -TEST_F(ImapBayesTest, ConvertAccountBayes) +TEST_F(ImapBayesTest, ConvertBayesData) { - // prevent the embedded beginedit/committedit from doing anything - qof_instance_increase_editlevel(QOF_INSTANCE(t_bank_account)); - qof_instance_mark_clean(QOF_INSTANCE(t_bank_account)); - gnc_account_imap_add_account_bayes(t_imap, t_list1, t_expense_account1); //Food - gnc_account_imap_add_account_bayes(t_imap, t_list2, t_expense_account2); //Drink auto root = qof_instance_get_slots(QOF_INSTANCE(t_bank_account)); auto book = qof_instance_get_slots(QOF_INSTANCE(t_imap->book)); auto acct1_guid = guid_to_string (xaccAccountGetGUID(t_expense_account1)); //Food @@ -334,15 +329,6 @@ TEST_F(ImapBayesTest, ConvertAccountBayes) auto val1 = new KvpValue(static_cast(10)); auto val2 = new KvpValue(static_cast(5)); auto val3 = new KvpValue(static_cast(2)); - // Test for existing entries, all will be 1 - auto value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + foo + "/" + acct1_guid}); - EXPECT_EQ(1, value->get()); - value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + bar + "/" + acct1_guid}); - EXPECT_EQ(1, value->get()); - value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + baz + "/" + acct2_guid}); - EXPECT_EQ(1, value->get()); - value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + waldo + "/" + acct2_guid}); - EXPECT_EQ(1, value->get()); // Set up some old entries root->set_path({IMAP_FRAME_BAYES, "severely", "divided", "token", "Asset-Bank"}, val1); root->set_path({IMAP_FRAME_BAYES, salt, "Asset-Bank#Bank"}, new KvpValue{*val1}); @@ -350,13 +336,11 @@ TEST_F(ImapBayesTest, ConvertAccountBayes) root->set_path({IMAP_FRAME_BAYES, pork, "Expense#Food"}, new KvpValue{*val2}); root->set_path({IMAP_FRAME_BAYES, sausage, "Expense#Drink"}, val3); root->set_path({IMAP_FRAME_BAYES, foo, "Expense#Food"}, new KvpValue{*val2}); - EXPECT_EQ(1, qof_instance_get_editlevel(QOF_INSTANCE(t_bank_account))); - EXPECT_TRUE(qof_instance_get_dirty_flag(QOF_INSTANCE(t_bank_account))); - qof_instance_mark_clean(QOF_INSTANCE(t_bank_account)); - // Start Convert - gnc_account_imap_convert_bayes (t_imap->book); + root->set_path({IMAP_FRAME_BAYES, salt, acct1_guid}, new KvpValue{*val1}); + /*Calling into the imap functions should trigger a conversion.*/ + gnc_account_imap_add_account_bayes(t_imap, t_list5, t_expense_account2); //pork and sausage; account Food // convert from 'Asset-Bank' to 'Asset-Bank' guid - value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/severely/divided/token/" + acct3_guid}); + auto value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/severely/divided/token/" + acct3_guid}); EXPECT_EQ(10, value->get()); // convert from 'Asset-Bank#Bank' to 'Sav Bank' guid value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + salt + "/" + acct4_guid}); @@ -366,14 +350,14 @@ TEST_F(ImapBayesTest, ConvertAccountBayes) EXPECT_EQ(5, value->get()); // convert from 'Expense#Drink' to 'Drink' guid value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + sausage + "/" + acct2_guid}); - EXPECT_EQ(2, value->get()); + /*We put in 2, then called it once to bring it up to 3.*/ + EXPECT_EQ(3, value->get()); // convert from 'Expense#Food' to 'Food' guid but add to original value value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + foo + "/" + acct1_guid}); EXPECT_EQ(5, value->get()); - // Check for run once flag - auto vals = book->get_slot({"changed-bayesian-to-guid"}); - EXPECT_STREQ("true", vals->get()); - EXPECT_EQ(1, qof_instance_get_editlevel(QOF_INSTANCE(t_bank_account))); + // Keep GUID value from original + value = root->get_slot({std::string{IMAP_FRAME_BAYES} + "/" + salt + "/" + acct1_guid}); + EXPECT_EQ(10, value->get()); EXPECT_TRUE(qof_instance_get_dirty_flag(QOF_INSTANCE(t_bank_account))); }