From 682b5cf581895a5da3296cfbfc24acc9ece0185e Mon Sep 17 00:00:00 2001 From: Geert Janssens Date: Sat, 5 May 2018 12:42:17 +0200 Subject: [PATCH] Bug 795666 - Backslash "\" in Description field spoils CSV Import without helpful error message We've configured boost::tokenizer to take the backslash as the escape character. However boost::tokenizer will throw if it encounters a sole backslash that's not an escape (it would expect two if a pure backslash is to be inserted). Avoid this by replacing lone backslashes (not part of escapes) with double backslashes before passing control to the tokenizer. --- gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp | 13 +++++++++++++ .../import-export/csv-imp/test/test-tokenizer.cpp | 13 ++----------- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp index 402900a791..b5ad206feb 100644 --- a/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp +++ b/gnucash/import-export/csv-imp/gnc-tokenizer-csv.cpp @@ -63,6 +63,19 @@ int GncCsvTokenizer::tokenize() } // --- + // Deal with backslashes that are not meant to be escapes + // The boost::tokenizer with escaped_list_separator as we use + // it would choke on this. 
+ auto bs_pos = line.find ('\\'); + while (bs_pos != std::string::npos) + { + if ((bs_pos == line.size()) || // got trailing single backslash + (line.find_first_of ("\"\\n", bs_pos + 1) != bs_pos + 1)) // backslash is not part of known escapes \\, \" or \n + line = line.substr(0, bs_pos) + "\\\\" + line.substr(bs_pos + 1); + bs_pos += 2; + bs_pos = line.find ('\\', bs_pos); + } + Tokenizer tok(line, sep); vec.assign(tok.begin(),tok.end()); m_tokenized_contents.push_back(vec); diff --git a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp index c97902a9e7..fdbd1e10d4 100644 --- a/gnucash/import-export/csv-imp/test/test-tokenizer.cpp +++ b/gnucash/import-export/csv-imp/test/test-tokenizer.cpp @@ -138,17 +138,6 @@ TEST_F (GncTokenizerTest, tokenize_from_csv_file) * independently. */ -/* First test whether we're properly catching boost::tokenizer throws - * This happens when the input data has invalid escape sequences */ -TEST_F (GncTokenizerTest, tokenize_binary_data) -{ - GncCsvTokenizer *csvtok = dynamic_cast(csv_tok.get()); - csvtok->set_separators (","); - - set_utf8_contents (csv_tok, R"(\764Test,Something)"); - EXPECT_THROW (csv_tok->tokenize(), std::range_error); -} - /* This helper function will run the parse step on the given data * with the parser as configured by the calling test function. 
* This allows the same code to be used with different csv test strings @@ -185,6 +174,8 @@ static tokenize_csv_test_data comma_separated [] = { { "Date,Num,Description,Notes,Account,Deposit,Withdrawal,Balance", 8, { "Date","Num","Description","Notes","Account","Deposit","Withdrawal","Balance" } }, { "05/01/15,45,Acme Inc.,,Miscellaneous,,\"1,100.00\",", 8, { "05/01/15","45","Acme Inc.","","Miscellaneous","","1,100.00","" } }, { "05/01/15,45,Acme Inc.,,Miscellaneous,", 6, { "05/01/15","45","Acme Inc.","","Miscellaneous","",NULL,NULL } }, + { "Test\\ with backslash,nextfield", 2, { "Test\\ with backslash","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } }, + { "Test with \\\" escaped quote,nextfield", 2, { "Test with \" escaped quote","nextfield",NULL,NULL,NULL,NULL,NULL,NULL } }, { NULL, 0, { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL } }, };