You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
gnucash/libgnucash/core-utils/gnc-unicode.h

145 lines
6.6 KiB

/********************************************************************
* gnc-unicode.h -- Localization with ICU. *
* *
* Copyright (C) 2025 John Ralls <jralls@ceridwen.us> *
* *
* This program is free software; you can redistribute it and/or *
* modify it under the terms of the GNU General Public License as *
* published by the Free Software Foundation; either version 2 of *
* the License, or (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License*
* along with this program; if not, contact: *
* *
* Free Software Foundation Voice: +1-617-542-5942 *
* 51 Franklin Street, Fifth Floor Fax: +1-617-542-2652 *
* Boston, MA 02110-1301, USA gnu@gnu.org *
********************************************************************/
#pragma once
#include <stdbool.h>
/** @addtogroup Localization
These functions perform string comparison and collation according to
the Unicode Common Locale Data Repository (CLDR) rules. The CLDR
specifies five levels of comparison:
- The primary or base level considers all variant codepoints
  representing a character to be equivalent regardless of case or
  decorations like accents and vowel or tone marks.
- The secondary level differentiates between letters with
  decorations but still ignores case.
- The tertiary level differentiates based on case, decorations,
  and variants, for example A and Ⓐ.
- The quaternary level differentiates words with punctuation, for
  example "ab" and "a-b".
- The identical level differentiates all codepoints with no implicit
  normalization, so a character constructed with combining marks
  will compare different from the same character represented as a
  single codepoint.
*/
#ifdef __cplusplus
extern "C"
{
#endif
/** Find the first Unicode-equivalent UTF-8-encoded substring in a
 * UTF-8-encoded string, comparing characters at the CLDR primary
 * level (variant codepoints of a character are equivalent regardless
 * of case or decorations such as accents), and report the starting
 * position and length of the matching part of the string.
 *
 * @param needle The substring to search for
 * @param haystack The string to search in
 * @param[out] position The position of needle in haystack
 * @param[out] length The length of the match
 * @return true if needle is found in haystack
 *
 * NOTE(review): this header does not state the units of position and
 * length (bytes, code units or characters) nor their values when no
 * match is found -- confirm against the implementation.
 */
bool gnc_unicode_has_substring_base_chars(const char* needle, const
char* haystack, int* position,
int* length);
/** Find the first Unicode-equivalent UTF-8-encoded substring in a
 * UTF-8-encoded string, comparing characters at the CLDR secondary
 * level (letters with decorations are distinguished but case is
 * still ignored), and report the starting position and length of the
 * matching part of the string.
 *
 * @param needle The substring to search for
 * @param haystack The string to search in
 * @param[out] position The position of needle in haystack
 * @param[out] length The length of the match
 * @return true if needle is found in haystack
 *
 * NOTE(review): this header does not state the units of position and
 * length (bytes, code units or characters) nor their values when no
 * match is found -- confirm against the implementation.
 */
bool gnc_unicode_has_substring_accented_chars(const char* needle, const
char* haystack, int* position,
int* length);
/** Find the first Unicode-equivalent UTF-8-encoded substring in a
 * UTF-8-encoded string, comparing characters at the CLDR tertiary
 * level (case, decorations and variants are all distinguished), and
 * report the starting position and length of the matching part of
 * the string.
 *
 * @param needle The substring to search for
 * @param haystack The string to search in
 * @param[out] position The position of needle in haystack
 * @param[out] length The length of the match
 * @return true if needle is found in haystack
 *
 * NOTE(review): this header does not state the units of position and
 * length (bytes, code units or characters) nor their values when no
 * match is found -- confirm against the implementation.
 */
bool gnc_unicode_has_substring_accented_case_sensitive(const char* needle, const
char* haystack, int* position,
int* length);
/** Find the first Unicode-equivalent UTF-8-encoded substring in a
 * UTF-8-encoded string, comparing characters at the CLDR identical
 * level (all codepoints are distinguished with no implicit
 * normalization), and report the starting position and length of the
 * matching part of the string.
 *
 * @param needle The substring to search for
 * @param haystack The string to search in
 * @param[out] position The position of needle in haystack
 * @param[out] length The length of the match
 * @return true if needle is found in haystack
 *
 * NOTE(review): this header does not state the units of position and
 * length (bytes, code units or characters) nor their values when no
 * match is found -- confirm against the implementation.
 */
bool gnc_unicode_has_substring_identical(const char* needle, const
char* haystack, int* position,
int* length);
/** Compare two UTF-8 encoded strings for equivalence at the CLDR
 * primary level (variant codepoints of a character are equivalent
 * regardless of case or decorations such as accents) in the current
 * locale. Errors are logged to gnc.locale.
 *
 * @param one a string
 * @param two another string
 * @return 0 if one and two are equivalent, -1 if one is less than
 * two, 1 if one is greater than two, -99 on error.
 */
int gnc_unicode_compare_base_chars(const char* one, const char* two);
/** Compare two UTF-8 encoded strings for equivalence at the CLDR
 * secondary level (letters with decorations are distinguished but
 * case is still ignored) in the current locale. Errors are logged to
 * gnc.locale.
 *
 * @param one a string
 * @param two another string
 * @return 0 if one and two are equivalent, -1 if one is less than
 * two, 1 if one is greater than two, -99 on error.
 */
int gnc_unicode_compare_accented_chars(const char* one, const char* two);
/** Compare two UTF-8 encoded strings for equivalence at the CLDR
 * tertiary level (case, decorations and variants are all
 * distinguished) in the current locale. Errors are logged to
 * gnc.locale.
 *
 * @param one a string
 * @param two another string
 * @return 0 if one and two are equivalent, -1 if one is less than
 * two, 1 if one is greater than two, -99 on error.
 */
int gnc_unicode_compare_accented_case_sensitive(const char* one, const char* two);
/** Compare two UTF-8 encoded strings for equivalence at the CLDR
 * identical level (all codepoints are distinguished with no implicit
 * normalization) in the current locale.
 *
 * @param one a string
 * @param two another string
 * @return 0 if one and two are equivalent, -1 if one is less than
 * two, 1 if one is greater than two, -99 on error.
 */
int gnc_unicode_compare_identical(const char* one, const char* two);
#ifdef __cplusplus
}
#endif