mirror of https://github.com/Gnucash/gnucash
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
546 lines
12 KiB
546 lines
12 KiB
#include <goffice/goffice-config.h>
|
|
#include "go-format-match.h"
|
|
#include "go-format.h"
|
|
#include "format-impl.h"
|
|
#include "regutf8.h"
|
|
|
|
#include <glib/gi18n.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
/*
 * Append one tag byte to the byte array held in the local variable
 * `match_types` of the enclosing function.  The variable is reassigned with
 * the value returned by g_byte_array_append().
 */
#define append_type(t) \
	do { \
		guint8 tag_byte_ = (t); \
		match_types = g_byte_array_append (match_types, &tag_byte_, 1); \
	} while (0)
|
|
|
|
/*
 * Write a human readable description of a regexp compilation error to
 * stderr.
 *
 * @ret: the error code to describe.  Codes that have a dedicated REG_*
 *       case below get a descriptive message; any other value is reported
 *       numerically.
 */
static void
print_regex_error (int ret)
{
	char const *msg = NULL;

	switch (ret) {
	case REG_BADBR:
		msg = "There was an invalid `\\{...\\}' construct in the regular\n"
		      "expression. A valid `\\{...\\}' construct must contain either a\n"
		      "single number, or two numbers in increasing order separated by a\n"
		      "comma.\n";
		break;

	case REG_BADPAT:
		msg = "There was a syntax error in the regular expression.\n";
		break;

	case REG_BADRPT:
		msg = "A repetition operator such as `?' or `*' appeared in a bad\n"
		      "position (with no preceding subexpression to act on).\n";
		break;

	case REG_ECOLLATE:
		msg = "The regular expression referred to an invalid collating element\n"
		      "(one not defined in the current locale for string collation).\n";
		break;

	case REG_ECTYPE:
		msg = "The regular expression referred to an invalid character class name.\n";
		break;

	/* Guarded so that a code aliased to REG_BADPAT does not yield a
	 * duplicate case label. */
#if REG_EESCAPE != REG_BADPAT
	case REG_EESCAPE:
		msg = "The regular expression ended with `\\'.\n";
		break;
#endif

	case REG_ESUBREG:
		msg = "There was an invalid number in the `\\DIGIT' construct.\n";
		break;

	case REG_EBRACK:
		msg = "There were unbalanced square brackets in the regular expression.\n";
		break;

#if REG_EPAREN != REG_BADPAT
	case REG_EPAREN:
		msg = "An extended regular expression had unbalanced parentheses, or a\n"
		      "basic regular expression had unbalanced `\\(' and `\\)'.\n";
		break;
#endif

#if REG_EBRACE != REG_BADPAT
	case REG_EBRACE:
		msg = "The regular expression had unbalanced `\\{' and `\\}'.\n";
		break;
#endif

	/* These two codes are only handled when the headers define them. */
#ifdef REG_EBOL
	case REG_EBOL:
		msg = "Found ^ not at the beginning.\n";
		break;
#endif

#ifdef REG_EEOL
	case REG_EEOL:
		msg = "Found $ not at the end.\n";
		break;
#endif

	case REG_ERANGE:
		msg = "One of the endpoints in a range expression was invalid.\n";
		break;

	case REG_ESPACE:
		msg = "`regcomp' ran out of memory.\n";
		break;

	default:
		/* No dedicated text: report the raw code below. */
		break;
	}

	if (msg != NULL)
		fputs (msg, stderr);
	else
		fprintf (stderr, "regexp error %d\n", ret);
}
|
|
|
|
/*
 * Build a regexp alternation group "(a|b|c)" from a NULL-terminated list
 * of strings.
 *
 * Each entry is passed through _() first.  If the resulting text starts
 * with '*', that single character is skipped (the marker exists for i18n
 * purposes only).  The texts are copied verbatim: no regexp quoting is
 * applied to them.
 *
 * @list: NULL-terminated array of strings; an empty list yields "()".
 *
 * Returns a string allocated with g_malloc(); release it with g_free().
 */
static char *
create_option_list (char const *const *list)
{
	size_t len = 0;
	char const *const *p;
	char *res;
	char *out;

	/* First pass: each entry needs its text plus one separator byte. */
	for (p = list; *p; p++) {
		char const *v = _(*p);

		if (*v == '*')
			v++;
		len += strlen (v) + 1;
	}
	/* Room for the parentheses and the terminating NUL (with slack). */
	len += 5;

	res = g_malloc (len);
	out = res;
	*out++ = '(';

	/* Second pass: copy through a moving cursor so the buffer is never
	 * rescanned from the start, as repeated strcat() would do. */
	for (p = list; *p; p++) {
		char const *v = _(*p);
		size_t n;

		if (*v == '*')
			v++;

		n = strlen (v);
		memcpy (out, v, n);
		out += n;
		if (*(p + 1))
			*out++ = '|';
	}
	*out++ = ')';
	*out = '\0';

	return res;
}
|
|
|
|
/* Create a regular expression for the given XL-style format.
|
|
* NOTE: the format as well as the regexp are UTF-8 encoded. */
|
|
static char *
|
|
format_create_regexp (gchar const *format, GByteArray **dest)
|
|
{
|
|
GString *regexp;
|
|
GByteArray *match_types;
|
|
char *str;
|
|
gboolean hour_seen = FALSE;
|
|
gboolean number_seen = FALSE;
|
|
gboolean fraction = FALSE;
|
|
|
|
g_return_val_if_fail (format != NULL, NULL);
|
|
|
|
#ifdef DEBUG_NUMBER_MATCH
|
|
printf ("'%s' = ", format);
|
|
#endif
|
|
regexp = g_string_new ("^");
|
|
match_types = g_byte_array_new ();
|
|
|
|
for (; *format; format = g_utf8_next_char (format)) {
|
|
gunichar c = g_utf8_get_char (format);
|
|
switch (c) {
|
|
case '*':
|
|
/* FIXME: I don't think this will work for '^'. */
|
|
if (format[1]) {
|
|
format++;
|
|
g_string_append_c (regexp, '[');
|
|
g_string_append_unichar (regexp, g_utf8_get_char (format));
|
|
g_string_append_c (regexp, ']');
|
|
g_string_append_c (regexp, '*');
|
|
}
|
|
break;
|
|
|
|
case 'P': case 'p':
|
|
if (format[1] == 'm' || format[1] == 'M')
|
|
format++;
|
|
break;
|
|
|
|
case '\\': {
|
|
if (format[1] != '\0')
|
|
format++;
|
|
go_regexp_quote1 (regexp, format);
|
|
break;
|
|
}
|
|
|
|
case '[' :
|
|
/* Currency symbol */
|
|
if (format[1] == '$') {
|
|
for (format += 2; *format && *format != ']' ; ++format)
|
|
g_string_append_c (regexp, *format);
|
|
if (*format != ']')
|
|
format--;
|
|
break;
|
|
} else if (format[1] == 'h' && format[2] == ']') {
|
|
g_string_append (regexp, "([-+]?[0-9]+)");
|
|
append_type (MATCH_CUMMULATIVE_HOURS);
|
|
hour_seen = TRUE;
|
|
format += 2;
|
|
break;
|
|
} else if (format[1] == 'm' && format[2] == ']') {
|
|
g_string_append (regexp, "([-+]?[0-9]+)");
|
|
append_type (hour_seen ? MATCH_MINUTE : MATCH_CUMMULATIVE_MINUTES);
|
|
format += 2;
|
|
break;
|
|
} else if (format[1] == 's' && format[2] == ']') {
|
|
g_string_append (regexp, "([-+]?[0-9]+)");
|
|
append_type (MATCH_CUMMULATIVE_SECONDS);
|
|
format += 2;
|
|
break;
|
|
}
|
|
|
|
case '%':
|
|
g_string_append (regexp, "%");
|
|
append_type (MATCH_PERCENT);
|
|
break;
|
|
|
|
case '#': case '0': case '.': case '+': case '?': {
|
|
gboolean include_sep = FALSE;
|
|
gboolean include_decimal = FALSE;
|
|
|
|
while (*format == '#' || *format == '0' || *format == '.' ||
|
|
*format == '-' || *format == 'E' || *format == 'e' ||
|
|
*format == '+' || *format == '?' || *format == ',') {
|
|
switch (*format) {
|
|
case ',': include_sep = TRUE; break;
|
|
case '.': include_decimal = TRUE; break;
|
|
}
|
|
format++;
|
|
}
|
|
format--;
|
|
|
|
if (format[1] == '/' && number_seen)
|
|
append_type (MATCH_NUMERATOR);
|
|
else
|
|
append_type (MATCH_NUMBER);
|
|
|
|
if (include_sep) {
|
|
/* Not strictly correct.
|
|
* There should be a limit of 1-3 digits.
|
|
* However, that creates problems when
|
|
* There are formats like
|
|
* $#,##0.00
|
|
* but not
|
|
* $###0.00
|
|
* as a result $1000 would not be recognized.
|
|
*/
|
|
g_string_append (regexp, "([-+]?[0-9]+(");
|
|
go_regexp_quote (regexp, format_get_thousand ()->str);
|
|
g_string_append (regexp, "[0-9]{3})*)");
|
|
append_type (MATCH_SKIP);
|
|
} else {
|
|
g_string_append (regexp, "([-+]?[0-9]+)");
|
|
}
|
|
|
|
if (include_decimal) {
|
|
g_string_append (regexp, "?(");
|
|
go_regexp_quote (regexp, format_get_decimal ()->str);
|
|
g_string_append (regexp, "[0-9]+([Ee][-+]?[0-9]+)?)");
|
|
append_type (MATCH_NUMBER_DECIMALS);
|
|
}
|
|
|
|
number_seen = TRUE;
|
|
break;
|
|
}
|
|
|
|
case 'h':
|
|
case 'H':
|
|
hour_seen = TRUE;
|
|
if (format[1] == 'h' || format[1] == 'H')
|
|
format++;
|
|
|
|
g_string_append (regexp, "([0-9][0-9]?)");
|
|
append_type (MATCH_HOUR);
|
|
break;
|
|
|
|
case 'M':
|
|
case 'm':
|
|
if (hour_seen) {
|
|
if (format[1] == 'm' || format[1] == 'M')
|
|
format++;
|
|
g_string_append (regexp, "([0-9][0-9]?)");
|
|
append_type (MATCH_MINUTE);
|
|
hour_seen = FALSE;
|
|
} else {
|
|
if (format[1] == 'm' || format[1] == 'M') {
|
|
if (format[2] == 'm' || format[2] == 'M') {
|
|
if (format[3] == 'm' || format[3] == 'M') {
|
|
char *l;
|
|
|
|
l = create_option_list (month_long);
|
|
g_string_append (regexp, l);
|
|
g_free (l);
|
|
|
|
append_type (MATCH_MONTH_FULL);
|
|
format++;
|
|
} else {
|
|
char *l;
|
|
|
|
l = create_option_list (month_short);
|
|
g_string_append (regexp, l);
|
|
g_free (l);
|
|
|
|
append_type (MATCH_MONTH_SHORT);
|
|
}
|
|
format++;
|
|
} else {
|
|
g_string_append (regexp, "([0-9][0-9]?)");
|
|
append_type (MATCH_MONTH_NUMBER);
|
|
}
|
|
format++;
|
|
} else {
|
|
g_string_append (regexp, "([0-9][0-9]?)");
|
|
append_type (MATCH_MONTH_NUMBER);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 's':
|
|
case 'S':
|
|
/* ICK!
|
|
* ICK!
|
|
* 'm' is ambiguous. It can be months or minutes.
|
|
*/
|
|
{
|
|
int l = match_types->len;
|
|
if (l > 0 && match_types->data[l - 1] == MATCH_MONTH_NUMBER)
|
|
match_types->data[l - 1] = MATCH_MINUTE;
|
|
}
|
|
|
|
if (format[1] == 's' || format[1] == 'S')
|
|
format++;
|
|
g_string_append (regexp, "([0-9][0-9]?)");
|
|
append_type (MATCH_SECOND);
|
|
break;
|
|
|
|
case 'D':
|
|
case 'd':
|
|
if (format[1] == 'd' || format[1] == 'D') {
|
|
if (format[2] == 'd' || format[2] == 'D') {
|
|
if (format[3] == 'd' || format[3] == 'D') {
|
|
char *l;
|
|
|
|
l = create_option_list (day_long);
|
|
g_string_append (regexp, l);
|
|
g_free (l);
|
|
|
|
append_type (MATCH_DAY_FULL);
|
|
format++;
|
|
} else {
|
|
char *l;
|
|
|
|
l = create_option_list (day_short);
|
|
g_string_append (regexp, l);
|
|
g_free (l);
|
|
}
|
|
format++;
|
|
} else {
|
|
g_string_append (regexp, "([0-9][0-9]?)");
|
|
append_type (MATCH_DAY_NUMBER);
|
|
}
|
|
format++;
|
|
} else {
|
|
g_string_append (regexp, "([0-9][0-9]?)");
|
|
append_type (MATCH_DAY_NUMBER);
|
|
}
|
|
break;
|
|
|
|
case 'Y':
|
|
case 'y':
|
|
if (format[1] == 'y' || format[1] == 'Y') {
|
|
if (format[2] == 'y' || format[2] == 'Y') {
|
|
if (format[3] == 'y' || format[3] == 'Y') {
|
|
g_string_append (regexp, "([0-9][0-9][0-9][0-9])");
|
|
append_type (MATCH_YEAR_FULL);
|
|
format++;
|
|
}
|
|
format++;
|
|
} else {
|
|
g_string_append (regexp, "([0-9][0-9]?)");
|
|
append_type (MATCH_YEAR_SHORT);
|
|
}
|
|
format++;
|
|
} else {
|
|
g_string_append (regexp, "([0-9][0-9]?)");
|
|
append_type (MATCH_YEAR_SHORT);
|
|
}
|
|
break;
|
|
|
|
case ';':
|
|
/* TODO : Is it ok to only match the first entry ?? */
|
|
/* FIXME: What is this? */
|
|
while (*format)
|
|
format = g_utf8_next_char (format);
|
|
format = g_utf8_prev_char (format);
|
|
break;
|
|
|
|
case 'A': case 'a':
|
|
if (*(format + 1) == 'm' || *(format + 1) == 'M') {
|
|
if (*(format + 2) == '/') {
|
|
if (*(format + 3) == 'P' || *(format + 3) == 'p') {
|
|
if (*(format + 4) == 'm' || *(format + 4) == 'M') {
|
|
format++;
|
|
}
|
|
format++;
|
|
}
|
|
format++;
|
|
}
|
|
format++;
|
|
}
|
|
g_string_append (regexp, "([Aa]|[Pp])[Mm]?");
|
|
append_type (MATCH_AMPM);
|
|
break;
|
|
|
|
case '"':
|
|
/* Matches a string */
|
|
format++;
|
|
while (*format != '"') {
|
|
if (*format == 0)
|
|
goto error;
|
|
format = go_regexp_quote1 (regexp, format);
|
|
}
|
|
break;
|
|
|
|
case '@':
|
|
g_string_append (regexp, "(.*)");
|
|
append_type (MATCH_STRING_CONSTANT);
|
|
break;
|
|
|
|
case '_':
|
|
if (format[1]) {
|
|
g_string_append (regexp, "[ ]?");
|
|
format++;
|
|
}
|
|
break;
|
|
|
|
case '/':
|
|
g_string_append_c (regexp, '/');
|
|
if (number_seen) {
|
|
fraction = TRUE;
|
|
/* Fraction. Ick. */
|
|
if (strncmp (regexp->str, "^([-+]?[0-9]+) ", 15) == 0) {
|
|
g_string_erase (regexp, 14, 1);
|
|
g_string_insert (regexp, 13, " +|");
|
|
/* FIXME: The final regexp won't match a plain digit sequence. */
|
|
}
|
|
|
|
g_string_append_c (regexp, '(');
|
|
while (format[1] == '?' || g_ascii_isdigit (format[1])) {
|
|
format++;
|
|
g_string_append (regexp, "[0-9]");
|
|
}
|
|
|
|
g_string_append (regexp, ") *");
|
|
append_type (MATCH_DENOMINATOR);
|
|
}
|
|
break;
|
|
|
|
#if 0
|
|
/* these were here explicitly before adding default.
|
|
* Leave them explicit for now as documentation.
|
|
*/
|
|
/* Default appears fine for this. */
|
|
case 0x00a3: /* GBP sign. */
|
|
case 0x00a5: /* JPY sign. */
|
|
case 0x20ac: /* EUR sign. */
|
|
case '^':
|
|
case '|':
|
|
case ']':
|
|
case '$':
|
|
case ':':
|
|
case '-':
|
|
case ' ':
|
|
case '(':
|
|
case ')':
|
|
|
|
#endif
|
|
default :
|
|
go_regexp_quote1 (regexp, format);
|
|
}
|
|
}
|
|
|
|
g_string_append_c (regexp, '$');
|
|
|
|
str = g_string_free (regexp, FALSE);
|
|
*dest = match_types;
|
|
|
|
#ifdef DEBUG_NUMBER_MATCH
|
|
printf ("'%s'\n",str);
|
|
#endif
|
|
return str;
|
|
|
|
error:
|
|
g_string_free (regexp, TRUE);
|
|
g_byte_array_free (match_types, TRUE);
|
|
return NULL;
|
|
}
|
|
|
|
gboolean
|
|
format_match_create (GOFormatElement *fmt)
|
|
{
|
|
GByteArray *match_tags;
|
|
char *regexp;
|
|
GORegexp r;
|
|
int ret;
|
|
|
|
g_return_val_if_fail (fmt != NULL, FALSE);
|
|
g_return_val_if_fail (fmt->regexp_str == NULL, FALSE);
|
|
g_return_val_if_fail (fmt->match_tags == NULL, FALSE);
|
|
g_return_val_if_fail (strcmp (fmt->format, "General"), FALSE);
|
|
|
|
regexp = format_create_regexp (fmt->format, &match_tags);
|
|
if (!regexp) {
|
|
fmt->regexp_str = NULL;
|
|
fmt->match_tags = NULL;
|
|
return FALSE;
|
|
}
|
|
|
|
ret = go_regcomp (&r, regexp, REG_EXTENDED | REG_ICASE);
|
|
if (ret != 0) {
|
|
g_warning ("expression [%s] produced [%s]", fmt->format, regexp);
|
|
print_regex_error (ret);
|
|
g_free (regexp);
|
|
return FALSE;
|
|
}
|
|
|
|
fmt->regexp_str = regexp;
|
|
fmt->regexp = r;
|
|
fmt->match_tags = match_tags;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
void
|
|
format_match_release (GOFormatElement *fmt)
|
|
{
|
|
if (fmt->regexp_str != NULL) {
|
|
g_free (fmt->regexp_str);
|
|
go_regfree (&fmt->regexp);
|
|
g_byte_array_free (fmt->match_tags, TRUE);
|
|
}
|
|
}
|
|
|