You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
gnucash/lib/goffice/split/number-match.c

1296 lines
29 KiB

/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
* number-match.c: This file includes the support for matching
* entered strings as numbers (by trying to apply one of the existing
* cell formats).
*
* The idea is simple: we create a regular expression from the format
* string that would match a value entered in that format. Then, on
* lookup we try to match the string against every regular expression
* we have: if a match is found, then we decode the number using a
* precomputed parallel-list of subexpressions.
*
* Author:
* Miguel de Icaza (miguel@gnu.org)
*/
#include <config.h>
#include <glib/gi18n.h>
#include "gnumeric.h"
#include "number-match.h"
#include "dates.h"
#include "numbers.h"
#include "gutils.h"
#include "datetime.h"
#include "style.h"
#include "format.h"
#include "value.h"
#include "mathfunc.h"
#include "str.h"
#include "regutf8.h"
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <locale.h>
#include <math.h>
#undef DEBUG_NUMBER_MATCH
/*
 * create_option_list:
 * @list: NULL-terminated array of (untranslated) names.
 *
 * Builds the alternation "(a|b|c)" from the translated form of every
 * entry in @list.  A single leading '*' on a translated entry (an i18n
 * disambiguation marker) is dropped.  The entries are copied verbatim,
 * i.e. they are not regexp-quoted.
 *
 * Returns: a newly allocated string; release it with g_free().
 */
static char *
create_option_list (char const *const *list)
{
	char const *const *item;
	size_t needed = 0;
	char *regexp;
	char *out;

	/* First pass: room for every entry plus one separator byte each. */
	for (item = list; *item != NULL; item++) {
		char const *text = _(*item);

		if (*text == '*')
			text++;
		needed += strlen (text) + 1;
	}
	/* Parentheses, terminator and a little slack. */
	needed += 5;

	regexp = g_malloc (needed);
	out = regexp;
	*out++ = '(';

	/* Second pass: copy the entries, '|' between neighbours. */
	for (item = list; *item != NULL; item++) {
		char const *text = _(*item);
		size_t n;

		if (*text == '*')
			text++;
		n = strlen (text);
		memcpy (out, text, n);
		out += n;
		if (item[1] != NULL)
			*out++ = '|';
	}

	*out++ = ')';
	*out = '\0';
	return regexp;
}
/*
 * MatchType:
 * One tag is stored (as a byte) per item that compute_value() has to
 * interpret when decoding a successful regexp match.  The numeric
 * values start at 1 and increase by one per enumerator.
 */
typedef enum {
	/* Dates */
	MATCH_DAY_FULL = 1,
	MATCH_DAY_NUMBER,
	MATCH_MONTH_FULL,
	MATCH_MONTH_SHORT,
	MATCH_MONTH_NUMBER,
	MATCH_YEAR_FULL,
	MATCH_YEAR_SHORT,

	/* Clock times */
	MATCH_HOUR,
	MATCH_MINUTE,
	MATCH_SECOND,
	MATCH_AMPM,

	/* Plain numbers */
	MATCH_NUMBER,
	MATCH_NUMBER_DECIMALS,
	MATCH_PERCENT,

	/* Group that is consumed but ignored, and the '@' text group */
	MATCH_SKIP,
	MATCH_STRING_CONSTANT,

	/* Elapsed-time fields written as [h], [m], [s] */
	MATCH_CUMMULATIVE_HOURS,
	MATCH_CUMMULATIVE_MINUTES,
	MATCH_CUMMULATIVE_SECONDS,

	/* Fractions */
	MATCH_NUMERATOR,
	MATCH_DENOMINATOR
} MatchType;
/*
 * append_type:
 * Appends the tag @t as a single byte to the GByteArray variable named
 * `match_types', which must be in scope where the macro is expanded.
 * The argument is parenthesized and the temporary has a name unlikely
 * to clash with an identifier used in the argument expression.
 */
#define append_type(t)								\
	do {									\
		guint8 append_type_tag_ = (guint8)(t);				\
		match_types = g_byte_array_append (match_types,			\
						   &append_type_tag_, 1);	\
	} while (0)
/*
 * format_create_regexp:
 * @format: XL-style format string, UTF-8 encoded.
 * @dest:   on success receives the array of MatchType tags.
 *
 * Create an anchored ("^...$") regular expression, also UTF-8 encoded,
 * that matches text entered in the style of @format.  While the regexp
 * is built, one MatchType byte is appended to a tag array for each item
 * compute_value() must decode; compute_value() pairs tag number k with
 * capture group number k, so every capture group emitted here needs a
 * corresponding tag (MATCH_SKIP when the text is to be ignored).
 *
 * Only the first ';'-separated section of @format is considered.
 *
 * Returns: the newly allocated regexp string (free with g_free()) with
 * *@dest set to the tag array, or NULL if @format contains an
 * unterminated quoted string, in which case *@dest is left untouched.
 */
static char *
format_create_regexp (unsigned char const *format, GByteArray **dest)
{
	GString *regexp;
	GByteArray *match_types;
	char *str;
	gboolean hour_seen = FALSE;
	gboolean number_seen = FALSE;

	g_return_val_if_fail (format != NULL, NULL);

#ifdef DEBUG_NUMBER_MATCH
	printf ("'%s' = ", format);
#endif
	regexp = g_string_new ("^");
	match_types = g_byte_array_new ();

	/*
	 * Every case must leave `format' on the last byte it consumed:
	 * the loop header performs the final advance.
	 */
	for (; *format; format = g_utf8_next_char (format)) {
		gunichar c = g_utf8_get_char (format);

		switch (c) {
		case '*':
			/* FIXME: I don't think this will work for '^'. */
			if (format[1]) {
				format++;
				g_string_append_c (regexp, '[');
				g_string_append_unichar (regexp, g_utf8_get_char (format));
				g_string_append_c (regexp, ']');
				g_string_append_c (regexp, '*');
			}
			break;

		case 'P': case 'p':
			if (format[1] == 'm' || format[1] == 'M')
				format++;
			break;

		case '\\': {
			if (format[1] != '\0')
				format++;
			gnumeric_regexp_quote1 (regexp, format);
			break;
		}

		case '[' :
			/* Currency symbol */
			if (format[1] == '$') {
				/*
				 * NOTE(review): the bytes between "[$" and "]"
				 * are copied into the regexp unquoted; confirm
				 * whether they should go through
				 * gnumeric_regexp_quote1().
				 */
				for (format += 2; *format && *format != ']' ; ++format)
					g_string_append_c (regexp, *format);
				/*
				 * Stay on the ']' so that the loop header steps
				 * past it without swallowing the character that
				 * follows.  If the bracket was never closed we
				 * are on the terminator: step back one character
				 * so the loop header lands on the terminator
				 * instead of running past it.
				 */
				if (*format == '\0')
					format = g_utf8_prev_char (format);
				break;
			} else if (format[1] == 'h' && format[2] == ']') {
				g_string_append (regexp, "([-+]?[0-9]+)");
				append_type (MATCH_CUMMULATIVE_HOURS);
				hour_seen = TRUE;
				format += 2;
				break;
			} else if (format[1] == 'm' && format[2] == ']') {
				g_string_append (regexp, "([-+]?[0-9]+)");
				append_type (hour_seen ? MATCH_MINUTE : MATCH_CUMMULATIVE_MINUTES);
				format += 2;
				break;
			} else if (format[1] == 's' && format[2] == ']') {
				g_string_append (regexp, "([-+]?[0-9]+)");
				append_type (MATCH_CUMMULATIVE_SECONDS);
				format += 2;
				break;
			}
			/*
			 * Fall through.
			 * NOTE(review): any other '[' is handled exactly like
			 * '%'; this looks accidental but is kept as is --
			 * confirm the intended treatment of other bracketed
			 * sections.
			 */

		case '%':
			g_string_append (regexp, "%");
			append_type (MATCH_PERCENT);
			break;

		case '#': case '0': case '.': case '+': case '?': {
			gboolean include_sep = FALSE;
			gboolean include_decimal = FALSE;

			/* Swallow the whole numeric picture in one go. */
			while (*format == '#' || *format == '0' || *format == '.' ||
			       *format == '-' || *format == 'E' || *format == 'e' ||
			       *format == '+' || *format == '?' || *format == ',') {
				switch (*format) {
				case ',': include_sep = TRUE; break;
				case '.': include_decimal = TRUE; break;
				}
				format++;
			}
			format--;

			if (format[1] == '/' && number_seen)
				append_type (MATCH_NUMERATOR);
			else
				append_type (MATCH_NUMBER);

			if (include_sep) {
				/* Not strictly correct.
				 * There should be a limit of 1-3 digits.
				 * However, that creates problems when
				 * There are formats like
				 * $#,##0.00
				 * but not
				 * $###0.00
				 * as a result $1000 would not be recognized.
				 */
				g_string_append (regexp, "([-+]?[0-9]+(");
				gnumeric_regexp_quote (regexp, format_get_thousand ()->str);
				g_string_append (regexp, "[0-9]{3})*)");
				/* The nested thousands group needs a tag too. */
				append_type (MATCH_SKIP);
			} else {
				g_string_append (regexp, "([-+]?[0-9]+)");
			}

			if (include_decimal) {
				g_string_append (regexp, "?(");
				gnumeric_regexp_quote (regexp, format_get_decimal ()->str);
				g_string_append (regexp, "[0-9]+([Ee][-+]?[0-9]+)?)");
				append_type (MATCH_NUMBER_DECIMALS);
			}

			number_seen = TRUE;
			break;
		}

		case 'h':
		case 'H':
			hour_seen = TRUE;
			if (format[1] == 'h' || format[1] == 'H')
				format++;

			g_string_append (regexp, "([0-9][0-9]?)");
			append_type (MATCH_HOUR);
			break;

		case 'M':
		case 'm':
			if (hour_seen) {
				if (format[1] == 'm' || format[1] == 'M')
					format++;
				g_string_append (regexp, "([0-9][0-9]?)");
				append_type (MATCH_MINUTE);
				hour_seen = FALSE;
			} else {
				if (format[1] == 'm' || format[1] == 'M') {
					if (format[2] == 'm' || format[2] == 'M') {
						if (format[3] == 'm' || format[3] == 'M') {
							char *l;

							l = create_option_list (month_long);
							g_string_append (regexp, l);
							g_free (l);

							append_type (MATCH_MONTH_FULL);
							format++;
						} else {
							char *l;

							l = create_option_list (month_short);
							g_string_append (regexp, l);
							g_free (l);

							append_type (MATCH_MONTH_SHORT);
						}
						format++;
					} else {
						g_string_append (regexp, "([0-9][0-9]?)");
						append_type (MATCH_MONTH_NUMBER);
					}
					format++;
				} else {
					g_string_append (regexp, "([0-9][0-9]?)");
					append_type (MATCH_MONTH_NUMBER);
				}
			}
			break;

		case 's':
		case 'S':
			/* ICK!
			 * ICK!
			 * 'm' is ambiguous. It can be months or minutes.
			 */
			{
				int l = match_types->len;
				if (l > 0 && match_types->data[l - 1] == MATCH_MONTH_NUMBER)
					match_types->data[l - 1] = MATCH_MINUTE;
			}

			if (format[1] == 's' || format[1] == 'S')
				format++;
			g_string_append (regexp, "([0-9][0-9]?)");
			append_type (MATCH_SECOND);
			break;

		case 'D':
		case 'd':
			if (format[1] == 'd' || format[1] == 'D') {
				if (format[2] == 'd' || format[2] == 'D') {
					if (format[3] == 'd' || format[3] == 'D') {
						char *l;

						l = create_option_list (day_long);
						g_string_append (regexp, l);
						g_free (l);

						append_type (MATCH_DAY_FULL);
						format++;
					} else {
						char *l;

						l = create_option_list (day_short);
						g_string_append (regexp, l);
						g_free (l);

						/*
						 * The option list is a capture
						 * group; tag it so the groups
						 * that follow stay aligned with
						 * their tags.
						 */
						append_type (MATCH_SKIP);
					}
					format++;
				} else {
					g_string_append (regexp, "([0-9][0-9]?)");
					append_type (MATCH_DAY_NUMBER);
				}
				format++;
			} else {
				g_string_append (regexp, "([0-9][0-9]?)");
				append_type (MATCH_DAY_NUMBER);
			}
			break;

		case 'Y':
		case 'y':
			if (format[1] == 'y' || format[1] == 'Y') {
				if (format[2] == 'y' || format[2] == 'Y') {
					if (format[3] == 'y' || format[3] == 'Y') {
						g_string_append (regexp, "([0-9][0-9][0-9][0-9])");
						append_type (MATCH_YEAR_FULL);
						format++;
					}
					format++;
				} else {
					g_string_append (regexp, "([0-9][0-9]?)");
					append_type (MATCH_YEAR_SHORT);
				}
				format++;
			} else {
				g_string_append (regexp, "([0-9][0-9]?)");
				append_type (MATCH_YEAR_SHORT);
			}
			break;

		case ';':
			/* TODO : Is it ok to only match the first entry ?? */
			/* Skip to the last character: the loop then ends. */
			while (*format)
				format = g_utf8_next_char (format);
			format = g_utf8_prev_char (format);
			break;

		case 'A': case 'a':
			/* Accepts "a", "am" and "am/pm" spellings. */
			if (*(format + 1) == 'm' || *(format + 1) == 'M') {
				if (*(format + 2) == '/') {
					if (*(format + 3) == 'P' || *(format + 3) == 'p') {
						if (*(format + 4) == 'm' || *(format + 4) == 'M') {
							format++;
						}
						format++;
					}
					format++;
				}
				format++;
			}
			g_string_append (regexp, "([Aa]|[Pp])[Mm]?");
			append_type (MATCH_AMPM);
			break;

		case '"':
			/* Matches a string */
			format++;
			while (*format != '"') {
				if (*format == 0)
					goto error;
				format = gnumeric_regexp_quote1 (regexp, format);
			}
			break;

		case '@':
			g_string_append (regexp, "(.*)");
			append_type (MATCH_STRING_CONSTANT);
			break;

		case '_':
			if (format[1]) {
				g_string_append (regexp, "[ ]?");
				format++;
			}
			break;

		case '/':
			g_string_append_c (regexp, '/');
			if (number_seen) {
				/* Fraction. Ick. */
				if (strncmp (regexp->str, "^([-+]?[0-9]+) ", 15) == 0) {
					g_string_erase (regexp, 14, 1);
					g_string_insert (regexp, 13, " +|");
					/* FIXME: The final regexp won't match a plain digit sequence. */
				}

				while (format[1] == '?' || g_ascii_isdigit (format[1]))
					format++;

				g_string_append (regexp, "([0-9]+) *");
				append_type (MATCH_DENOMINATOR);
			}
			break;

#if 0
		/* these were here explicitly before adding default.
		 * Leave them explicit for now as documentation.
		 */
		/* Default appears fine for this. */
		case 0x00a3: /* GBP sign. */
		case 0x00a5: /* JPY sign. */
		case 0x20ac: /* EUR sign. */
		case '^':
		case '|':
		case ']':
		case '$':
		case ':':
		case '-':
		case ' ':
		case '(':
		case ')':
#endif

		default :
			gnumeric_regexp_quote1 (regexp, format);
		}
	}

	g_string_append_c (regexp, '$');

	str = g_string_free (regexp, FALSE);
	*dest = match_types;

#ifdef DEBUG_NUMBER_MATCH
	printf ("'%s'\n",str);
#endif
	return str;

 error:
	g_string_free (regexp, TRUE);
	g_byte_array_free (match_types, TRUE);
	return NULL;
}
/*
 * print_regex_error:
 * @ret: error code returned by the regexp compiler.
 *
 * Writes a human readable explanation of @ret to stderr.  Codes without
 * a dedicated message are reported numerically.
 */
static void
print_regex_error (int ret)
{
	char const *explanation = NULL;

	switch (ret) {
	case REG_BADBR:
		explanation =
			"There was an invalid `\\{...\\}' construct in the regular\n"
			"expression. A valid `\\{...\\}' construct must contain either a\n"
			"single number, or two numbers in increasing order separated by a\n"
			"comma.\n";
		break;

	case REG_BADPAT:
		explanation =
			"There was a syntax error in the regular expression.\n";
		break;

	case REG_BADRPT:
		explanation =
			"A repetition operator such as `?' or `*' appeared in a bad\n"
			"position (with no preceding subexpression to act on).\n";
		break;

	case REG_ECOLLATE:
		explanation =
			"The regular expression referred to an invalid collating element\n"
			"(one not defined in the current locale for string collation).\n";
		break;

	case REG_ECTYPE:
		explanation =
			"The regular expression referred to an invalid character class name.\n";
		break;

#if REG_EESCAPE != REG_BADPAT
	case REG_EESCAPE:
		explanation =
			"The regular expression ended with `\\'.\n";
		break;
#endif

	case REG_ESUBREG:
		explanation =
			"There was an invalid number in the `\\DIGIT' construct.\n";
		break;

	case REG_EBRACK:
		explanation =
			"There were unbalanced square brackets in the regular expression.\n";
		break;

#if REG_EPAREN != REG_BADPAT
	case REG_EPAREN:
		explanation =
			"An extended regular expression had unbalanced parentheses, or a\n"
			"basic regular expression had unbalanced `\\(' and `\\)'.\n";
		break;
#endif

#if REG_EBRACE != REG_BADPAT
	case REG_EBRACE:
		explanation =
			"The regular expression had unbalanced `\\{' and `\\}'.\n";
		break;
#endif

#ifdef REG_EBOL
	case REG_EBOL:
		explanation = "Found ^ not at the beginning.\n";
		break;
#endif

#ifdef REG_EEOL
	case REG_EEOL:
		explanation = "Found $ not at the end.\n";
		break;
#endif

	case REG_ERANGE:
		explanation =
			"One of the endpoints in a range expression was invalid.\n";
		break;

	case REG_ESPACE:
		explanation =
			"`regcomp' ran out of memory.\n";
		break;

	default:
		break;
	}

	if (explanation != NULL)
		fputs (explanation, stderr);
	else
		fprintf (stderr, "regexp error %d\n", ret);
}
/*
 * Lists filled by format_match_init() and released by
 * format_match_finish().
 */
/* Formats with a regexp not seen before; format_match() walks this list. */
static GSList *format_match_list = NULL;
/* Formats whose regexp string duplicates one already in format_match_list. */
static GSList *format_dup_match_list = NULL;
/* Formats that ended up without a regexp (regexp_str == NULL). */
static GSList *format_failed_match_list = NULL;
/*
 * format_match_release:
 * @fmt: format whose matching data should be dropped.
 *
 * Frees the regexp string, the compiled regexp and the tag array that
 * format_match_create() attached to @fmt.  Does nothing when @fmt has
 * no regexp.  The pointers are reset afterwards so that a repeated
 * call is harmless and format_match_create() may be used again.
 */
void
format_match_release (GnmFormat *fmt)
{
	if (fmt->regexp_str != NULL) {
		g_free (fmt->regexp_str);
		go_regfree (&fmt->regexp);
		g_byte_array_free (fmt->match_tags, TRUE);

		/* Do not leave dangling pointers behind. */
		fmt->regexp_str = NULL;
		fmt->match_tags = NULL;
	}
}
/*
 * format_match_create:
 * @fmt: format to prepare for matching; must not have matching data
 *       yet and must not be the "General" format.
 *
 * Derives a regexp and tag array from @fmt->format, compiles the regexp
 * (extended syntax, case insensitive) and stores all three in @fmt.
 *
 * Returns: TRUE on success.  FALSE if a precondition fails, if no
 * regexp could be derived, or if the regexp does not compile; in the
 * failure cases @fmt is left without matching data.
 */
gboolean
format_match_create (GnmFormat *fmt)
{
	GByteArray *match_tags;
	char *regexp;
	go_regex_t r;
	int ret;

	g_return_val_if_fail (fmt != NULL, FALSE);
	g_return_val_if_fail (fmt->regexp_str == NULL, FALSE);
	g_return_val_if_fail (fmt->match_tags == NULL, FALSE);
	g_return_val_if_fail (strcmp (fmt->format, "General"), FALSE);

	regexp = format_create_regexp (fmt->format, &match_tags);
	if (!regexp) {
		fmt->regexp_str = NULL;
		fmt->match_tags = NULL;
		return FALSE;
	}

	ret = go_regcomp (&r, regexp, REG_EXTENDED | REG_ICASE);
	if (ret != 0) {
		g_warning ("expression [%s] produced [%s]", fmt->format, regexp);
		print_regex_error (ret);
		g_free (regexp);
		/* The tag array belongs to us as well; do not leak it. */
		g_byte_array_free (match_tags, TRUE);
		return FALSE;
	}

	fmt->regexp_str = regexp;
	fmt->regexp = r;
	fmt->match_tags = match_tags;

	return TRUE;
}
/*
* value_is_error : Check to see if a string begins with one of the magic
* error strings.
*
* @str : The string to test
*
* returns : an error if there is one, or NULL.
*/
static GnmValue *
value_is_error (char const *str)
{
GnmStdError e;
for (e = (GnmStdError)0; e < GNM_ERROR_UNKNOWN; e++)
if (0 == strcmp (str, value_error_name (e, TRUE)))
return value_new_error_std (NULL, e);
return NULL;
}
/*
* Loads the initial formats that we will recognize
*/
void
format_match_init (void)
{
int i;
GnmFormat *fmt;
GHashTable *hash;
currency_date_format_init ();
hash = g_hash_table_new (g_str_hash, g_str_equal);
for (i = 0; cell_formats[i]; i++) {
char const * const * p = cell_formats[i];
for (; *p; p++) {
/* do not include text formats in the standard set */
if (!strcmp ("@", *p))
continue;
fmt = style_format_new_XL (*p, FALSE);
if (fmt->regexp_str != NULL) {
/* TODO : * We could keep track of the regexps
* that General would match. and avoid putting
* them in the list. */
if (g_hash_table_lookup (hash, fmt->regexp_str) == NULL) {
format_match_list = g_slist_append (format_match_list, fmt);
g_hash_table_insert (hash, fmt->regexp_str, fmt);
} else
format_dup_match_list = g_slist_append (format_dup_match_list, fmt);
} else
format_failed_match_list = g_slist_append (format_failed_match_list, fmt);
}
}
g_hash_table_destroy (hash);
}
/* Unrefs every format in *@list, frees the list and resets *@list to NULL. */
static void
format_match_drop_list (GSList **list)
{
	GSList *l;

	for (l = *list; l; l = l->next)
		style_format_unref (l->data);
	g_slist_free (*list);
	*list = NULL;
}

/*
 * format_match_finish:
 *
 * Releases the three format lists built by format_match_init() and
 * shuts down the currency/date formats.  The list heads are reset to
 * NULL so that a later format_match_init() starts from empty lists
 * rather than appending to freed memory.
 */
void
format_match_finish (void)
{
	format_match_drop_list (&format_match_list);
	format_match_drop_list (&format_dup_match_list);
	format_match_drop_list (&format_failed_match_list);

	currency_date_format_shutdown ();
}
/*
* table_lookup:
*
* Looks the string in the table passed
*/
static int
table_lookup (char const *str, char const *const *table)
{
char const *const *p = table;
int i = 0;
for (p = table; *p; p++, i++) {
char const *v = *p;
char const *iv = _(*p);
if (*v == '*') {
v++;
iv++;
}
if (g_ascii_strcasecmp (str, v) == 0)
return i;
if (g_ascii_strcasecmp (str, iv) == 0)
return i;
}
return -1;
}
/*
 * extract_text:
 * @str: the string the regexp was matched against.
 * @mp:  one match record; rm_so and rm_eo delimit the region.
 *
 * Returns: a newly allocated, NUL-terminated copy of the bytes of @str
 * from offset rm_so up to, but not including, offset rm_eo.  Free it
 * with g_free().
 */
static char *
extract_text (char const *str, const regmatch_t *mp)
{
	size_t const span = mp->rm_eo - mp->rm_so;
	char *copy = g_malloc (span + 1);

	strncpy (copy, str + mp->rm_so, span);
	copy[span] = 0;

	return copy;
}
/*
 * compute_value:
 * @s:         the text that was matched.
 * @mp:        match records from the regexp engine; record k (k >= 1)
 *             is decoded according to tag number k of @array.
 * @array:     the MatchType tags produced by format_create_regexp().
 * @date_conv: date convention handed to datetime_g_to_serial().
 *
 * Given a number of matches described by @mp on @s, compute the value
 * based on the tags in @array.
 *
 * Currently the code cannot mix a MATCH_NUMBER with any
 * of the date/time matching.
 *
 * Returns: a new value, or NULL if the pieces do not form a valid
 * number, date or time (unknown month name, non-positive denominator,
 * out of range number, invalid date, clock field out of range).
 */
static GnmValue *
compute_value (char const *s, const regmatch_t *mp,
	       GByteArray *array, GnmDateConventions const *date_conv)
{
	int const len = array->len;
	gnm_float number = 0.0;
	guchar *data = array->data;
	gboolean percentify = FALSE;
	gboolean is_number = FALSE;
	gboolean is_pm = FALSE;
	gboolean is_explicit_am = FALSE;
	gboolean is_neg = FALSE;
	gboolean hours_are_cummulative = FALSE;
	gboolean minutes_are_cummulative = FALSE;
	gboolean seconds_are_cummulative = FALSE;
	gboolean hours_set = FALSE;
	gboolean minutes_set = FALSE;
	gboolean seconds_set = FALSE;
	int i;
	int month, day, year, year_short;
	int hours, minutes;
	gnm_float seconds;
	int numerator = 0, denominator = 1;

	GString const *thousands_sep = format_get_thousand ();
	GString const *decimal = format_get_decimal ();

	/* -1 means "not seen". */
	month = day = year = year_short = -1;
	hours = minutes = -1;
	seconds = -1.;

	for (i = 0; i < len; ) {
		MatchType type = *(data++);
		char *str;

		/* Tag number k goes with match record k; record 0 is the whole match. */
		str = extract_text (s, &mp[++i]);

#ifdef DEBUG_NUMBER_MATCH
		printf ("Item[%d] = \'%s\' is a %d\n", i, str, type);
#endif
		switch (type) {
		case MATCH_MONTH_FULL:
			month = table_lookup (str, month_long);
			if (month == -1) {
				g_free (str);
				return NULL;
			}
			month++;
			break;

		case MATCH_MONTH_NUMBER:
			month = atoi (str);
			break;

		case MATCH_MONTH_SHORT:
			month = table_lookup (str, month_short);
			if (month == -1) {
				g_free (str);
				return NULL;
			}
			month++;
			break;

		case MATCH_DAY_FULL:
			/* FIXME: handle weekday */
			break;

		case MATCH_DAY_NUMBER:
			day = atoi (str);
			break;

		case MATCH_NUMERATOR:
			numerator = atoi (str);
			break;

		case MATCH_DENOMINATOR:
			denominator = atoi (str);
			/* Every early return has to release the extracted text. */
			if (denominator <= 0) {
				g_free (str);
				return NULL;
			}
			if (is_neg && numerator < 0) {
				g_free (str);
				return NULL;
			}

			is_number = TRUE;
			if (is_neg)
				number -= numerator / (gnm_float)denominator;
			else
				number += numerator / (gnm_float)denominator;
			break;

		case MATCH_NUMBER:
			if (*str != '\0') {
				char *ptr = str;

				switch (*ptr) {
				case '-':
					is_neg = TRUE;
					ptr++;
					break;
				case '+':
					ptr++;
					/* Fall through. */
				default:
					is_neg = FALSE;
				}

				number = 0.;
				/* FIXME: this loop is bogus. */
				while (1) {
					int thisnumber;
					if (number > DBL_MAX / 1000.0) {
						g_free (str);
						return NULL;
					}

					number *= 1000.0;

					errno = 0; /* strtol sets errno, but does not clear it. */
					thisnumber = strtoul (ptr, &ptr, 10);
					if (errno == ERANGE) {
						g_free (str);
						return NULL;
					}

					number += thisnumber;

					if (strncmp (ptr, thousands_sep->str, thousands_sep->len) != 0)
						break;

					ptr += thousands_sep->len;
				}
				is_number = TRUE;
				if (is_neg) number = -number;
			}
			break;

		case MATCH_NUMBER_DECIMALS: {
			char *exppart = NULL;
			if (strncmp (str, decimal->str, decimal->len) == 0) {
				char *end;
				errno = 0; /* strtognum sets errno, but does not clear it. */
				if (seconds < 0) {
					gnm_float fraction;

					/* Split off an exponent, if any. */
					for (end = str; *end && *end != 'e' && *end != 'E'; )
						end++;
					if (*end) {
						exppart = end + 1;
						*end = 0;
					}

					fraction = strtognum (str, &end);
					if (is_neg)
						number -= fraction;
					else
						number += fraction;
					is_number = TRUE;
				} else
					/* Seconds were seen: these are fractional seconds. */
					seconds += strtognum (str, &end);
			}
			if (exppart) {
				char *end;
				int exponent;

				errno = 0; /* strtol sets errno, but does not clear it. */
				exponent = strtol (exppart, &end, 10);
				number *= gpow10 (exponent);
			}
			break;
		}

		case MATCH_CUMMULATIVE_HOURS:
			hours_are_cummulative = TRUE;
			if (str[0] == '-') is_neg = TRUE;
			/* Fall through. */
		case MATCH_HOUR:
			hours_set = TRUE;
			hours = abs (atoi (str));
			break;

		case MATCH_CUMMULATIVE_MINUTES:
			minutes_are_cummulative = TRUE;
			if (str[0] == '-') is_neg = TRUE;
			/* Fall through. */
		case MATCH_MINUTE:
			minutes_set = TRUE;
			minutes = abs (atoi (str));
			break;

		case MATCH_CUMMULATIVE_SECONDS :
			seconds_are_cummulative = TRUE;
			if (str[0] == '-') is_neg = TRUE;
			/* Fall through. */
		case MATCH_SECOND:
			seconds_set = TRUE;
			seconds = abs (atoi (str));
			break;

		case MATCH_PERCENT:
			percentify = TRUE;
			break;

		case MATCH_YEAR_FULL:
			year = atoi (str);
			break;

		case MATCH_YEAR_SHORT:
			year_short = atoi (str);
			break;

		case MATCH_AMPM:
			if (*str == 'p' || *str == 'P')
				is_pm = TRUE;
			else
				is_explicit_am = TRUE;
			break;

		case MATCH_SKIP:
			break;

		case MATCH_STRING_CONSTANT:
			/* str is handed over to the string; it is not freed here. */
			return value_new_string_str (gnm_string_get_nocopy (str));

		default :
			g_warning ("compute_value: This should not happen.");
			break;
		}

		g_free (str);
	}

	if (is_number) {
		if (percentify)
			number *= 0.01;
		return value_new_float (number);
	}

	if (!(year == -1 && month == -1 && day == -1)) {
		time_t t = time (NULL);
		static time_t lastt;
		static struct tm tm;
		GDate *date;

		if (t != lastt) {
			/*
			 * Since localtime is moderately expensive, do
			 * at most one call per second. One per day
			 * would be enough but is harder to check for.
			 */
			lastt = t;
			tm = *localtime (&t);
		}

		if (year == -1) {
			if (year_short != -1) {
				/* Window of -75 thru +24 years. */
				/* (TODO: See what current
				 * version of MS Excel uses.) */
				/* Earliest allowable interpretation
				 * is 75 years ago. */
				int earliest_ccyy
					= tm.tm_year + 1900 - 75;
				int earliest_cc = earliest_ccyy / 100;

				g_return_val_if_fail (year_short >= 0 &&
						      year_short <= 99,
						      NULL);
				year = earliest_cc * 100 + year_short;
				/*
				 * Our first guess at year has the same
				 * cc part as EARLIEST_CCYY, so is
				 * guaranteed to be in [earliest_ccyy -
				 * 99, earliest_ccyy + 99]. The yy
				 * part is of course year_short.
				 */
				if (year < earliest_ccyy)
					year += 100;
				/*
				 * year is now guaranteed to be in
				 * [earliest_ccyy, earliest_ccyy + 99],
				 * i.e. -75 thru +24 years from current
				 * year; and year % 100 == short_year.
				 */
			} else if (month != -1) {
				/* Window of -6 thru +5 months. */
				/* (TODO: See what current
				 * version of MS Excel uses.) */
				int earliest_yyymm
					= (tm.tm_year * 12 +
					   tm.tm_mon - 6);
				year = earliest_yyymm / 12;
				/* First estimate of yyy (i.e. years
				 * since 1900) is the yyy part of
				 * earliest_yyymm. year*12+month-1 is
				 * guaranteed to be in [earliest_yyymm
				 * - 11, earliest_yyymm + 11].
				 */
				year += (year * 12 + month <=
					 earliest_yyymm);
				/* year*12+month-1 is now guaranteed
				 * to be in [earliest_yyymm,
				 * earliest_yyymm + 11], i.e.
				 * representing -6 thru +5 months
				 * from now.
				 */
				year += 1900;
				/* Finally convert from years since
				 * 1900 (yyy) to a proper 4-digit
				 * year.
				 */
			} else
				year = 1900 + tm.tm_year;
		}
		if (month == -1)
			month = tm.tm_mon + 1;
		if (day == -1)
			day = tm.tm_mday;

		if (year < 1900 || !g_date_valid_dmy (day, month, year))
			return NULL;

		date = g_date_new_dmy (day, month, year);
		number = datetime_g_to_serial (date, date_conv);
		g_date_free (date);
	}

	/* A pure date (or nothing at all): no time part to add. */
	if (!seconds_set && !minutes_set && !hours_set)
		return value_new_int (number);

	if (!seconds_set)
		seconds = 0;

	if (!minutes_set)
		minutes = 0;

	if (!hours_set)
		hours = 0;

	if (!hours_are_cummulative) {
		if (is_pm) {
			if (hours < 12)
				hours += 12;
		} else if (is_explicit_am && hours == 12)
			hours = 0;
	}

	/* Clock fields are range checked; cumulative ones are not. */
	if ((hours < 0 || hours > 23) && !hours_are_cummulative)
		return NULL;

	if ((minutes < 0 || minutes > 59) && !minutes_are_cummulative)
		return NULL;

	if ((seconds < 0 || seconds > 59) && !seconds_are_cummulative)
		return NULL;

	if (hours == 0 && minutes == 0 && seconds == 0)
		return value_new_int (number);

	number += (hours * 3600 + minutes * 60 + seconds) / (3600*24.0);
	if (is_neg) number = -number;

	return value_new_float (number);
}
/**
 * format_match_simple :
 * @text : A string to match against.
 *
 * Attempt to match the supplied string as a simple value.  The checks
 * are made in this order: boolean, error name (only if @text starts
 * with '#'), integer that fits an int, floating point number.  For the
 * two numeric forms trailing spaces are accepted and ignored.
 *
 * WARNING WARNING WARNING : This routine should NEVER be changed to match
 *                           VALUE_STRING that will break the parsers
 *                           handling of named expressions.
 *
 * Returns : a new value, or NULL if @text is none of the above.
 */
GnmValue *
format_match_simple (char const *text)
{
	char *tail;
	long as_long;
	gnm_float as_float;

	/* Boolean? */
	if (g_ascii_strcasecmp (text, format_boolean (TRUE)) == 0)
		return value_new_bool (TRUE);
	if (g_ascii_strcasecmp (text, format_boolean (FALSE)) == 0)
		return value_new_bool (FALSE);

	/* Error name? */
	if (text[0] == '#') {
		GnmValue *err = value_is_error (text);
		if (err != NULL)
			return err;
	}

	/* Integer?  strtol sets errno, but does not clear it. */
	errno = 0;
	as_long = strtol (text, &tail, 10);
	if (tail != text && errno != ERANGE && as_long == (int)as_long) {
		/* Allow and ignore spaces at the end. */
		tail += strspn (tail, " ");
		if (*tail == '\0')
			return value_new_int ((int)as_long);
	}

	/* Double?  strtognum sets errno, but does not clear it. */
	errno = 0;
	as_float = strtognum (text, &tail);
	if (tail != text && errno != ERANGE) {
		/* Allow and ignore spaces at the end. */
		tail += strspn (tail, " ");
		if (*tail == '\0')
			return value_new_float (as_float);
	}

	return NULL;
}
/*
 * Number of match records requested from go_regexec() by format_match();
 * its match array is declared with NM + 1 elements.
 */
#define NM 40
/**
 * format_match :
 *
 * @text    : The text to parse
 * @cur_fmt : The current format for the value (potentially NULL)
 * @date_conv: optional date convention
 *
 * Attempts to parse the supplied string to see if it matches a known value
 * format.  The order is:
 *   - empty text yields an empty value,
 *   - text starting with an apostrophe yields the rest as a string,
 *   - if @cur_fmt is a text format, @text is returned as a string,
 *   - otherwise @text is tried against the regexp of @cur_fmt,
 *   - then against the simple types (see format_match_simple),
 *   - finally against each of the canned formats.
 *
 * A format carrying NM or more tags is never used for matching:
 * compute_value() reads one match record per tag, starting at record 1,
 * and only NM records are filled in by go_regexec().
 *
 * Returns : the new value, or NULL if nothing matched.  The caller is
 * responsible for releasing the resulting value.
 **/
GnmValue *
format_match (char const *text, GnmFormat *cur_fmt,
	      GnmDateConventions const *date_conv)
{
	GnmValue *v;
	GSList *l;
	regmatch_t mp[NM + 1];

	if (text[0] == '\0')
		return value_new_empty ();

	/* If it begins with a '\'' it is a string */
	if (text[0] == '\'')
		return value_new_string (text + 1);

	if (cur_fmt) {
		switch (cur_fmt->family) {
		case FMT_TEXT:
			return value_new_string (text);

		default:
			if (cur_fmt->regexp_str != NULL &&
			    cur_fmt->match_tags->len < NM &&
			    go_regexec (&cur_fmt->regexp, text, NM, mp, 0) != REG_NOMATCH &&
			    NULL != (v = compute_value (text, mp, cur_fmt->match_tags,
							date_conv))) {
#ifdef DEBUG_NUMBER_MATCH
				int i;
				g_print ("matches expression: %s %s\n", cur_fmt->format, cur_fmt->regexp_str);
				for (i = 0; i < NM; i++) {
					char *p;

					if (mp[i].rm_so == -1)
						break;

					p = extract_text (text, &mp[i]);
					g_print ("%d %d->%s\n", mp[i].rm_so, mp[i].rm_eo, p);
					g_free (p);
				}
#endif
				value_set_fmt (v, cur_fmt);
				return v;
			} else {
#ifdef DEBUG_NUMBER_MATCH
				g_print ("does not match expression: %s %s\n",
					 cur_fmt->format,
					 cur_fmt->regexp_str ? cur_fmt->regexp_str : "(null)");
#endif
			}
		}
	}

	/* Check basic types */
	v = format_match_simple (text);
	if (v != NULL)
		return v;

	/* Fall back to checking the set of canned formats */
	for (l = format_match_list; l; l = l->next) {
		GnmFormat *fmt = l->data;
#ifdef DEBUG_NUMBER_MATCH
		printf ("test: %s \'%s\'\n", fmt->format, fmt->regexp_str);
#endif
		/* Too many tags for the match array: cannot be decoded safely. */
		if (fmt->match_tags->len >= NM)
			continue;

		if (go_regexec (&fmt->regexp, text, NM, mp, 0) == REG_NOMATCH)
			continue;

#ifdef DEBUG_NUMBER_MATCH
		{
			int i;
			printf ("matches expression: %s %s\n", fmt->format, fmt->regexp_str);
			for (i = 0; i < NM; i++) {
				char *p;

				if (mp[i].rm_so == -1)
					break;

				p = extract_text (text, &mp[i]);
				printf ("%d %d->%s\n", mp[i].rm_so, mp[i].rm_eo, p);
				g_free (p);
			}
		}
#endif

		v = compute_value (text, mp, fmt->match_tags, date_conv);

#ifdef DEBUG_NUMBER_MATCH
		if (v) {
			printf ("value = ");
			value_dump (v);
		} else
			printf ("unable to compute value\n");
#endif
		if (v != NULL) {
			value_set_fmt (v, fmt);
			return v;
		}
	}

	return NULL;
}
/**
 * format_match_number :
 *
 * @text    : The text to parse
 * @cur_fmt : The current format for the value (potentially NULL)
 * @date_conv: optional date convention
 *
 * Runs format_match() on the arguments and keeps the result only if it
 * is a number; any other kind of value is released again.
 *
 * Returns : the numeric value, which the caller is responsible for
 * releasing, or NULL.
 */
GnmValue *
format_match_number (char const *text, GnmFormat *cur_fmt,
		     GnmDateConventions const *date_conv)
{
	GnmValue *res = format_match (text, cur_fmt, date_conv);

	if (res == NULL)
		return NULL;

	if (!VALUE_IS_NUMBER (res)) {
		value_release (res);
		return NULL;
	}

	return res;
}