Next: , Previous: , Up: regexps   [Contents][Index]


3.4 Inspecting named capturing groups

Function: int cre2_num_capturing_groups (const cre2_regexp_t * rex)

If rex is a successfully built regular expression object: return a non-negative integer representing the number of capturing groups (parenthetical subexpressions) in the pattern. If an error occurred while building rex: return -1.

Function: int cre2_find_named_capturing_groups (const cre2_regexp_t * rex, const char * name)

If rex is a successfully built regular expression object: return a non-negative integer representing the index of the named capturing group whose name is name. If an error occurred while building rex or the name is invalid: return -1.

/* Example: match a pattern containing two named groups, then use the
   group names to locate the corresponding match slots. */
const char *      pattern = "from (?P<S>.*) to (?P<D>.*)";
cre2_options_t *  opt     = cre2_opt_new();
cre2_regexp_t *   rex     = cre2_new(pattern, strlen(pattern),
                                     opt);
{
  /* Text each named group is expected to capture. */
  const char * source      = "Montreal, Canada";
  const char * destination = "Lausanne, Switzerland";

  if (!rex || cre2_error_code(rex))
    { /* handle the error */ }
  int nmatch = cre2_num_capturing_groups(rex) + 1;
  cre2_string_t strings[nmatch];
  int e, SIndex, DIndex;

  const char * text = \
     "from Montreal, Canada to Lausanne, Switzerland";
  int text_len = strlen(text);

  e = cre2_match(rex, text, text_len, 0, text_len,
                 CRE2_UNANCHORED, strings, nmatch);
  if (0 == e)
    { /* handle the error */ }

  /* The lookup returns -1 on failure: validate the index before
     using it as a subscript into the match array. */
  SIndex = cre2_find_named_capturing_groups(rex, "S");
  if (SIndex < 0 || nmatch <= SIndex)
    { /* handle the error */ }
  /* Compare the lengths too: strncmp() bounded by the match length
     alone would accept an empty or truncated capture. */
  if ((size_t)strings[SIndex].length != strlen(source) ||
      0 != strncmp(source,
                   strings[SIndex].data, strings[SIndex].length))
    { /* handle the error */ }

  DIndex = cre2_find_named_capturing_groups(rex, "D");
  if (DIndex < 0 || nmatch <= DIndex)
    { /* handle the error */ }
  if ((size_t)strings[DIndex].length != strlen(destination) ||
      0 != strncmp(destination,
                   strings[DIndex].data, strings[DIndex].length))
    { /* handle the error */ }
}
cre2_delete(rex);
cre2_opt_delete(opt);

Iterating over named capturing groups

Function: cre2_named_groups_iter_t * cre2_named_groups_iter_new (const cre2_regexp_t * re)

Build and return a new named capturing groups iterator.

Function: void cre2_named_groups_iter_delete (cre2_named_groups_iter_t * iter)

Finalise and delete a named capturing groups iterator.

Function: bool cre2_named_groups_iter_next (cre2_named_groups_iter_t * iter, const char ** namep, int * indexp)

Search for the next named capturing group.

When a group is found: return true; store in the variable referenced by namep a pointer to the string representing the name; store in the variable referenced by indexp an integer representing the index of the capturing group.

When no more groups are present: return false; store NULL in the variable referenced by namep; store ‘-1’ in the variable referenced by indexp.

As an example, consider the following code:

/* Example: print the index and name of every named capturing group
   in a pattern, using a named groups iterator.

   Both pointers start as NULL so that the cleanup code at "done"
   can tell which objects were actually built. */
cre2_regexp_t               * rex  = NULL;
cre2_named_groups_iter_t    * iter = NULL;

/* One line per month; the number following each month is captured by
   a group named after that month.  The continuation lines start in
   column 0 because any indentation would become part of the string. */
const char rex_pattern[] = "\
January:[[:blank:]]+(?P<january>[[:digit:]]+)\n\
February:[[:blank:]]+(?P<february>[[:digit:]]+)\n\
March:[[:blank:]]+(?P<march>[[:digit:]]+)\n\
April:[[:blank:]]+(?P<april>[[:digit:]]+)\n\
May:[[:blank:]]+(?P<may>[[:digit:]]+)\n\
June:[[:blank:]]+(?P<june>[[:digit:]]+)\n\
July:[[:blank:]]+(?P<july>[[:digit:]]+)\n\
August:[[:blank:]]+(?P<august>[[:digit:]]+)\n\
September:[[:blank:]]+(?P<september>[[:digit:]]+)\n\
October:[[:blank:]]+(?P<october>[[:digit:]]+)\n\
November:[[:blank:]]+(?P<november>[[:digit:]]+)\n\
December:[[:blank:]]+(?P<december>[[:digit:]]+)\n";

const char *   text     = "\
January: 8\n\
February: 3\n\
March: 3\n\
April: 4\n\
May: 9\n\
June: 4\n\
July: 7\n\
August: 5\n\
September: 9\n\
October: 2\n\
November: 1\n\
December: 6\n";
int            text_len = strlen(text);

int            rv;
/* Comfortably more slots than the pattern's 12 capturing groups. */
int            nmatch = 20;
cre2_string_t  match[nmatch];

/* Let's build the regular expression. */
rex  = cre2_new(rex_pattern, strlen(rex_pattern), NULL);
if (!rex) {
  goto done;
}
if (cre2_error_code(rex)) {
  goto done;
}

/* Now match the pattern. */
rv = cre2_match(rex, text, text_len, 0, text_len,
                CRE2_ANCHOR_BOTH, match, nmatch);
if (! rv) {
  goto done;
}

/* Build the named groups iterator. */
iter = cre2_named_groups_iter_new(rex);
if (!iter) {
  goto done;
}

/* Perform the iteration: the loop ends when the iterator reports
   that no more groups are present. */
{
  char const *name;
  int        index;

  while (cre2_named_groups_iter_next(iter, &name, &index)) {
    printf("group: %d, %s\n", index, name);
  }
}

/* Final cleanup: release only what was successfully built. */
done:
if (iter) {
  cre2_named_groups_iter_delete(iter);
}
if (rex) {
  cre2_delete(rex);
}

this code prints:

group: 4, april
group: 8, august
group: 12, december
group: 2, february
group: 1, january
group: 7, july
group: 6, june
group: 3, march
group: 5, may
group: 11, november
group: 10, october
group: 9, september

Next: , Previous: , Up: regexps   [Contents][Index]

This document describes version 0.4.0-devel.2 of CRE2.