Logo Search packages:      
Sourcecode: scheme48 version File versions  Download package

regexp.c

/* Copyright (c) 1993-2008 by Richard Kelsey and Jonathan Rees.
   See file COPYING. */

/*
 * Scheme 48/POSIX regex interface
 */

#include <sys/types.h>
#include <regex.h> /* POSIX.2 */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "scheme48.h"

extern void       s48_init_posix_regex(void);
static s48_value  posix_compile_regexp(s48_value pattern,
                                   s48_value extended_p,
                                   s48_value ignore_case_p,
                                   s48_value submatches_p,
                                   s48_value newline_p),
                  posix_regexp_match(s48_value sch_regex,
                                 s48_value string,
                                 s48_value start,
                                 s48_value submatches_p,
                                 s48_value bol_p,
                                 s48_value eol_p),
                  posix_regexp_error_message(s48_value pattern,
                                   s48_value extended_p,
                                   s48_value ignore_case_p,
                                   s48_value submatches_p,
                                   s48_value newline_p),
                  posix_free_regexp(s48_value sch_regex);

/*
 * Record type imported from Scheme.
 */
static s48_value  posix_regexp_match_type_binding = S48_FALSE;

/*
 * Install all exported functions in Scheme48.
 */
void
s48_init_posix_regexp(void)
{
  /* Export our stuff. */
  S48_EXPORT_FUNCTION(posix_compile_regexp);
  S48_EXPORT_FUNCTION(posix_regexp_match);
  S48_EXPORT_FUNCTION(posix_regexp_error_message);
  S48_EXPORT_FUNCTION(posix_free_regexp);

  /* Protect and import the regex-match type. */
  S48_GC_PROTECT_GLOBAL(posix_regexp_match_type_binding);
  posix_regexp_match_type_binding =
    s48_get_imported_binding("posix-regexp-match-type");
}

/*
 * Interface to regcomp.  We encode the flags, make the return value, and
 * then call regcomp() to fill it in.
 */
static s48_value
posix_compile_regexp(s48_value pattern,
                 s48_value extended_p, s48_value ignore_case_p,
                 s48_value submatches_p, s48_value newline_p)
{
  s48_value sch_regex;
  int status;
  S48_DECLARE_GC_PROTECT(1);
  int flags = (S48_EXTRACT_BOOLEAN(extended_p)    ? REG_EXTENDED : 0) |
              (S48_EXTRACT_BOOLEAN(ignore_case_p) ? REG_ICASE    : 0) |
              (S48_EXTRACT_BOOLEAN(submatches_p)  ? 0 : REG_NOSUB) |
              (S48_EXTRACT_BOOLEAN(newline_p)     ? REG_NEWLINE  : 0);

  S48_GC_PROTECT_1(pattern);

  S48_CHECK_BYTE_VECTOR(pattern);

  sch_regex = S48_MAKE_VALUE(regex_t);

  status = regcomp(S48_UNSAFE_EXTRACT_VALUE_POINTER(sch_regex, regex_t),
               S48_UNSAFE_EXTRACT_BYTE_VECTOR(pattern),
               flags);

  S48_GC_UNPROTECT();

  if (status == 0)
    return sch_regex;
  else
    return S48_UNSAFE_ENTER_FIXNUM(status);   /* not that it can do them much good */
}

/*
 * Interface to regexec.
 *
 * Returns #f if there is no match, #t if there is a match and submatches_p
 * is false, and a list of regex-match records otherwise.
 *
 * Most of this is making the buffer for the match structs and then translating
 * them into Scheme match records.
 */
static s48_value
posix_regexp_match(s48_value sch_regex, s48_value string, s48_value sch_start,
               s48_value submatches_p,
               s48_value bol_p, s48_value eol_p)
{
  int status;
  s48_value result;

  int start = s48_extract_fixnum(sch_start);
  int len = strlen(s48_extract_byte_vector(string));
  /* re_nsub doesn't include the full pattern */
  size_t nmatch = 1 + S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t)->re_nsub;
  regmatch_t *pmatch,
             pmatch_buffer[32];

  int flags = (S48_EXTRACT_BOOLEAN(bol_p) ? 0 : REG_NOTBOL) |
              (S48_EXTRACT_BOOLEAN(eol_p) ? 0 : REG_NOTEOL);

  if ((start < 0) || (start > len))
    s48_raise_range_error(sch_start,
                    s48_enter_fixnum(0),
                    s48_enter_fixnum(len));

  if (nmatch <= 32)
    pmatch = pmatch_buffer;
  else {
    pmatch = (regmatch_t *) malloc(nmatch * sizeof(regmatch_t));
    if (pmatch == NULL)
      s48_raise_out_of_memory_error(); }
    
  status = regexec(S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t),
               S48_UNSAFE_EXTRACT_BYTE_VECTOR(string) + start,
               nmatch, pmatch, flags);

  if (status == REG_NOMATCH)
    result = S48_FALSE;
  else if (! S48_EXTRACT_BOOLEAN(submatches_p))
    result = S48_TRUE;
  else {
    s48_value match = S48_FALSE;
    s48_value matches = S48_NULL;
    int i;
    S48_DECLARE_GC_PROTECT(2);
  
    S48_GC_PROTECT_2(match, matches);
    
    for(i = nmatch - 1; i > -1; i--) {
      if (pmatch[i].rm_so == -1)
      match = S48_FALSE;
      else {
      match = s48_make_record(posix_regexp_match_type_binding);
      S48_UNSAFE_RECORD_SET(match,
                        0,
                        s48_enter_fixnum(pmatch[i].rm_so + start));
      S48_UNSAFE_RECORD_SET(match,
                        1,
                        s48_enter_fixnum(pmatch[i].rm_eo + start));
      S48_UNSAFE_RECORD_SET(match, 2, S48_FALSE); }  /* submatches */
      matches = s48_cons(match, matches); }
    
    S48_GC_UNPROTECT();

    result = matches; }

  if (nmatch > 32)
    free(pmatch);
  
  return result;
}

/*
 * Interface to regcomp.
 *
 * This takes the same arguments as `compile_regexp' but returns the error
 * message, if any, that `regcomp()' returns.  For some reason `regerror()'
 * requires both the status code and the compiled pattern buffer returned
 * by `regcomp()'.  `compile_regexp' only returned the status so we have to
 * redo the compilation.
 *
 */
static s48_value
posix_regexp_error_message(s48_value pattern,
                     s48_value extended_p, s48_value ignore_case_p,
                     s48_value submatches_p, s48_value newline_p)
{
  regex_t compiled_regex;
  int status;
  int flags = (S48_EXTRACT_BOOLEAN(extended_p)    ? REG_EXTENDED : 0) |
              (S48_EXTRACT_BOOLEAN(ignore_case_p) ? REG_ICASE    : 0) |
              (S48_EXTRACT_BOOLEAN(submatches_p)  ? 0 : REG_NOSUB) |
              (S48_EXTRACT_BOOLEAN(newline_p)     ? REG_NEWLINE  : 0);

  S48_CHECK_STRING(pattern);

  status = regcomp(&compiled_regex, S48_UNSAFE_EXTRACT_BYTE_VECTOR(pattern), flags);

  if (status == 0)
    return S48_FALSE;
  else {
    size_t buffer_size;
    s48_value buffer;
    
    buffer_size = regerror(status, &compiled_regex, NULL, 0);
    /* For string lengths C counts the nul, Scheme doesn't. */
    buffer = s48_make_byte_vector(buffer_size);
    regerror(status,
           &compiled_regex,
           S48_UNSAFE_EXTRACT_BYTE_VECTOR(buffer),
           buffer_size);
    
    return buffer; }
}

/*
 * Stub for regfree().
 */

static s48_value
posix_free_regexp(s48_value sch_regex)
{
  regfree(S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t));

  return S48_UNSPECIFIC;
}

Generated by  Doxygen 1.6.0   Back to index