Regular expressions

The goal of this task is

to match a string against a regular expression
to substitute part of a string using a regular expression

AppleScript

-- Match: search the literal text for the pattern ".*string$".
-- NOTE(review): `find text ... with regexp` is not a core AppleScript command;
-- presumably it comes from a scripting addition -- confirm which one is required.
try
    find text ".*string$" in "I am a string" with regexp
on error message
    -- On failure, hand back the error text instead of propagating the error.
    return message
end try

-- Substitute: replace "original" with "modified" in the literal text,
-- treating the search term as a regular expression.
try
    change "original" into "modified" in "I am the original string" with regexp
on error message
    -- On failure, hand back the error text instead of propagating the error.
    return message
end try

ALGOL 68

The routines grep in string and sub in string are not part of ALGOL 68's standard prelude.

Works with: ALGOL 68G version Any - tested with release mk15-0.8b.fc9.i386

<lang algol>INT match=0, no match=1, out of memory error=2, other error=3;

STRING str := "i am a string";

# Match: print the part of str matched by the pattern when grep succeeds #

STRING m := "string$";
INT start, end;
IF grep in string(m, str, start, end) = match THEN
  printf(($"Ends with """g""""l$, str[start:end]))
FI;

# Replace: substitute " a " with " another " in str and print the result #

IF sub in string(" a ", " another ",str) = match THEN
  printf(($gl$, str))
FI;</lang> Output:

Ends with "string"
i am another string

Standard ALGOL 68 does have a primordial form of pattern matching called a format. This is designed to extract values from input data, but it can also be used for outputting (and transputting) the original data.

Works with: ALGOL 68 version Standard - But declaring book as flex[]flex[]string

Works with: ALGOL 68G version Any - tested with release mk15-0.8b.fc9.i386

For example:<lang algol> FORMAT pattern = $ddd" "c("cats","dogs")$;
FILE file;
STRING book;
associate(file, book);
# Stop the program if a value error or a format error is raised on the file #
on value error(file, (REF FILE f)BOOL: stop);
on format error(file, (REF FILE f)BOOL: stop);

book := "100 dogs";
STRUCT(INT count, type) dalmatians;

# Read the record from the book through the format, then show the raw fields #
getf(file, (pattern, dalmatians));
print(("Dalmatians: ", dalmatians, new line));
count OF dalmatians +:=1;
# Write the updated record back out through the same format #
printf(($"Gives: "$, pattern, dalmatians, $l$))</lang> Output:

Dalmatians:        +100         +2
Gives: 101 dogs

C

Works with: POSIX

As far as I can see, POSIX defines functions for regex matching, but nothing for substitution, so we must do all the hard work by hand. The complex-looking code could be turned into a function.

<lang c>#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <regex.h>
#include <string.h>

/*
 * Demonstrates POSIX regular expressions:
 *   1. test two strings against the pattern in tp;
 *   2. replace the first match of "a[a-z]+" in t1 with ss, building the
 *      result by hand because POSIX offers no substitution routine.
 * Returns 0 on success, EXIT_FAILURE if a pattern cannot be compiled or
 * memory cannot be allocated.
 */
int main()
{
  regex_t preg;
  regmatch_t substmatch[1];
  const char *tp = "string$";
  const char *t1 = "this is a matching string";
  const char *t2 = "this is not a matching string!";
  const char *ss = "istyfied";

  /* Test: compile the pattern once and try it against both strings. */
  if ( regcomp(&preg, tp, REG_EXTENDED) != 0 )
  {
     fprintf(stderr, "cannot compile pattern '%s'\n", tp);
     return EXIT_FAILURE;
  }
  printf("'%s' %smatched with '%s'\n", t1,
                                       (regexec(&preg, t1, 0, NULL, 0)==0) ? "" : "did not ", tp);
  printf("'%s' %smatched with '%s'\n", t2,
                                       (regexec(&preg, t2, 0, NULL, 0)==0) ? "" : "did not ", tp);
  regfree(&preg);

  /* Substitute: change the first match of "a[a-z]+" into "istyfied". */
  if ( regcomp(&preg, "a[a-z]+", REG_EXTENDED) != 0 )
  {
     fprintf(stderr, "cannot compile pattern '%s'\n", "a[a-z]+");
     return EXIT_FAILURE;
  }
  if ( regexec(&preg, t1, 1, substmatch, 0) == 0 )
  {
     /* rm_so / rm_eo are the start and one-past-the-end offsets of the match. */
     const size_t prefix_len = (size_t)substmatch[0].rm_so;
     const char *suffix = &t1[substmatch[0].rm_eo];
     const size_t repl_len = strlen(ss);
     const size_t suffix_len = strlen(suffix);
     char *ns = malloc(prefix_len + repl_len + suffix_len + 1);
     if ( ns == NULL )
     {
        fprintf(stderr, "out of memory\n");
        regfree(&preg);
        return EXIT_FAILURE;
     }
     memcpy(ns, t1, prefix_len);                    /* text before the match */
     memcpy(ns + prefix_len, ss, repl_len);         /* replacement           */
     /* text after the match; +1 also copies the terminating NUL */
     memcpy(ns + prefix_len + repl_len, suffix, suffix_len + 1);
     printf("mod string: '%s'\n", ns);
     free(ns);
  } else {
     printf("the string '%s' is the same: no matching!\n", t1);
  }
  regfree(&preg);

  return 0;

}</lang>

C++

Works with: g++ version 4.0.2

Library: Boost

 #include <iostream>
 #include <string>
 #include <iterator>
 #include <boost/regex.hpp>
 
 int main()
 {
   boost::regex re(".* string$");
   std::string s = "Hi, I am a string";
 
   // match the complete string
   if (boost::regex_match(s, re))
     std::cout << "The string matches.\n";
   else
     std::cout << "Oops - not found?\n";
 
   // match a substring
   boost::regex re2(" a.*a");
   boost::smatch match;
   if (boost::regex_search(s, match, re2))
   {
     std::cout << "Matched " << match.length()
               << " characters starting at " << match.position() << ".\n";
     std::cout << "Matched character sequence: \""
               << match.str() << "\"\n";
   }
   else
   {
     std::cout << "Oops - not found?\n";
   }
 
   // replace a substring
   std::string dest_string;
   boost::regex_replace(std::back_inserter(dest_string),
                        s.begin(), s.end(),
                        re2,
                        "'m now a changed");
   std::cout << dest_string << std::endl;
 }

C#

Works with: .NET version 2.0+

using System;
using System.Text.RegularExpressions;

// Match: test a string against a pattern, first through a Regex instance
// and then through the static helper.
string str = "I am a clever string";
string pattern = ".*clever.*";
Regex regex = new Regex(pattern);
if (  regex.IsMatch( str) ) {
    Console.WriteLine( "The string contains clever" );
}
if ( Regex.IsMatch( str, pattern ) ) {
    Console.WriteLine( "A more clever way to detect that the string contains clever" );
}

// demonstrate regex grouping
// Each parenthesised sub-pattern is a capture group; Groups[0] is the whole
// match, so the user/password/server/path parts are Groups[1] to Groups[4].
Regex regexUrl = new Regex( "sftp://(.*?):(.*?)@(.*?)/(.*)" );
string sftpUrl = "sftp://rseward:password@server.com/remoteDir/";
Match match = regexUrl.Match( sftpUrl );
if ( match.Success ) {
  // Groups is an indexed collection, so it takes square brackets.
  Console.WriteLine( "user=" + match.Groups[1] );
  Console.WriteLine( "password=" + match.Groups[2] );
  Console.WriteLine( "server=" + match.Groups[3] );
  Console.WriteLine( "path=" + match.Groups[4] );
}

D

 import std.stdio, std.regexp;

 // Shows matching and substitution with std.regexp, both with inline patterns
 // and with stored RegExp objects.
 void main() {
     string text = "I am a string";

     // Match, giving the pattern inline:
     if (search(text, r"string$"))
         writefln("Ends with 'string'");

     // Match, keeping the regular expression in a variable:
     auto endsWithString = RegExp(r"string$");
     if (endsWithString.search(text).test)
         writefln("Ends with 'string'");

     // Replace, giving the pattern inline:
     writefln(sub(text, " a ", " another "));

     // Replace, keeping the regular expression in a variable:
     auto loneA = RegExp(" a ");
     writefln(loneA.replace(text, " another "));
 }

Note that in std.string there are string functions to perform those string operations in a faster way.

Forth

Library: Forth Foundation Library

Test/Match

\ Load the regular expression module of the Forth Foundation Library
include ffl/rgx.fs

\ Create a regular expression variable named 'exp' in the dictionary

rgx-create exp

\ Compile the pattern "Hello (World)" into 'exp'; the message is printed
\ only when rgx-compile leaves a true flag

s" Hello (World)" exp rgx-compile [IF]
  .( Regular expression successful compiled.) cr
[THEN]

\ Case-sensitively match the string "Hello World" against 'exp' and
\ report whether it matched

s" Hello World" exp rgx-cmatch? [IF]
  .( String matches with the expression.) cr
[ELSE]
  .( No match.) cr
[THEN]

Haskell

Test

import Text.Regex

-- | Subject string tested against the pattern.
str :: String
str = "I am a string"

-- | Prints a message when 'str' matches the pattern; prints nothing otherwise.
-- The case expression has to live inside a definition to be a valid program.
main :: IO ()
main =
  case matchRegex (mkRegex ".*string$") str of
    Just _  -> putStrLn $ "ends with 'string'"
    Nothing -> return ()

Substitute

import Text.Regex

-- | Text in which the substitution is performed.
orig :: String
orig = "I am the original string"

-- | 'orig' with every match of the pattern "original" replaced by "modified".
result :: String
result = subRegex (mkRegex "original") orig "modified"

-- | Prints the substituted text.
main :: IO ()
main = putStrLn $ result

J

J's regex support is built on top of PCRE.

   load'regex'               NB.  Load the regex library (presumably the source of rxeq and rxrplc used below)
   str =: 'I am a string'   NB.  Subject string shared by the matching and substitution examples

Matching:

   '.*string$' rxeq str     NB.  1 is true, 0 is false
1

Substitution:

   ('am';'am still') rxrplc str   NB.  Left argument is the boxed pair pattern;replacement
I am still a string

Java

Works with: Java version 1.5+

Test

// String.matches tests the entire string, hence the leading ".*" in the pattern.
final String subject = "I am a string";
final boolean endsWithString = subject.matches(".*string$");
if (endsWithString) {
  System.out.println("ends with 'string'");
}

Substitute

// replaceAll treats its first argument as a regular expression and
// substitutes every match.
final String orig = "I am the original string";
final String pattern = "original";
final String replacement = "modified";
final String result = orig.replaceAll(pattern, replacement);
// result now holds "I am the modified string"

JavaScript

Test/Match

     var subject = "Hello world!";

     // The same case-insensitive pattern built two ways:
     // as a regex literal and through the RegExp constructor.
     var re_PatternToMatch = /Hello (World)/i,
         re_PatternToMatch2 = new RegExp("Hello (World)", "i");

     // test() answers with a boolean: does the subject contain a match?
     var isMatch = re_PatternToMatch.test(subject);

     // exec() answers with an array describing the match:
     //    matches[0] == "Hello world"   (the whole match)
     //    matches[1] == "world"         (the first capture group)
     var matches = re_PatternToMatch2.exec(subject);

Substitute

     var subject = "Hello world!";

     // Swap the matched text for "Replaced"; the trailing "!" is outside
     // the match, so newSubject == "Replaced!"
     var replacement = "Replaced";
     var newSubject = subject.replace(re_PatternToMatch, replacement);

Objective-C

Test

Works with: Mac OS X version 10.4+

<lang objc> NSString *str = @"I am a string"; NSString *regex = @".*string$";

// Build a predicate that applies the MATCHES operator to the evaluated
// object (SELF), using the pattern held in regex.
NSPredicate *pred = [NSPredicate predicateWithFormat:@"SELF MATCHES %@", regex];

// Evaluate the predicate against str and log only when it is satisfied.
if ([pred evaluateWithObject:str]) {

   NSLog(@"ends with 'string'");

} </lang> Unfortunately this method cannot find the location of the match or do substitution.

OCaml

Test

#load "str.cma";;
let str = "I am a string";;
(* Search forward from offset 0; Not_found signals that nothing matched,
   in which case nothing is printed. *)
let () =
  let ends_with_string = Str.regexp ".*string$" in
  try
    let _ = Str.search_forward ends_with_string str 0 in
    print_endline "ends with 'string'"
  with Not_found -> ()
;;

Substitute

#load "str.cma";;
let orig = "I am the original string";;
(* Replace every match of the pattern in orig with "modified". *)
let result =
  let pattern = Str.regexp "original" in
  Str.global_replace pattern "modified" orig;;
(* result now holds "I am the modified string" *)

Perl

Works with: Perl version 5.8.8

Test

$string = "I am a string";

# =~ is true when the pattern matches somewhere in $string.
print "Ends with 'string'\n" if $string =~ /string$/;

# Report only when the pattern does NOT match.
print "Does not start with 'You'\n" unless $string =~ /^You/;

Substitute

$string = "I am a string";
# Replace the first " a " in place: "I am a string" becomes "I am another string".
$string =~ s{ a }{ another };
print $string;

Test and Substitute

$string = "I am a string";
# s/// is true when it replaced something; \b marks a word boundary,
# so only the standalone word "am" is affected.
print "I was able to find and replace 'am' with 'was'\n"
  if $string =~ s/\bam\b/was/;

Options

# Modifiers go directly after the final / of a match or substitution:
# g = global (apply to every match, not only the first)
# i = case-insensitive
# s = single-line: "." also matches a newline, so the content is treated as one line
# m = multi-line: ^ and $ match at the start and end of every line in the content
 
$string =~ s/i/u/ig; # would change "I am a string" into "u am a strung"

PHP

Works with: PHP version 5.2.0

// Subject string used by the Test and Replace snippets that follow.
$string = 'I am a string';

Test

// preg_match gives a truthy result when the pattern is found in $string.
$endsWithString = preg_match('/string$/', $string);
if ($endsWithString) {
    echo "Ends with 'string'\n";
}

Replace

// \b is a word boundary, so only the standalone word "a" is replaced.
$standaloneA = '/\ba\b/';
$string = preg_replace($standaloneA, 'another', $string);
echo "Found 'a' and replace it with 'another', resulting in this string: $string\n";

Python

Works with: Python version 2.5

Setup

import re
# Subject string used by the snippets below.
# NOTE(review): the name shadows the built-in str type; the later snippets rely on it.
str = 'I am a string'

Test

# re.search scans the whole string for the pattern.
# The parenthesised print works both as the Python 2 statement and as the Python 3 function.
if re.search(r'string$', str):
    print("Ends with 'string'")

Test, storing the compiled regular expression in a variable

# Compile the pattern once so it can be reused.
# The parenthesised print works both as the Python 2 statement and as the Python 3 function.
regex = re.compile(r'string$')
if regex.search(str):
    print("Ends with 'string'")

To find all matches rather than just the first match, use re.findall rather than re.search.

Substitute

# Replace every " a " in str with " another ".
str = re.sub(pattern=r' a ', repl=' another ', string=str)

All instances of the specified pattern are replaced. To limit the number of instances replaced, specify the fourth argument to sub, the maximum number of replacements. To make a case-insensitive replacement, place (?i) at the beginning of the regular expression.

Substitute, storing the compiled regular expression in a variable

# Compile the pattern once, then call sub on the compiled object:
# the replacement comes first, the subject string second.
regex = re.compile(r' a ')
str = regex.sub(' another ', str)

Note: re.match() and regex.match() imply a "^" at the beginning of the regular expression. re.search() and regex.search() do not.

Raven

# Bind the subject string to the name str for the snippets below.
'i am a string' as str

Match:

# Match str against the pattern, then print the message when the match succeeded.
str m/string$/
if  "Ends with 'string'\n" print

Replace:

# Replace " a " with " another " in str and print the result.
str r/ a / another / print

Ruby

Test

 # Subject string for the tests below.
 string = "I am a string"
 # =~ gives the match index (truthy) or nil when there is no match.
 puts "Ends with 'string'" if string =~ /string$/
 puts "Does not start with 'You'" unless string =~ /^You/

Substitute

 # gsub returns a copy with every match replaced; string itself is unchanged.
 puts string.gsub(/ a /,' another ')
 #or
 # Element assignment replaces the first match in place. The replacement keeps
 # the surrounding spaces so the words stay separated.
 string[/ a /]=' another '
 puts string

Substitute using block

 # The block runs once per match; its return value becomes the replacement text.
 replaced = string.gsub(/\bam\b/) do |match|
   puts "I found #{match}"
   # substitute "was" for the matched text
   "was"
 end
 puts(replaced)

Tcl

Test

set theString "I am a string"

# regexp returns 1 when the pattern matches; "--" ends option parsing.
if {[regexp -- {string$} $theString]} {
  puts "Ends with 'string'\n"
}

# The condition is braced like the one above, so it is substituted only once.
if {![regexp -- {^You} $theString]} {
  puts "Does not start with 'You'\n"
}

Substitute

# set takes a variable name and a value; there is no "=" in the syntax.
set theString "I am a string"
# With no result-variable argument, regsub returns the substituted string.
puts [regsub -- { a } $theString { another }]

Toka

Toka's regular expression library allows for matching, but does not yet provide for replacing elements within strings.

#! Load the regex library
needs regex

#! Two test strings, named test.1 and test.2
" This is a string" is-data test.1
" Another string" is-data test.2

#! Create a regex named 'expression' from the pattern ^This,
#! i.e. it tries to match strings beginning with 'This'.
" ^This" regex: expression

#! A two-cell array named 'match' to receive the result of a match
#! (Element 0 = starting offset, Element 1 = ending offset of match)
2 cells is-array match

#! Try each test string against the expression and print the flag
#! that try-regex returns.  -1 is TRUE, 0 is FALSE
expression test.1 2 match try-regex .
expression test.2 2 match try-regex .