Count occurrences of a substring

From Rosetta Code
Jump to: navigation, search
Task
Count occurrences of a substring
You are encouraged to solve this task according to the task description, using any language you may know.

The task is to either create a function, or show a built-in function, to count the number of non-overlapping occurrences of a substring inside a string. The function should take two arguments: the first argument being the string to search and the second a substring to be searched for. It should return an integer count.

print countSubstring("the three truths","th")
3
 
// do not count substrings that overlap with previously-counted substrings:
print countSubstring("ababababab","abab")
2

The matching should yield the highest number of non-overlapping matches. In general, this essentially means matching from left-to-right or right-to-left (see proof on talk page).


Contents

[edit] Ada

with Ada.Strings.Fixed, Ada.Text_IO;
 
procedure Count_Substrings is
 
function Substrings(Main: String; Sub: String) return Natural is
Idx: Natural := Ada.Strings.Fixed.Index(Source => Main, Pattern => Sub);
begin
if Idx = 0 then
return 0;
else
return 1 + Substrings(Main(Idx+Sub'Length .. Main'Last), Sub);
end if;
end Substrings;
 
begin
Ada.Text_IO.Put(Integer'Image(Substrings("the three truths", "th")));
Ada.Text_IO.Put(Integer'Image(Substrings("ababababab", "abab")));
end Count_Substrings;
 
Output:
 3 2

[edit] ALGOL 68

Works with: ALGOL 68 version Revision 1 - no extensions to language used.
Works with: ALGOL 68G version Any - tested with release 1.18.0-9h.tiny.

Algol68 has no build in function to do this task, hence the next to create a count string in string routine.

#!/usr/local/bin/a68g --script #
 
PROC count string in string = (STRING needle, haystack)INT: (
INT start:=LWB haystack, next, out:=0;
FOR count WHILE string in string(needle, next, haystack[start:]) DO
start+:=next+UPB needle-LWB needle;
out:=count
OD;
out
);
 
printf(($d" "$,
count string in string("th", "the three truths"), # expect 3 #
count string in string("abab", "ababababab"), # expect 2 #
count string in string("a*b", "abaabba*bbaba*bbab"), # expect 2 #
$l$
))

Output:

3 2 2 

[edit] AutoHotkey

While it is simple enough to parse the string, AutoHotkey has a rather unconventional method which outperforms this. StringReplace sets the number of replaced strings to ErrorLevel.

MsgBox % countSubstring("the three truths","th") ; 3
MsgBox % countSubstring("ababababab","abab") ; 2
 
CountSubstring(fullstring, substring){
StringReplace, junk, fullstring, %substring%, , UseErrorLevel
return errorlevel
}

[edit] AWK

#!/usr/local/bin/awk -f
function countsubstring (str,pat)
{
n=0;
while (match(str,pat)) {
n++;
str = substr(str,RSTART+RLENGTH);
}
return n;
}
 
BEGIN {
print countsubstring("the three truths","th");
print countsubstring("ababababab","abab");
print countsubstring(ARGV[1],ARGV[2]);
}

Output:

$ ./countsubstring.awk aaaabacad aa
3
2
2

[edit] BASIC

Works with: QBasic

In FreeBASIC, this needs to be compiled with -lang qb or -lang fblite.

DECLARE FUNCTION countSubstring& (where AS STRING, what AS STRING)
 
PRINT "the three truths, th:", countSubstring&("the three truths", "th")
PRINT "ababababab, abab:", countSubstring&("ababababab", "abab")
 
FUNCTION countSubstring& (where AS STRING, what AS STRING)
DIM c AS LONG, s AS LONG
s = 1 - LEN(what)
DO
s = INSTR(s + LEN(what), where, what)
IF 0 = s THEN EXIT DO
c = c + 1
LOOP
countSubstring = c
END FUNCTION

Output:

the three truths, th:        3
ababababab, abab:            2

See also: Liberty BASIC, PowerBASIC, PureBasic.

[edit] Applesoft BASIC

10 F$ = "TH"
20 S$ = "THE THREE TRUTHS"
30 GOSUB 100"COUNT SUBSTRING
40 PRINT R
50 F$ = "ABAB"
60 S$ = "ABABABABAB"
70 GOSUB 100"COUNT SUBSTRING
80 PRINT R
90 END
 
100 R = 0
110 F = LEN(F$)
120 S = LEN(S$)
130 IF F > S THEN RETURN
140 IF F = 0 THEN RETURN
150 IF F = S AND F$ = S$ THEN R = 1 : RETURN
160 FOR I = 1 TO S - F
170 IF F$ = MID$(S$, I, F) THEN R = R + 1 : I = I + F - 1
180 NEXT I
190 RETURN

[edit] BBC BASIC

      tst$ = "the three truths"
sub$ = "th"
PRINT ; FNcountSubstring(tst$, sub$) " """ sub$ """ in """ tst$ """"
tst$ = "ababababab"
sub$ = "abab"
PRINT ; FNcountSubstring(tst$, sub$) " """ sub$ """ in """ tst$ """"
END
 
DEF FNcountSubstring(A$, B$)
LOCAL I%, N%
I% = 1 : N% = 0
REPEAT
I% = INSTR(A$, B$, I%)
IF I% THEN N% += 1 : I% += LEN(B$)
UNTIL I% = 0
= N%
 

Output:

3 "th" in "the three truths"
2 "abab" in "ababababab"

[edit] Bracmat

  ( count-substring
= n S s p
. 0:?n:?p
& !arg:(?S.?s)
& @( !S
 :  ?
( [!p ? !s [?p ?
& !n+1:?n
& ~
)
)
| !n
)
& out$(count-substring$("the three truths".th))
& out$(count-substring$(ababababab.abab))
& ;

Output:

3
2

[edit] C

#include <stdio.h>
#include <string.h>
 
int match(const char *s, const char *p, int overlap)
{
int c = 0, l = strlen(p);
 
while (*s != '\0') {
if (strncmp(s++, p, l)) continue;
if (!overlap) s += l - 1;
c++;
}
return c;
}
 
int main()
{
printf("%d\n", match("the three truths", "th", 0));
printf("overlap:%d\n", match("abababababa", "aba", 1));
printf("not:  %d\n", match("abababababa", "aba", 0));
return 0;
}

Alternate version:

#include <stdio.h>
#include <string.h>
 
// returns count of non-overlapping occurrences of 'sub' in 'str'
int countSubstring(const char *str, const char *sub)
{
int length = strlen(sub);
if (length == 0) return 0;
int count = 0;
for (str = strstr(str, sub); str; str = strstr(str + length, sub))
++count;
return count;
}
 
int main()
{
printf("%d\n", countSubstring("the three truths", "th"));
printf("%d\n", countSubstring("ababababab", "abab"));
printf("%d\n", countSubstring("abaabba*bbaba*bbab", "a*b"));
 
return 0;
}

Output:

3
2
2

[edit] C++

#include <iostream>
#include <string>
 
// returns count of non-overlapping occurrences of 'sub' in 'str'
int countSubstring(const std::string& str, const std::string& sub)
{
if (sub.length() == 0) return 0;
int count = 0;
for (size_t offset = str.find(sub); offset != std::string::npos;
offset = str.find(sub, offset + sub.length()))
{
++count;
}
return count;
}
 
int main()
{
std::cout << countSubstring("the three truths", "th") << '\n';
std::cout << countSubstring("ababababab", "abab") << '\n';
std::cout << countSubstring("abaabba*bbaba*bbab", "a*b") << '\n';
 
return 0;
}
Output:
3
2
2

[edit] C#

using System;
 
class SubStringTestClass
{
public static int CountSubStrings(this string testString, string testSubstring)
{
int count = 0;
 
if (testString.Contains(testSubstring))
{
for (int i = 0; i < testString.Length; i++)
{
if (testString.Substring(i).Length >= testSubstring.Length)
{
bool equals = testString.Substring(i, testSubstring.Length).Equals(testSubstring);
if (equals)
{
count++;
i += testSubstring.Length - 1; // Fix: Don't count overlapping matches
}
}
}
}
return count;
}
}


[edit] Clojure

Use a sequence of regexp matches to count occurrences.

 
(defn count-substring [txt sub]
(count (re-seq (re-pattern sub) txt)))
 

Use the trick of blank replacement and maths to count occurrences.

 
(defn count-substring1 [txt sub]
(/ (- (count txt) (count (.replaceAll txt sub "")))
(count sub)))
 

[edit] COBOL

INSPECT can be used for this task without having to create a function.

       IDENTIFICATION DIVISION.
PROGRAM-ID. testing.
 
DATA DIVISION.
WORKING-STORAGE SECTION.
01 occurrences PIC 99.
 
PROCEDURE DIVISION.
INSPECT "the three truths" TALLYING occurrences FOR ALL "th"
DISPLAY occurrences
 
MOVE 0 TO occurrences
INSPECT "ababababab" TALLYING occurrences FOR ALL "abab"
DISPLAY occurrences
 
MOVE 0 TO occurrences
INSPECT "abaabba*bbaba*bbab" TALLYING occurrences
FOR ALL "a*b"
DISPLAY occurrences
 
GOBACK
.
Output:
03
02
02

[edit] CoffeeScript

 
countSubstring = (str, substr) ->
n = 0
i = 0
while (pos = str.indexOf(substr, i)) != -1
n += 1
i = pos + substr.length
n
 
console.log countSubstring "the three truths", "th"
console.log countSubstring "ababababab", "abab"
 

[edit] Common Lisp

(defun count-sub (str pat)
(loop with z = 0 with s = 0 while s do
(when (setf s (search pat str :start2 s))
(incf z) (incf s (length pat)))
finally (return z)))
 
(count-sub "ababa" "ab") ; 2
(count-sub "ababa" "aba") ; 1

[edit] D

void main() {
import std.stdio, std.algorithm;
 
"the three truths".count("th").writeln;
"ababababab".count("abab").writeln;
}
Output:
3
2

[edit] Delphi

program OccurrencesOfASubstring;
 
{$APPTYPE CONSOLE}
 
uses StrUtils;
 
function CountSubstring(const aString, aSubstring: string): Integer;
var
lPosition: Integer;
begin
Result := 0;
lPosition := PosEx(aSubstring, aString);
while lPosition <> 0 do
begin
Inc(Result);
lPosition := PosEx(aSubstring, aString, lPosition + Length(aSubstring));
end;
end;
 
begin
Writeln(CountSubstring('the three truths', 'th'));
Writeln(CountSubstring('ababababab', 'abab'));
end.

[edit] Déjà Vu

!. count "the three truths" "th"
!. count "ababababab" "abab"
Output:
3
2

[edit] Eiffel

 
class
APPLICATION
inherit
ARGUMENTS
create
make
feature {NONE} -- Initialization
make
-- Run application.
do
occurance := 0
from
index := 1
until
index > text.count
loop
temp := text.fuzzy_index(search_for, index, 0)
if
temp /= 0
then
index := temp + search_for.count
occurance := occurance + 1
else
index := text.count + 1
end
end
print(occurance)
end
 
index:INTEGER
temp:INTEGER
occurance:INTEGER
text:STRING = "ababababab"
search_for:STRING = "abab"
end
 

[edit] Erlang

 
%% Count non-overlapping substrings in Erlang for the rosetta code wiki.
%% Implemented by J.W. Luiten
 
-module(substrings).
-export([main/2]).
 
%% String exhausted, present result
match([], _Sub, _OrigSub, Acc) ->
Acc;
 
%% Sub exhausted, count a match
match(String, [], Sub, Acc) ->
match(String, Sub, Sub, Acc+1);
 
%% First character matches, advance
match([X|MainTail], [X|SubTail], Sub, Acc) ->
match(MainTail, SubTail, Sub, Acc);
 
%% First characters do not match. Keep scanning for sub in remainder of string
match([_X|MainTail], [_Y|_SubTail], Sub, Acc)->
match(MainTail, Sub, Sub, Acc).
 
main(String, Sub) ->
match(String, Sub, Sub, 0).
Command:
substrings:main("ababababab","abab").

Output:

2

Alternative using built in functions:

 
main( String, Sub ) -> erlang:length( binary:split(binary:list_to_bin(String), binary:list_to_bin(Sub), [global]) ) - 1.
 

[edit] Euphoria

function countSubstring(sequence s, sequence sub)
integer from,count
count = 0
from = 1
while 1 do
from = match_from(sub,s,from)
if not from then
exit
end if
from += length(sub)
count += 1
end while
return count
end function
 
? countSubstring("the three truths","th")
? countSubstring("ababababab","abab")

Output:

3
2

[edit] EGL

Works with: EDT

The "remove and count the difference" and "manual loop" methods. Implementation includes protection from empty source and search strings.

program CountStrings
 
function main()
SysLib.writeStdout("Remove and Count:");
SysLib.writeStdout(countSubstring("th", "the three truths"));
SysLib.writeStdout(countSubstring("abab", "ababababab"));
SysLib.writeStdout(countSubstring("a*b", "abaabba*bbaba*bbab"));
SysLib.writeStdout(countSubstring("a", "abaabba*bbaba*bbab"));
SysLib.writeStdout(countSubstring(" ", "abaabba*bbaba*bbab"));
SysLib.writeStdout(countSubstring("", "abaabba*bbaba*bbab"));
SysLib.writeStdout(countSubstring("a", ""));
SysLib.writeStdout(countSubstring("", ""));
 
SysLib.writeStdout("Manual Loop:");
SysLib.writeStdout(countSubstringWithLoop("th", "the three truths"));
SysLib.writeStdout(countSubstringWithLoop("abab", "ababababab"));
SysLib.writeStdout(countSubstringWithLoop("a*b", "abaabba*bbaba*bbab"));
SysLib.writeStdout(countSubstringWithLoop("a", "abaabba*bbaba*bbab"));
SysLib.writeStdout(countSubstringWithLoop(" ", "abaabba*bbaba*bbab"));
SysLib.writeStdout(countSubstringWithLoop("", "abaabba*bbaba*bbab"));
SysLib.writeStdout(countSubstringWithLoop("a", ""));
SysLib.writeStdout(countSubstringWithLoop("", ""));
end
 
function countSubstring(substr string in, str string in) returns(int)
if(str.length() > 0 and substr.length() > 0)
return (str.length() - str.replaceStr(subStr, "").length()) / subStr.length();
else
return 0;
end
end
 
function countSubstringWithLoop(substr string in, str string in) returns(int)
count int = 0;
loc, index int = 1;
strlen int = str.length();
substrlen int = substr.length();
 
if(strlen > 0 and substrlen > 0)
while(loc != 0 and index <= strlen)
loc = str.indexOf(substr, index);
if(loc > 0)
count += 1;
index = loc + substrlen;
end
end
end
return count;
end
 
end
 

Output:

Remove and Count:
3
2
2
7
0
0
0
0
Manual Loop:
3
2
2
7
0
0
0
0

[edit] Factor

USING: math sequences splitting ;
: occurences ( seq subseq -- n ) split-subseq length 1 - ;

[edit] Forth

: str-count ( s1 len s2 len -- n )
2swap 0 >r
begin 2over search
while 2over nip /string
r> 1+ >r
repeat 2drop 2drop r> ;
 
s" the three truths" s" th" str-count . \ 3
s" ababababab" s" abab" str-count . \ 2

[edit] Fortran

Works with: Fortran version 90 and later
program Example
implicit none
integer :: n
 
n = countsubstring("the three truths", "th")
write(*,*) n
n = countsubstring("ababababab", "abab")
write(*,*) n
n = countsubstring("abaabba*bbaba*bbab", "a*b")
write(*,*) n
 
contains
 
function countsubstring(s1, s2) result(c)
character(*), intent(in) :: s1, s2
integer :: c, p, posn
 
c = 0
if(len(s2) == 0) return
p = 1
do
posn = index(s1(p:), s2)
if(posn == 0) return
c = c + 1
p = p + posn + len(s2)
end do
end function
end program

Output

3
2
2

[edit] F#

"Remove and count the difference" method, as shown by J, Java, ...

open System
 
let countSubstring (where :string) (what : string) =
match what with
| "" -> 0 // just a definition; infinity is not an int
| _ -> (where.Length - where.Replace(what, @"").Length) / what.Length
 
 
[<EntryPoint>]
let main argv =
let show where what =
printfn @"countSubstring(""%s"", ""%s"") = %d" where what (countSubstring where what)
show "the three truths" "th"
show "ababababab" "abab"
show "abc" ""
0
countSubstring("the three truths", "th") = 3
countSubstring("ababababab", "abab") = 2
countSubstring("abc", "") = 0

[edit] FunL

import util.Regex
 
def countSubstring( str, substr ) = Regex( substr ).findAllMatchIn( str ).length()
 
println( countSubstring("the three truths", "th") )
println( countSubstring("ababababab", "abab") )
Output:
3
2

[edit] Go

Using strings.Count() method:

package main
import (
"fmt"
"strings"
)
 
func main() {
fmt.Println(strings.Count("the three truths", "th")) // says: 3
fmt.Println(strings.Count("ababababab", "abab")) // says: 2
}

[edit] Groovy

Solution, uses the Groovy "find" operator (=~), and the Groovy-extended Matcher property "count":

println (('the three truths' =~ /th/).count)
println (('ababababab' =~ /abab/).count)
println (('abaabba*bbaba*bbab' =~ /a*b/).count)
println (('abaabba*bbaba*bbab' =~ /a\*b/).count)

Output:

3
2
9
2

[edit] Haskell

 
import Data.Text hiding (length)
 
-- Return the number of non-overlapping occurrences of sub in str.
countSubStrs str sub = length $ breakOnAll (pack sub) (pack str)
 
main = do
print $ countSubStrs "the three truths" "th"
print $ countSubStrs "ababababab" "abab"
 

Output:

3
2

[edit] Icon and Unicon

procedure main()
every A := ![ ["the three truths","th"], ["ababababab","abab"] ] do
write("The string ",image(A[2])," occurs as a non-overlapping substring ",
countSubstring!A , " times in ",image(A[1]))
end
 
procedure countSubstring(s1,s2) #: return count of non-overlapping substrings
c := 0
s1 ? while tab(find(s2)) do {
move(*s2)
c +:= 1
}
return c
end
Output:
The string "th" occurs as a non-overlapping substring 3 times in "the three truths"
The string "abab" occurs as a non-overlapping substring 2 times in "ababababab"

[edit] J

require'strings'
countss=: #@] %~ #@[ - [ #@rplc '';~]

In other words: find length of original string, replace the string to be counted with the empty string, find the difference in lengths and divide by the length of the string to be counted.

Example use:

   'the three truths' countss 'th'
3
'ababababab' countss 'abab'
2

[edit] Java

Works with: Java version 1.5+

The "remove and count the difference" method:

public class CountSubstring {
public static int countSubstring(String subStr, String str){
return (str.length() - str.replace(subStr, "").length()) / subStr.length();
}
 
public static void main(String[] args){
System.out.println(countSubstring("th", "the three truths"));
System.out.println(countSubstring("abab", "ababababab"));
System.out.println(countSubstring("a*b", "abaabba*bbaba*bbab"));
}
}

Output:

3
2
2


Works with: Java version 1.5+

The "split and count" method:

import java.util.regex.Pattern;
 
public class CountSubstring {
public static int countSubstring(String subStr, String str){
// the result of split() will contain one more element than the delimiter
// the "-1" second argument makes it not discard trailing empty strings
return str.split(Pattern.quote(subStr), -1).length - 1;
}
 
public static void main(String[] args){
System.out.println(countSubstring("th", "the three truths"));
System.out.println(countSubstring("abab", "ababababab"));
System.out.println(countSubstring("a*b", "abaabba*bbaba*bbab"));
}
}

Output:

3
2
2


Manual looping

public class CountSubstring {
public static int countSubstring(String subStr, String str){
int count = 0;
for (int loc = str.indexOf(subStr); loc != -1;
loc = str.indexOf(subStr, loc + subStr.length()))
count++;
return count;
}
 
public static void main(String[] args){
System.out.println(countSubstring("th", "the three truths"));
System.out.println(countSubstring("abab", "ababababab"));
System.out.println(countSubstring("a*b", "abaabba*bbaba*bbab"));
}
}

Output:

3
2
2

[edit] JavaScript

Using regexes:

function countSubstring(str, subStr){
return str.match(new RegExp(subStr, "g")).length
}

[edit] jq

Using regexes (available in jq versions after June 19, 2014):

 
def countSubstring(sub):
[match(sub; "g")] | length;
Example:
 
"the three truths" | countSubstring("th")

[edit] K

The dyadic verb _ss gives the positions of substring y in string x.

  "the three truths" _ss "th"
0 4 13
 
#"the three truths" _ss "th"
3
 
"ababababab" _ss "abab"
0 4
 
#"ababababab" _ss "abab"
2
 


[edit] Lasso

define countSubstring(str::string, substr::string)::integer => {
local(i = 1, foundpos = -1, found = 0)
while(#i < #str->size && #foundpos != 0) => {
protect => {
handle_error => { #foundpos = 0 }
#foundpos = #str->find(#substr, -offset=#i)
}
if(#foundpos > 0) => {
#found += 1
#i = #foundpos + #substr->size
else
#i++
}
}
return #found
}
define countSubstring_bothways(str::string, substr::string)::integer => {
local(found = countSubstring(#str,#substr))
#str->reverse
local(found2 = countSubstring(#str,#substr))
#found > #found2 ? return #found | return #found2
}
countSubstring_bothways('the three truths','th')
//3
countSubstring_bothways('ababababab','abab')
//2

[edit] Liberty BASIC

 
print countSubstring( "the three truths", "th")
print countSubstring( "ababababab", "abab")
end
 
function countSubstring( a$, s$)
c =0
la =len( a$)
ls =len( s$)
for i =1 to la -ls
if mid$( a$, i, ls) =s$ then c =c +1: i =i +ls -1
next i
countSubstring =c
end function
 

[edit] Logtalk

Using atoms for string representation:

 
:- object(counting).
 
:- public(count/3).
 
count(String, SubString, Count) :-
count(String, SubString, 0, Count).
 
count(String, SubString, Count0, Count) :-
( sub_atom(String, Before, Length, After, SubString) ->
Count1 is Count0 + 1,
Start is Before + Length,
sub_atom(String, Start, After, 0, Rest),
count(Rest, SubString, Count1, Count)
; Count is Count0
).
 
:- end_object.
 

Sample output:

 
| ?- counting::count('the three truths', th, N).
N = 3
yes
 
| ?- counting::count(ababababab, abab, N).
N = 2
yes
 

[edit] Lua

function Count_Substring( s1, s2 )
local magic = "[%^%$%(%)%%%.%[%]%*%+%-%?]"
local percent = function(s)return "%"..s end
return select( 2, s1:gsub( s2:gsub(magic,percent), "" ) )
end
 
print( Count_Substring( "the three truths", "th" ) )
print( Count_Substring( "ababababab","abab" ) )
3
2

[edit] Maple

 
f:=proc(s::string,c::string,count::nonnegint) local n;
n:=StringTools:-Search(c,s);
if n>0 then 1+procname(s[n+length(c)..],c,count);
else 0; end if;
end proc:
 
f("the three truths","th",0);
 
f("ababababab","abab",0);
 

Output:

                                      3

                                      2

[edit] Mathematica

StringPosition["the three truths","th",Overlaps->False]//Length
3
StringPosition["ababababab","abab",Overlaps->False]//Length
2

[edit] MATLAB / Octave

  % Count occurrences of a substring without overlap
length(findstr("ababababab","abab",0))
length(findstr("the three truths","th",0))
 
% Count occurrences of a substring with overlap
length(findstr("ababababab","abab",1))

Output:

>>   % Count occurrences of a substring without overlap
>>   length(findstr("ababababab","abab",0))
ans =  2
>>   length(findstr("the three truths","th",0))
ans =  3
>>   % Count occurrences of a substring with overlap
>>   length(findstr("ababababab","abab",1))
ans =  4
>> 

[edit] Maxima

scount(e, s) := block(
[n: 0, k: 1],
while integerp(k: ssearch(e, s, k)) do (n: n + 1, k: k + 1),
n
)$
 
scount("na", "banana");
2

[edit] Mirah

import java.util.regex.Pattern
import java.util.regex.Matcher
 
#The "remove and count the difference" method
def count_substring(pattern:string, source:string)
(source.length() - source.replace(pattern, "").length()) / pattern.length()
end
 
puts count_substring("th", "the three truths") # ==> 3
puts count_substring("abab", "ababababab") # ==> 2
puts count_substring("a*b", "abaabba*bbaba*bbab") # ==> 2
 
 
# The "split and count" method
def count_substring2(pattern:string, source:string)
# the result of split() will contain one more element than the delimiter
# the "-1" second argument makes it not discard trailing empty strings
source.split(Pattern.quote(pattern), -1).length - 1
end
 
puts count_substring2("th", "the three truths") # ==> 3
puts count_substring2("abab", "ababababab") # ==> 2
puts count_substring2("a*b", "abaabba*bbaba*bbab") # ==> 2
 
 
# This method does a match and counts how many times it matches
def count_substring3(pattern:string, source:string)
result = 0
Matcher m = Pattern.compile(Pattern.quote(pattern)).matcher(source);
while (m.find())
result = result + 1
end
result
end
 
puts count_substring3("th", "the three truths") # ==> 3
puts count_substring3("abab", "ababababab") # ==> 2
puts count_substring3("a*b", "abaabba*bbaba*bbab") # ==> 2
 

[edit] Nemerle

Translation of: F#
using System.Console;
 
module CountSubStrings
{
CountSubStrings(this text : string, target : string) : int
{
match (target) {
|"" => 0
|_ => (text.Length - text.Replace(target, "").Length) / target.Length
}
}
 
Main() : void
{
def text1 = "the three truths";
def target1 = "th";
def text2 = "ababababab";
def target2 = "abab";
 
WriteLine($"$target1 occurs $(text1.CountSubStrings(target1)) times in $text1");
WriteLine($"$target2 occurs $(text2.CountSubStrings(target2)) times in $text2");
}
}

Output:

th occurs 3 times in the three truths
abab occurs 2 times in ababababab

[edit] NetRexx

NetRexx provides the string.countstr(needle) built-in function:

/* NetRexx */
options replace format comments java crossref symbols nobinary
 
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
method countSubstring(inStr, findStr) public static
return inStr.countstr(findStr)
 
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
method main(args = String[]) public static
strings = ''
find = 'FIND'
ix = 0
ix = ix + 1; strings[0] = ix; find[0] = ix; strings[ix] = 'the three truths'; strings[ix, find] = 'th'
ix = ix + 1; strings[0] = ix; find[0] = ix; strings[ix] = 'ababababab'; strings[ix, find] = 'abab'
 
loop ix = 1 to strings[0]
str = strings[ix]
fnd = strings[ix, find]
say 'there are' countSubstring(str, fnd) 'occurences of "'fnd'" in "'str'"'
end ix
 
return
 

Output:

there are 3 occurences of "th" in "the three truths"
there are 2 occurences of "abab" in "ababababab"

[edit] NewLISP

; file:   stringcount.lsp
; url: http://rosettacode.org/wiki/Count_occurrences_of_a_substring
; author: oofoe 2012-01-29
 
; Obvious (and non-destructive...)
 
; Note that NewLISP performs an /implicit/ slice on a string or list
; with this form "(start# end# stringorlist)". If the end# is omitted,
; the slice will go to the end of the string. This is handy here to
; keep removing the front part of the string as it gets matched.
 
(define (scount needle haystack)
(let ((h (copy haystack)) ; Copy of haystack string.
(i 0) ; Cursor.
(c 0)) ; Count of occurences.
 
(while (setq i (find needle h))
(inc c)
(setq h ((+ i (length needle)) h)))
 
c)) ; Return count.
 
; Tricky -- Uses functionality from replace function to find all
; non-overlapping occurrences, replace them, and return the count of
; items replaced in system variable $0.
 
(define (rcount needle haystack)
(replace needle haystack "X") $0)
 
; Test
 
(define (test f needle haystack)
(println "Found " (f needle haystack)
" occurences of '" needle "' in '" haystack "'."))
 
(dolist (f (list scount rcount))
(test f "glart" "hinkerpop")
(test f "abab" "ababababab")
(test f "th" "the three truths")
(println)
)
 
(exit)

Sample output:

Found 0 occurences of 'glart' in 'hinkerpop'.
Found 2 occurences of 'abab' in 'ababababab'.
Found 3 occurences of 'th' in 'the three truths'.

Found 0 occurences of 'glart' in 'hinkerpop'.
Found 2 occurences of 'abab' in 'ababababab'.
Found 3 occurences of 'th' in 'the three truths'.

[edit] Nimrod

import strutils
 
proc count(s, sub): int =
var i = 0
while true:
i = s.find(sub, i)
if i < 0:
break
i += sub.len # i += 1 for overlapping substrings
inc result
 
echo count("the three truths","th")
 
echo count("ababababab","abab")

Output:

3
2

[edit] Objective-C

The "split and count" method:

@interface NSString (CountSubstrings)
- (NSUInteger)occurrencesOfSubstring:(NSString *)subStr;
@end
 
@implementation NSString (CountSubstrings)
- (NSUInteger)occurrencesOfSubstring:(NSString *)subStr {
return [[self componentsSeparatedByString:subStr] count] - 1;
}
@end
 
int main(int argc, const char *argv[]) {
@autoreleasepool {
 
NSLog(@"%lu", [@"the three truths" occurrencesOfSubstring:@"th"]);
NSLog(@"%lu", [@"ababababab" occurrencesOfSubstring:@"abab"]);
NSLog(@"%lu", [@"abaabba*bbaba*bbab" occurrencesOfSubstring:@"a*b"]);
 
}
return 0;
}
Output:
3
2
2


The "remove and count the difference" method:

@interface NSString (CountSubstrings)
- (NSUInteger)occurrencesOfSubstring:(NSString *)subStr;
@end
 
@implementation NSString (CountSubstrings)
- (NSUInteger)occurrencesOfSubstring:(NSString *)subStr {
return ([self length] - [[self stringByReplacingOccurrencesOfString:subStr withString:@""] length]) / [subStr length];
}
@end
 
int main(int argc, const char *argv[]) {
@autoreleasepool {
 
NSLog(@"%lu", [@"the three truths" occurrencesOfSubstring:@"th"]);
NSLog(@"%lu", [@"ababababab" occurrencesOfSubstring:@"abab"]);
NSLog(@"%lu", [@"abaabba*bbaba*bbab" occurrencesOfSubstring:@"a*b"]);
 
}
return 0;
}
Output:
3
2
2


Manual looping:

@interface NSString (CountSubstrings)
- (NSUInteger)occurrencesOfSubstring:(NSString *)subStr;
@end
 
@implementation NSString (CountSubstrings)
- (NSUInteger)occurrencesOfSubstring:(NSString *)subStr {
NSUInteger count = 0;
for (NSRange range = [self rangeOfString:subStr]; range.location != NSNotFound;
range.location += range.length,
range = [self rangeOfString:subStr options:0
range:NSMakeRange(range.location, [self length] - range.location)])
count++;
return count;
}
@end
 
int main(int argc, const char *argv[]) {
@autoreleasepool {
 
NSLog(@"%lu", [@"the three truths" occurrencesOfSubstring:@"th"]);
NSLog(@"%lu", [@"ababababab" occurrencesOfSubstring:@"abab"]);
NSLog(@"%lu", [@"abaabba*bbaba*bbab" occurrencesOfSubstring:@"a*b"]);
 
}
return 0;
}
Output:
3
2
2

[edit] OCaml

let count_substring str sub =
let sub_len = String.length sub in
let len_diff = (String.length str) - sub_len
and reg = Str.regexp_string sub in
let rec aux i n =
if i > len_diff then n else
try
let pos = Str.search_forward reg str i in
aux (pos + sub_len) (succ n)
with Not_found -> n
in
aux 0 0
 
let () =
Printf.printf "count 1: %d\n" (count_substring "the three truth" "th");
Printf.printf "count 2: %d\n" (count_substring "ababababab" "abab");
;;

[edit] ooRexx

 
bag="the three truths"
x="th"
say left(bag,30) left(x,15) 'found' bag~countstr(x)
 
bag="ababababab"
x="abab"
say left(bag,30) left(x,15) 'found' bag~countstr(x)
 
-- can be done caselessly too
bag="abABAbaBab"
x="abab"
say left(bag,30) left(x,15) 'found' bag~caselesscountstr(x)
 

Output:

the three truths               th              found 3
ababababab                     abab            found 2
abABAbaBab                     abab            found 2

[edit] PARI/GP

subvec(v,u)={
my(i=1,s);
while(i+#u<=#v,
for(j=1,#u,
if(v[i+j-1]!=u[j], i++; next(2))
);
s++;
i+=#u
);
s
};
substr(s1,s2)=subvec(Vec(s1),Vec(s2));
substr("the three truths","th")
substr("ababababab","abab")
Output:
%1 = 3
%2 = 2

[edit] Pascal

See Delphi

[edit] Perl

sub countSubstring {
my $str = shift;
my $sub = quotemeta(shift);
my $count = () = $str =~ /$sub/g;
return $count;
# or return scalar( () = $str =~ /$sub/g );
}
 
print countSubstring("the three truths","th"), "\n"; # prints "3"
print countSubstring("ababababab","abab"), "\n"; # prints "2"

[edit] Perl 6

sub count-substring($big,$little) { +$big.comb: /$little/ }
 
say count-substring("the three truths","th");
say count-substring("ababababab","abab");

Note that in Perl 6 the /$little/ matches the variable literally, so there's no need to quote regex metacharacters. Also, prefix + forces numeric context in Perl 6 (it's a no-op in Perl 5). One other style point: we now tend to prefer hyphenated names over camelCase.

[edit] PHP

<?php
echo substr_count("the three truths", "th"), "\n"; // prints "3"
echo substr_count("ababababab", "abab"), "\n"; // prints "2"
?>

[edit] PicoLisp

(de countSubstring (Str Sub)
(let (Cnt 0 H (chop Sub))
(for (S (chop Str) S (cdr S))
(when (head H S)
(inc 'Cnt)
(setq S (map prog2 H S)) ) )
Cnt ) )

Test:

: (countSubstring "the three truths" "th")
-> 3

: (countSubstring "ababababab" "abab")
-> 2

[edit] PL/I

cnt: procedure options (main);
declare (i, tally) fixed binary;
declare (text, key) character (100) varying;
 
get edit (text) (L); put skip data (text);
get edit (key) (L); put skip data (key);
 
tally = 0; i = 1;
do until (i = 0);
i = index(text, key, i);
if i > 0 then do; tally = tally + 1; i = i + length(key); end;
end;
put skip list (tally);
end cnt;

Output for the two specified strings is as expected. Output for the following data:-

TEXT='AAAAAAAAAAAAAAA';
KEY='AA';
        7 

[edit] PowerBASIC

Works with: PB/Win
Works with: PB/CC

Windows versions of PowerBASIC (at least since PB/Win 7, and possibly earlier) provide the TALLY function, which does exactly what this task requires (count non-overlapping substrings).

PB/DOS can use the example under BASIC, above.

Note that while this example is marked as working with PB/Win, the PRINT statement would need to be replaced with MSGBOX, or output to a file. (PB/Win does not support console output.)

FUNCTION PBMAIN () AS LONG
PRINT "the three truths, th:", TALLY("the three truths", "th")
PRINT "ababababab, abab:", TALLY("ababababab", "abab")
END FUNCTION

Output:

the three truths, th:        3
ababababab, abab:            2

[edit] PureBasic

a = CountString("the three truths","th")
b = CountString("ababababab","abab")
; a = 3
; b = 2

[edit] Prolog

Works with: SWI-Prolog version 7

Using SWI-Prolog's string facilities (this solution is very similar to the Logtalk solution that uses sub_atom/5):

 
 
count_substring(String, Sub, Total) :-
count_substring(String, Sub, 0, Total).
 
count_substring(String, Sub, Count, Total) :-
( substring_rest(String, Sub, Rest)
->
succ(Count, NextCount),
count_substring(Rest, Sub, NextCount, Total)
;
Total = Count
).
 
substring_rest(String, Sub, Rest) :-
sub_string(String, Before, Length, Remain, Sub),
DropN is Before + Length,
sub_string(String, DropN, Remain, 0, Rest).
 

Usage:

 
?- count_substring("the three truths","th",X).
X = 3.
 
?- count_substring("ababababab","abab",X).
X = 2.
 

[edit] Python

>>> "the three truths".count("th")
3
>>> "ababababab".count("abab")
2

[edit] R

The fixed parameter (and, in stringr, the function of the same name) is used to specify a search for a fixed string. Otherwise, the search pattern is interpreted as a POSIX regular expression. PCRE is also an option: use the perl parameter or function.

count = function(haystack, needle)
{v = attr(gregexpr(needle, haystack, fixed = T)[[1]], "match.length")
if (identical(v, -1L)) 0 else length(v)}
 
print(count("hello", "l"))
Library: stringr
library(stringr)
print(str_count("hello", fixed("l")))

[edit] Racket

 
(define count-substring
(compose length regexp-match*))
 
 
> (count-substring "th" "the three truths")
3
> (count-substring "abab" "ababababab")
2
 

[edit] REXX

Some older REXXes don't have the built-in function   countstr,   so one is included here.

The   countstr   subroutine (below) mimics the BIF in newer REXXes.

Either of the first two strings may be null.
The third argument is optional and is the start position to start counting.
If specified, it must be a positive integer.

No checks are made (in the countstr subroutine) for:

  • no arguments
  • too many arguments
  • if startAt is a positive integer
/*REXX program  counts  occurrences  of a substring  (with no overlap). */
bag = "the three truths"
x = "th"
say left(bag,30) left(x,15) 'found' countstr(bag,x)
bag = "ababababab"
x = "abab"
say left(bag,30) left(x,15) 'found' countstr(bag,x)
exit /*stick a fork in it, we're done.*/
/*─────────────────────────────────────COUNTSTR subroutine──────────────*/
countstr: procedure; parse arg haystack, needle, startAt
if startAt=='' then startAt=1; width=length(needle)
 
do k=0 until _==0; _=pos(needle,haystack,startAt)
startAt = _ + width
end /*k*/
return k

output

the three truths               th              found 3
ababababab                     abab            found 2

[edit] Ruby

def countSubstrings str, subStr
return str.scan(subStr).length
end
 
irb(main):001:0> "the three truths".scan("th").length
=> 3
irb(main):002:0> "ababababab".scan("abab").length
=> 2

String#scan returns an array of substrings, and Array#length (or Array#size) counts them.

[edit] Run BASIC

print countSubstring("the three truths","th")
print countSubstring("ababababab","abab")
 
FUNCTION countSubstring(s$,find$)
WHILE instr(s$,find$,i) <> 0
countSubstring = countSubstring + 1
i = instr(s$,find$,i) + len(find$)
WEND
END FUNCTION
Output:
3
2

[edit] Scala

[edit] Using Recursion

import scala.annotation.tailrec
def countSubstring(str1:String, str2:String):Int={
@tailrec def count(pos:Int, c:Int):Int={
val idx=str1 indexOf(str2, pos)
if(idx == -1) c else count(idx+str2.size, c+1)
}
count(0,0)
}

[edit] Using Regular Expressions

def countSubstring( str:String, substr:String ) = substr.r.findAllMatchIn(str).length


println(countSubstring("ababababab", "abab"))
println(countSubstring("the three truths", "th"))
Output:
2
3

[edit] Seed7

$ include "seed7_05.s7i";
 
const func integer: countSubstring (in string: stri, in string: searched) is func
result
var integer: count is 0;
local
var integer: offset is 0;
begin
offset := pos(stri, searched);
while offset <> 0 do
incr(count);
offset := pos(stri, searched, offset + length(searched));
end while;
end func;
 
const proc: main is func
begin
writeln(countSubstring("the three truths", "th"));
writeln(countSubstring("ababababab", "abab"));
end func;

Output:

3
2

[edit] SNOBOL4

 
DEFINE("countSubstring(t,s)")
 
OUTPUT = countSubstring("the three truths","th")
OUTPUT = countSubstring("ababababab","abab")
 
 :(END)
countSubstring t ARB s = :F(RETURN)
countSubstring = countSubstring + 1 :(countSubstring)
END
3
2
 

[edit] Standard ML

fun count_substrings (str, sub) =
let
fun aux (str', count) =
let
val suff = #2 (Substring.position sub str')
in
if Substring.isEmpty suff then
count
else
aux (Substring.triml (size sub) suff, count + 1)
end
in
aux (Substring.full str, 0)
end;
 
print (Int.toString (count_substrings ("the three truths", "th")) ^ "\n");
print (Int.toString (count_substrings ("ababababab", "abab")) ^ "\n");
print (Int.toString (count_substrings ("abaabba*bbaba*bbab", "a*b")) ^ "\n");

[edit] TUSCRIPT

 
$$ MODE TUSCRIPT, {}
occurences=COUNT ("the three truths", ":th:")
occurences=COUNT ("ababababab", ":abab:")
occurences=COUNT ("abaabba*bbaba*bbab",":a\*b:")
 

Output:

3
2
2

[edit] Tcl

The regular expression engine is ideal for this task, especially as the ***= prefix makes it interpret the rest of the argument as a literal string to match:

proc countSubstrings {haystack needle} {
regexp -all ***=$needle $haystack
}
puts [countSubstrings "the three truths" "th"]
puts [countSubstrings "ababababab" "abab"]
puts [countSubstrings "abaabba*bbaba*bbab" "a*b"]
Output:
3
2
2

[edit] TXR

@(next :args)
@(do (defun count-occurrences (haystack needle)
(for* ((occurrences 0)
(old-pos 0)
(new-pos (search-str haystack needle old-pos nil)))
(new-pos occurrences)
((inc occurrences)
(set old-pos (+ new-pos (length needle)))
(set new-pos (search-str haystack needle old-pos nil))))))
@ndl
@hay
@(output)
@(count-occurrences hay ndl) occurrences(s) of @ndl inside @hay
@(end)
$ ./txr count-occurrences.txr "baba" "babababa"
2 occurence(s) of baba inside babababa
$ ./txr count-occurrences.txr "cat" "catapultcatalog"
2 occurence(s) of cat inside catapultcatalog


[edit] VBA

Function CountStringInString(stLookIn As String, stLookFor As String)
CountStringInString = UBound(Split(stLookIn, stLookFor))
End Function

[edit] Wortel

@let {
c &[s t] #!s.match &(t)g
 
[[
 !!c "the three truths" "th"
 !!c "ababababab" "abab"
]]
}
Returns:
[3 2]

[edit] XPL0

include c:\cxpl\codes;  \intrinsic 'code' declarations
string 0; \use zero-terminated strings, instead of MSb terminated
 
 
func StrNCmp(A, B, N); \Compare string A to string B up to N bytes long
\This returns:
\ >0 if A > B
\ =0 if A = B
\ <0 if A < B
char A, B; \strings to be compared
int N; \number of bytes to compare
int I;
[for I:= 0 to N-1 do
if A(I) # B(I) then
return A(I) - B(I);
return 0; \they're equal
]; \StrNCmp
 
 
func StrLen(Str); \Return the number of characters in an ASCIIZ string
char Str;
int I;
for I:= 0 to -1>>1-1 do
if Str(I) = 0 then return I;
 
 
func SubStr(A, B); \Count number of times string B occurs in A
char A, B;
int LA, LB, C, I;
[LA:= StrLen(A); LB:= StrLen(B);
C:= 0; I:= 0;
while I < LA do
if StrNCmp(B, A+I, LB) = 0 then [C:= C+1; I:= I+LB]
else I:= I+1;
return C;
];
 
 
[IntOut(0, SubStr("the three truths", "th")); CrLf(0);
IntOut(0, SubStr("ababababab", "abab")); CrLf(0);
]

Output:

3
2
>>   % Count occurrences of a substring without overlap
>>   length(findstr("ababababab","abab",0))
ans =  2
>>   length(findstr("the three truths","th",0))
ans =  3
>>   % Count occurrences of a substring with overlap
>>   length(findstr("ababababab","abab",1))
ans =  4
>>

[edit] zkl

Two solutions:

fcn countSubstring(s,p){ pn:=p.len(); cnt:=n:=0;
while(Void!=(n:=s.find(p,n))){cnt+=1; n+=pn}
cnt
}
Translation of: J
fcn countSubstring(s,p){ (pl:=p.len()) and (s.len()-(s-p).len())/pl }
Output:
zkl: println(countSubstring("the three truths","th"))
3
zkl: println(countSubstring("ababababab","abab"))
2
zkl: println(countSubstring("ababababab","v"))
0
Personal tools
Namespaces

Variants
Actions
Community
Explore
Misc
Toolbox