I'm working on modernizing Rosetta Code's infrastructure. Starting with communications. Please accept this time-limited open invite to RC's Slack.. --Michael Mol (talk) 20:59, 30 May 2020 (UTC)

Longest substrings without repeating characters

From Rosetta Code
Longest substrings without repeating characters is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Given a string,   find the   longest substring   without any repeating characters.


Other tasks related to string operations:
Metrics
Counting
Remove/replace
Anagrams/Derangements/shuffling
Find/Search/Determine
Formatting
Song lyrics/poems/Mad Libs/phrases
Tokenize
Sequences



AutoHotkey[edit]

LSWRC(str){
found := [], result := [], maxL := 0
if (StrLen(str) = 1)
result[str] := true
else while (StrLen(str) >= 2){
s := str
while StrLen(s) >= maxL{
if !(s ~= "(.).*\1"){
found[s] := true
maxL := maxL < StrLen(s) ? StrLen(s) : maxL
break
}
s := SubStr(s, 1, StrLen(s)-1) ; trim last chr
}
str := SubStr(str, 2) ; trim 1st chr and try again
}
maxL := 0
for str in found
maxL := maxL < StrLen(str) ? StrLen(str) : maxL
for str in found
if (StrLen(str) = maxL)
result[str] := true
return result
}
Examples:
db =
(
xyzyabcybdfd
xyzyab
zzz
a
)
 
for i, line in StrSplit(db, "`n", "`r"){
result := "[", i := 0
for str in LSWRC(line)
result .= str ", "
output .= line "`t> " Trim(result, ", ") "]`n"
}
MsgBox % output
return
Output:
xyzyabcybdfd	> [cybdf, zyabc]
xyzyab		> [zyab]
zzz		> [z]
a		> [a]


C++[edit]

#include <iostream>
#include <fstream>
#include <set>
#include <sstream>
#include <string>
#include <vector>
 
std::vector<std::string> longest_substrings_without_repeats(const std::string& str) {
size_t max_length = 0;
std::vector<std::string> result;
size_t length = str.size();
for (size_t offset = 0; offset < length; ++offset) {
std::set<char> characters;
size_t len = 0;
for (; offset + len < length; ++len) {
if (characters.find(str[offset + len]) != characters.end())
break;
characters.insert(str[offset + len]);
}
if (len > max_length) {
result.clear();
max_length = len;
}
if (len == max_length)
result.push_back(str.substr(offset, max_length));
}
return result;
}
 
void print_strings(const std::vector<std::string>& strings) {
std::cout << "[";
for (size_t i = 0, n = strings.size(); i < n; ++i) {
if (i > 0)
std::cout << ", ";
std::cout << '\'' << strings[i] << '\'';
}
std::cout << "]";
}
 
void test1() {
for (std::string str : { "xyzyabcybdfd", "xyzyab", "zzzzz", "a", "thisisastringtest", "" }) {
std::cout << "Input: '" << str << "'\nResult: ";
print_strings(longest_substrings_without_repeats(str));
std::cout << "\n\n";
}
}
 
std::string slurp(std::istream& in) {
std::ostringstream out;
out << in.rdbuf();
return out.str();
}
 
void test2(const std::string& filename) {
std::ifstream in(filename);
if (!in) {
std::cerr << "Cannot open file '" << filename << "'.\n";
return;
}
std::cout << "Longest substring with no repeats found in '" << filename << "': ";
print_strings(longest_substrings_without_repeats(slurp(in)));
std::cout << "\n";
}
 
int main() {
test1();
test2("unixdict.txt");
}
Output:
Input: 'xyzyabcybdfd'
Result: ['zyabc', 'cybdf']

Input: 'xyzyab'
Result: ['zyab']

Input: 'zzzzz'
Result: ['z', 'z', 'z', 'z', 'z']

Input: 'a'
Result: ['a']

Input: 'thisisastringtest'
Result: ['astring', 'ringtes']

Input: ''
Result: []

Longest substring with no repeats found in 'unixdict.txt': ['mbowel
disgrunt']

Go[edit]

Translation of: Wren
package main
 
import "fmt"
 
func substrings(s string) []string {
n := len(s)
if n == 0 {
return []string{""}
}
var ss []string
for i := 0; i < n; i++ {
for le := 1; le <= n-i; le++ {
ss = append(ss, s[i:i+le])
}
}
return ss
}
 
func distinctRunes(chars []rune) []rune {
m := make(map[rune]bool)
for _, char := range chars {
m[char] = true
}
var l []rune
for k := range m {
l = append(l, k)
}
return l
}
 
func distinctStrings(strings []string) []string {
m := make(map[string]bool)
for _, str := range strings {
m[str] = true
}
var l []string
for k := range m {
l = append(l, k)
}
return l
}
 
func main() {
fmt.Println("The longest substring(s) of the following without repeating characters are:\n")
strs := []string{"xyzyabcybdfd", "xyzyab", "zzzzz", "a", ""}
for _, s := range strs {
var longest []string
longestLen := 0
for _, ss := range substrings(s) {
if len(distinctRunes([]rune(ss))) == len(ss) {
if len(ss) >= longestLen {
if len(ss) > longestLen {
longest = longest[:0]
longestLen = len(ss)
}
longest = append(longest, ss)
}
}
}
longest = distinctStrings(longest)
fmt.Printf("String = '%s'\n", s)
fmt.Println(longest)
fmt.Printf("Length = %d\n\n", len(longest[0]))
}
}
Output:
The longest substring(s) of the following without repeating characters are:

String = 'xyzyabcybdfd'
[zyabc cybdf]
Length = 5

String = 'xyzyab'
[zyab]
Length = 4

String = 'zzzzz'
[z]
Length = 1

String = 'a'
[a]
Length = 1

String = ''
[]
Length = 0

jq[edit]

Adapted from Julia

Works with: jq

Works with gojq, the Go implementation of jq

# Use a dictionary for speed in case of very long strings
def alluniquehead:
length as $n
| if $n <= 1 then .
else . as $in
| {ix: -1}
| until(.i or (.ix == $n);
.ix += 1
| $in[.ix:.ix+1] as $x
| if .dict[$x] then .i = .ix
else .dict[$x] = true
end )
| $in[: .ix]
end ;
 
def maximal_substring_with_distinct_characters:
. as $in
| length as $n
| {i: -1}
| until( .i == $n or .stop;
.i += 1
| if .max and .max > $n - .i then .stop = true
else ($in[.i:] | alluniquehead) as $head
| if ($head|length) > .max
then .ans = $head
| .max = ($head|length)
else .
end
end)
| .ans;
 

Test Cases

"xyzyabcybdfd",
"xyzyab",
"zzzzz",
"a", "α⊆϶α϶",
""
| "\"\(.)\" => \"\(maximal_substring_with_distinct_characters)\""
 
Output:
"xyzyabcybdfd" => "zyabc"
"xyzyab" => "zyab"
"zzzzz" => "z"
"a" => "a"
"α⊆϶α϶" => "α⊆϶"
"" => ""

Julia[edit]

Works on any array, treating a character string as a character array.

function alluniquehead(arr)
len = length(arr)
if len > 1
for i in 2:len
if arr[i] in @view arr[1:i-1]
return arr[1:i-1]
end
end
end
return arr[:]
end
 
function maxuniques(arr)
length(arr) < 2 && return arr
amax = [alluniquehead(@view arr[i:end]) for i in 1:length(arr)]
maxlen = maximum(map(length, amax))
return filter(a -> length(a) == maxlen, amax)
end
 
for s in ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", "α⊆϶α϶", "", [1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0]]
uarray = maxuniques(collect(s))
 !isempty(uarray) && length(uarray[1]) > 1 && uarray[1][1] isa Char && (uarray = String.(uarray))
println("\"$s\" => ", uarray)
end
 
Output:
"xyzyabcybdfd" => ["zyabc", "cybdf"]
"xyzyab" => ["zyab"]
"zzzzz" => [['z'], ['z'], ['z'], ['z'], ['z']]
"a" => ['a']
"α⊆϶α϶" => ["α⊆϶", "⊆϶α"]
"" => Char[]
"[1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0]" => [[3, 4, 1, 2, 5, 6], [2, 5, 6, 1, 7, 8]]

Nim[edit]

This version converts strings to sequences of runes.

import sequtils, strutils, unicode
 
type Runes = seq[Rune]
 
func longestSubstrings(str: string): seq[string] =
var runes = str.toRunes
var current: Runes
var res: seq[Runes] # Result as a list of Runes.
var maxlen = 0
for c in runes:
let idx = current.find(c)
if idx >= 0:
if current.len > maxlen:
res = @[current]
maxlen = current.len
elif current.len == maxlen and current notin res:
res.add current
current.delete(0, idx)
current.add c
 
# Process the last current string.
if current.len > maxlen: res = @[current]
elif current.len == maxlen and current notin res: res.add current
result = res.mapIt($it)
 
 
for str in ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", "α⊆϶α϶", ""]:
echo '"', str, '"', " → ", longestSubstrings(str)
 
echo "\nLongest substrings in concatenated list of words from “unixdict.txt”: ",
longestSubstrings(toSeq("unixdict.txt".lines).join())
Output:
"xyzyabcybdfd" → @["zyabc", "cybdf"]
"xyzyab" → @["zyab"]
"zzzzz" → @["z"]
"a" → @["a"]
"α⊆϶α϶" → @["α⊆϶", "⊆϶α"]
"" → @[""]

Longest substrings in concatenated list of words from “unixdict.txt”: @["mboweldisgrunt"]

Perl[edit]

Gets the same answer that raku does when run against unixdict.txt

#!/usr/bin/perl
 
use strict; # Longest_substrings_without_repeating_characters
use warnings;
 
for my $string ( qw( xyzyabcybdfd xyzyab zzzzz a thisisastringtest ) )
{
local $_ = $string;
my @sub;
length $+ >= $#sub and ++$sub[length $+]{$+} while s/.*(.)(.*\K\1.*)|(.+)//s;
printf "%20s -> %s\n", $string, join ' ', sort keys %{ pop @sub };
}
Output:
        xyzyabcybdfd -> cybdf zyabc
              xyzyab -> zyab
               zzzzz -> z
                   a -> a
   thisisastringtest -> astring ringtes

Phix[edit]

Single pass, maintains a start position and a table of seen character positions.
If the current character has occurred after start, move that along, then add any longer/equal length start..i to the result set.
Note that having moved start along we certainly won't be any longer than but may still be equal to maxlen.
Should be exponentially faster than collecting all possible substrings and eliminating those with duplicate characters or too short.
It will however collect duplicates (when long enough) before weeding them out at the end.

with javascript_semantics

function longest_substrings(sequence s)
    --
    -- s can be a normal 8-bit acsii/ansi string or a
    -- 32-bit unicode sequence from eg utf8_to_utf32().
    -- It will not complain if given utf-8, but may
    -- chop up multibyte characters inappropriately.
    -- s must not contain any 0 or non-integers.
    --
    sequence res = {},
             found = repeat(0,256)  -- (ansi, position)
    integer start = 1,
            maxlen = 0
    for i=1 to length(s) do
        integer si = s[i]           -- (deliberate typecheck)
        if si>length(found) then    -- (must be unicode then)
            assert(si<=#10FFFF) -- (limit size to valid chars)
            found &= repeat(0,si-length(found))
        else
            integer fi = found[si]
            if fi>=start then start = fi+1 end if
        end if
        found[si] = i
        integer newlen = i-start+1
        if newlen>=maxlen then
            if newlen>maxlen then
                res = {}
                maxlen = newlen
            end if
            res = append(res,s[start..i])
        end if
    end for
    res = unique(res,"STABLE")
    return res
end function

?longest_substrings("xyzyabcybdfd")
?longest_substrings("xyzyab")
?longest_substrings("zzzzz")
?longest_substrings("a")
?longest_substrings("")
Output:
{"zyabc","cybdf"}
{"zyab"}
{"z"}
{"a"}
{}

A quick test on unixdict.txt yielded the same results as Raku.

Python[edit]

The following algorithm works but is not terribly efficient for long strings.

def longest_substring(s = "xyzyab"):
substr = [s[x:y] for x in range(len(s)) for y in range(x+1, len(s) + 1)]
no_reps = []
for sub in substr:
if len(sub) == len(set(sub)) and sub not in no_reps:
no_reps.append(sub)
max_len = max(len(sub) for sub in no_reps) if no_reps else 0
max_subs = [sub for sub in no_reps if len(sub) == max_len]
return max_subs
 
if __name__ == '__main__':
for s in ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", "α⊆϶α϶", "",
[1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0]]:
print(f"{s} => {longest_substring(s)}")
Output:
xyzyabcybdfd => ['zyabc', 'cybdf']
xyzyab => ['zyab']
zzzzz => ['z']
a => ['a']
α⊆϶α϶ => ['α⊆϶', '⊆϶α']
 => []
[1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0] => [[3, 4, 1, 2, 5, 6], [2, 5, 6, 1, 7, 8]]

Python: Some optimisation[edit]

The following algorithm only accrues the longest so far.

def longest_substring2(s = "xyzyab"):
max_subs, mx = [], 0
for x in range(len(s)):
for y in range(x+1, len(s) + 1):
sub = s[x:y]
if y - x >= mx and len(sub) == len(set(sub)):
if y - x == mx and sub not in max_subs:
max_subs.append(sub)
else:
max_subs, mx = [sub], y - x
return max_subs
Output:

It gives the same output as function longest_substring() above.

Raku[edit]

Detects any character when checking for repeated characters, even multi-byte composite characters, control sequences and whitespace.

Note that there is no requirement that the substrings be unique, only that each has no repeated characters internally.

Not going to bother handling arrays since an array is not a string, and the task description specifically says 'Given a string'.

sub abbreviate ($_) { .chars > 80 ?? "(abbreviated)\n" ~ .substr(0,35) ~ ' ... ' ~ .substr(*-35) !! $_ }
 
sub longest ($string) {
return 0 => [] unless $string.chars;
my ($start, $end, @substr) = 0, 0;
while ++$end < $string.chars {
my $sub = $string.substr($start, $end - $start);
if $sub.contains: my $next = $string.substr: $end, 1 {
@substr[$end - $start].push($sub) if $end - $start >= @substr.end;
$start += 1 + $sub.index($next);
}
}
@substr[$end - $start].push: $string.substr($start);
@substr.pairs.tail
}
 
# Testing
say "\nOriginal string: {abbreviate $_}\nLongest substring(s) chars: ", .&longest
 
# Standard tests
for flat qww< xyzyabcybdfd xyzyab zzzzz a '' >,
 
# multibyte Unicode
< 👒🎩🎓👩‍👩‍👦‍👦🧢🎓👨‍👧‍👧👒👩‍👩‍👦‍👦🎩🎓👒🧢 α⊆϶α϶ >,
 
# check a file
slurp 'unixdict.txt';
Output:
Original string: xyzyabcybdfd
Longest substring(s) chars: 5 => [zyabc cybdf]

Original string: xyzyab
Longest substring(s) chars: 4 => [zyab]

Original string: zzzzz
Longest substring(s) chars: 1 => [z z z z z]

Original string: a
Longest substring(s) chars: 1 => [a]

Original string: 
Longest substring(s) chars: 0 => []

Original string: 👒🎩🎓👩‍👩‍👦‍👦🧢🎓👨‍👧‍👧👒👩‍👩‍👦‍👦🎩🎓👒🧢
Longest substring(s) chars: 6 => [🧢🎓👨‍👧‍👧👒👩‍👩‍👦‍👦🎩]

Original string: α⊆϶α϶
Longest substring(s) chars: 3 => [α⊆϶ ⊆϶α]

Original string: (abbreviated)
10th
1st
2nd
3rd
4th
5th
6th
7th
8t ... rian
zounds
zucchini
zurich
zygote

Longest substring(s) chars: 15 => [mbowel
disgrunt]

REXX[edit]

/*REXX pgm finds the longest substring (in a given string) without a repeating character*/
parse arg $ /*obtain optional argument from the CL.*/
if $=='' | $=="," then $= 'xyzyabcybdfd' /*Not specified? Then use the default.*/
L= length($) /*L: the length of the given string.*/
maxL= 0 /*MAXL: max length substring (so far).*/
@.= /*array to hold the max len substrings.*/
do j=1 for L; b= substr($, j, 1) /*search for max length substrings. */
x= /*X: the substring, less the 1st char*/
do k=j+1 to L; x= x || substr($, k, 1) /*search for the max length substrings.*/
if \OKx(x) then iterate j /*Are there an replications? Skip it. */
_= length(x); if _<maxL then iterate /*is length less then the current max? */
@._= @._ x; maxL= _ /*add this substring to the max list. */
end /*k*/
end /*j*/
 
say 'longest substring's(words(@.maxL)) "in " $ ' ───► ' @.maxL
exit 0 /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
s: if arg(1)==1 then return arg(3); return word(arg(2) 's', 1) /*simple pluralizer*/
/*──────────────────────────────────────────────────────────────────────────────────────*/
OKx: procedure; parse arg y; do r=1 for length(y)-1 /*look for duplicate chars.*/
if pos(substr(y, r, 1), y, r+1)>0 then return 0
end /*r*/; return 1
output   when using the default input:
longest substrings in  xyzyabcybdfd  ───►   zyabc cybdf

Ring[edit]

 
see "working..." + nl
see "Longest substrings without repeating characters are:" + nl
str = "xyzyabcybdfd"
see "Input: " + str + nl
lenStr = len(str)
strOk = []
lenOk = 0
 
for n = 1 to lenStr
for m = 1 to lenStr-n+1
temp = substr(str,n,m)
add(strOk,temp)
next
next
 
for n = len(strOk) to 1 step -1
if len(strOK[n]) = 1
del(strOK,n)
ok
next
 
for n = 1 to len(strOK)
for m = 1 to len(strOK)-1
if len(strOK[m+1]) > len(strOK[m])
temp = strOK[m]
strOK[m] = strOK[m+1]
strOK[m+1] = temp
ok
next
next
 
for n = 1 to len(strOK)
flag = 1
for m = 1 to len(strOK[n])
for p = m+1 to len(strOK[n])
if strOK[n][m] = strOK[n][p]
flag = 0
exit
ok
next
next
if flag = 1
if len(strOK[n]) >= lenOk
see "len = " + len(strOK[n]) + " : " + strOK[n] + nl
lenOK = len(strOK[n])
ok
ok
next
 
see "done..." + nl
 
Output:
working...
Longest substrings without repeating characters are:
Input: xyzyabcybdfd
len = 5 : zyabc
len = 5 : cybdf
done...

Wren[edit]

Library: Wren-seq
import "/seq" for Lst
 
var substrings = Fn.new { |s|
var n = s.count
if (n == 0) return [""]
var ss = []
for (i in 0...n) {
for (len in 1..n-i) ss.add(s[i...i+len])
}
return ss
}
 
System.print("The longest substring(s) of the following without repeating characters are:\n")
var strs = ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", ""]
for (s in strs) {
var longest = []
var longestLen = 0
for (ss in substrings.call(s)) {
if (Lst.distinct(ss.toList).count == ss.count) {
if (ss.count >= longestLen) {
if (ss.count > longestLen) {
longest.clear()
longestLen = ss.count
}
longest.add(ss)
}
}
}
 
longest = Lst.distinct(longest)
System.print("String = '%(s)'")
System.print(longest)
System.print("Length = %(longest[0].count)\n")
}
Output:
The longest substring(s) of the following without repeating characters are:

String = 'xyzyabcybdfd'
[zyabc, cybdf]
Length = 5

String = 'xyzyab'
[zyab]
Length = 4

String = 'zzzzz'
[z]
Length = 1

String = 'a'
[a]
Length = 1

String = ''
[]
Length = 0