Longest substrings without repeating characters
- Task
Given a string, find the longest substring without any repeating characters.
Julia
Works on any array, treating a character string as a character array. <lang julia>function alluniquehead(arr)
len = length(arr) if len > 1 for i in 2:len if arr[i] in @view arr[1:i-1] return arr[1:i-1] end end end return arr[:]
end
function maxuniques(arr)
length(arr) < 2 && return arr amax = [alluniquehead(@view arr[i:end]) for i in 1:length(arr)] maxlen = maximum(map(length, amax)) return filter(a -> length(a) == maxlen, amax)
end
for s in ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", "α⊆϶α϶", "", [1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0]]
uarray = maxuniques(collect(s)) !isempty(uarray) && length(uarray[1]) > 1 && uarray[1][1] isa Char && (uarray = String.(uarray)) println("\"$s\" => ", uarray)
end
</lang>
- Output:
"xyzyabcybdfd" => ["zyabc", "cybdf"] "xyzyab" => ["zyab"] "zzzzz" => [['z'], ['z'], ['z'], ['z'], ['z']] "a" => ['a'] "α⊆϶α϶" => ["α⊆϶", "⊆϶α"] "" => Char[] "[1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0]" => [[3, 4, 1, 2, 5, 6], [2, 5, 6, 1, 7, 8]]
Phix
Single pass, maintains a table of seen characters and a start position.
If the current character occurs in start..i we trim left until cleared, otherwise/then we add any longer/equal len start..i to the result set.
Should be exponentially faster than collecting all possible substrings and then eliminating those with duplicate characters or too short.
It will however collect duplicates (when long enough) before weeding them out at the end.
with javascript_semantics function longest_substrings(sequence s) -- -- s can be a normal 8-bit ansi string or a -- 32-bit unicode sequence from eg utf8_to_utf32(). -- it will not complain if given utf-8, but it may -- chop up multi-byte characters inappropriately. -- s must not contain any 0 or non-integer elements. -- sequence res = {}, found = repeat(false,256) -- (for ansi) integer start = 1, maxlen = 0 for i=1 to length(s) do integer si = s[i] -- (deliberate typecheck) if si>length(found) then -- (for unicode) found &= repeat(false,si-length(found)) elsif found[si] then while true do integer ss = s[start] start += 1 found[ss] = false if ss = si then exit end if end while -- aside: we will not have found anything -- longer, but may have trimmed 1 added 1 -- and therefore still be === maxlen. end if found[si] = true integer newlen = i-start+1 if newlen>maxlen then res = {} maxlen = newlen end if if newlen=maxlen then res = append(res,s[start..i]) end if end for res = unique(res,"STABLE") return res end function ?longest_substrings("xyzyabcybdfd") ?longest_substrings("xyzyab") ?longest_substrings("zzzzz") ?longest_substrings("a") ?longest_substrings("")
- Output:
{"zyabc","cybdf"} {"zyab"} {"z"} {"a"} {}
A quick test on unixdict.txt yielded the same results as Raku.
Python
The following algorithm works but is not terribly efficient for long strings. <lang python>def longest_substring(s = "xyzyab"):
substr = [s[x:y] for x in range(len(s)) for y in range(x+1, len(s) + 1)] no_reps = [] for sub in substr: if len(sub) == len(set(sub)) and sub not in no_reps: no_reps.append(sub) max_len = max(len(sub) for sub in no_reps) if no_reps else 0 max_subs = [sub for sub in no_reps if len(sub) == max_len] return max_subs
if __name__ == '__main__':
for s in ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", "α⊆϶α϶", "", [1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0]]: print(f"{s} => {longest_substring(s)}")</lang>
- Output:
xyzyabcybdfd => ['zyabc', 'cybdf'] xyzyab => ['zyab'] zzzzz => ['z'] a => ['a'] α⊆϶α϶ => ['α⊆϶', '⊆϶α'] => [] [1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0] => [[3, 4, 1, 2, 5, 6], [2, 5, 6, 1, 7, 8]]
Python: Some optimisation
The following algorithm only accrues the longest so far. <lang python>def longest_substring2(s = "xyzyab"):
max_subs, mx = [], 0 for x in range(len(s)): for y in range(x+1, len(s) + 1): sub = s[x:y] if y - x >= mx and len(sub) == len(set(sub)): if y - x == mx and sub not in max_subs: max_subs.append(sub) else: max_subs, mx = [sub], y - x return max_subs</lang>
- Output:
It gives the same output as function longest_substring()
above.
Raku
Detects any character when checking for repeated characters, even multi-byte composite characters, control sequences and whitespace.
Note that there is no requirement that the substrings be unique, only that each has no repeated characters internally.
Not going to bother handling arrays since an array is not a string, and the task description specifically says 'Given a string'. <lang perl6>sub abbreviate ($_) { .chars > 80 ?? "(abbreviated)\n" ~ .substr(0,35) ~ ' ... ' ~ .substr(*-35) !! $_ }
sub longest ($string) {
return 0 => [] unless $string.chars; my ($start, $end, @substr) = 0, 0; while ++$end < $string.chars { my $sub = $string.substr($start, $end - $start); if $sub.contains: my $next = $string.substr: $end, 1 { @substr[$end - $start].push($sub) if $end - $start >= @substr.end; $start += 1 + $sub.index($next); } } @substr[$end - $start].push: $string.substr($start); @substr.pairs.tail
}
- Testing
say "\nOriginal string: {abbreviate $_}\nLongest substring(s) chars: ", .&longest
- Standard tests
for flat qww< xyzyabcybdfd xyzyab zzzzz a >,
- multibyte Unicode
< 👒🎩🎓👩👩👦👦🧢🎓👨👧👧👒👩👩👦👦🎩🎓👒🧢 α⊆϶α϶ >,
- check a file
slurp 'unixdict.txt';</lang>
- Output:
Original string: xyzyabcybdfd Longest substring(s) chars: 5 => [zyabc cybdf] Original string: xyzyab Longest substring(s) chars: 4 => [zyab] Original string: zzzzz Longest substring(s) chars: 1 => [z z z z z] Original string: a Longest substring(s) chars: 1 => [a] Original string: Longest substring(s) chars: 0 => [] Original string: 👒🎩🎓👩👩👦👦🧢🎓👨👧👧👒👩👩👦👦🎩🎓👒🧢 Longest substring(s) chars: 6 => [🧢🎓👨👧👧👒👩👩👦👦🎩] Original string: α⊆϶α϶ Longest substring(s) chars: 3 => [α⊆϶ ⊆϶α] Original string: (abbreviated) 10th 1st 2nd 3rd 4th 5th 6th 7th 8t ... rian zounds zucchini zurich zygote Longest substring(s) chars: 15 => [mbowel disgrunt]
REXX
<lang rexx>/*REXX pgm finds the longest substring (in a given string) without a repeating character*/ parse arg $ /*obtain optional argument from the CL.*/ if $== | $=="," then $= 'xyzyabcybdfd' /*Not specified? Then use the default.*/ L= length($) /*L: the length of the given string.*/ maxL= 0 /*MAXL: max length substring (so far).*/ @.= /*array to hold the max len substrings.*/
do j=1 for L; b= substr($, j, 1) /*search for max length substrings. */ x= /*X: the substring, less the 1st char*/ do k=j+1 to L; x= x || substr($, k, 1) /*search for the max length substrings.*/ if \okx(x) then iterate j /*Are there an replications? Skip it. */ _= length(x); if _<maxL then iterate /*is length less then the current max? */ @._= @._ x; maxL= _ /*add this substring to the max list. */ end /*k*/ end /*j*/
say 'longest substring's(words(@.maxL)) "in " $ ' ───► ' @.maxL exit 0 /*stick a fork in it, we're all done. */ /*──────────────────────────────────────────────────────────────────────────────────────*/ s: if arg(1)==1 then return arg(3); return word(arg(2) 's', 1) /*simple pluralizer*/ /*──────────────────────────────────────────────────────────────────────────────────────*/ OKx: procedure; parse arg y; do r=1 for length(y)-1 /*look for duplicate chars.*/
if pos(substr(y, r, 1), y, r+1)>0 then return 0 end /*r*/; return 1</lang>
- output when using the default input:
longest substrings in xyzyabcybdfd ───► zyabc cybdf
Ring
<lang ring> see "working..." + nl see "Longest substrings without repeating characters are:" + nl str = "xyzyabcybdfd" see "Input: " + str + nl lenStr = len(str) strOk = [] lenOk = 0
for n = 1 to lenStr
for m = 1 to lenStr-n+1 temp = substr(str,n,m) add(strOk,temp) next
next
for n = len(strOk) to 1 step -1
if len(strOK[n]) = 1 del(strOK,n) ok
next
for n = 1 to len(strOK)
for m = 1 to len(strOK)-1 if len(strOK[m+1]) > len(strOK[m]) temp = strOK[m] strOK[m] = strOK[m+1] strOK[m+1] = temp ok next
next
for n = 1 to len(strOK)
flag = 1 for m = 1 to len(strOK[n]) for p = m+1 to len(strOK[n]) if strOK[n][m] = strOK[n][p] flag = 0 exit ok next next if flag = 1 if len(strOK[n]) >= lenOk see "len = " + len(strOK[n]) + " : " + strOK[n] + nl lenOK = len(strOK[n]) ok ok
next
see "done..." + nl </lang>
- Output:
working... Longest substrings without repeating characters are: Input: xyzyabcybdfd len = 5 : zyabc len = 5 : cybdf done...
Wren
<lang ecmascript>import "/seq" for Lst
var substrings = Fn.new { |s|
var n = s.count if (n == 0) return [""] var ss = [] for (i in 0...n) { for (len in 1..n-i) ss.add(s[i...i+len]) } return ss
}
System.print("The longest substring(s) of the following without repeating characters are:\n") var strs = ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", "", ] for (s in strs) {
var longest = [] var longestLen = 0 for (ss in substrings.call(s)) { if (Lst.distinct(ss.toList).count == ss.count) { if (ss.count >= longestLen) { if (ss.count > longestLen) { longest.clear() longestLen = ss.count } longest.add(ss.join()) } } }
longest = Lst.distinct(longest) System.print("String = '%(s)'") System.print(longest) System.print("Length = %(longest[0].count)\n")
}</lang>
- Output:
The longest substring(s) of the following without repeating characters are: String = 'xyzyabcybdfd' [zyabc, cybdf] Length = 5 String = 'xyzyab' [zyab] Length = 4 String = 'zzzzz' [z] Length = 1 String = 'a' [a] Length = 1 String = '' [] Length = 0