Longest substrings without repeating characters

From Rosetta Code
Longest substrings without repeating characters is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Given a string,   find the   longest substring   without any repeating characters.


Julia

Works on any array, treating a character string as a character array. <lang julia>function alluniquehead(arr)

   len = length(arr)
   if len > 1
       for i in 2:len
           if arr[i] in @view arr[1:i-1]
               return arr[1:i-1]
           end
       end
   end
   return arr[:]

end

function maxuniques(arr)

   length(arr) < 2 && return arr
   amax = [alluniquehead(@view arr[i:end]) for i in 1:length(arr)]
   maxlen = maximum(map(length, amax))
   return filter(a -> length(a) == maxlen, amax)

end

for s in ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", "α⊆϶α϶", "", [1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0]]

   uarray = maxuniques(collect(s))
   !isempty(uarray) && length(uarray[1]) > 1 && uarray[1][1] isa Char && (uarray = String.(uarray))
   println("\"$s\" => ", uarray)

end

</lang>

Output:
"xyzyabcybdfd" => ["zyabc", "cybdf"]
"xyzyab" => ["zyab"]
"zzzzz" => [['z'], ['z'], ['z'], ['z'], ['z']]
"a" => ['a']
"α⊆϶α϶" => ["α⊆϶", "⊆϶α"]
"" => Char[]
"[1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0]" => [[3, 4, 1, 2, 5, 6], [2, 5, 6, 1, 7, 8]]

Python

The following algorithm works but is not terribly efficient for long strings. <lang python>def longest_substring(s = "xyzyab"):

   substr = [s[x:y] for x in range(len(s)) for y in range(x+1, len(s) + 1)]
   no_reps = []
   for sub in substr:
       if len(sub) == len(set(sub)) and sub not in no_reps:
           no_reps.append(sub)
   max_len = max(len(sub) for sub in no_reps) if no_reps else 0
   max_subs = [sub for sub in no_reps if len(sub) == max_len]
   return max_subs

if __name__ == '__main__':

   for s in ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", "α⊆϶α϶", "",
             [1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0]]:
       print(f"{s} => {longest_substring(s)}")</lang>
Output:
xyzyabcybdfd => ['zyabc', 'cybdf']
xyzyab => ['zyab']
zzzzz => ['z']
a => ['a']
α⊆϶α϶ => ['α⊆϶', '⊆϶α']
 => []
[1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0] => [[3, 4, 1, 2, 5, 6], [2, 5, 6, 1, 7, 8]]

Python: Some optimisation

The following algorithm only accrues the longest so far. <lang python>def longest_substring2(s = "xyzyab"):

   max_subs, mx = [], 0
   for x in range(len(s)):
       for y in range(x+1, len(s) + 1):
           sub = s[x:y]
           if y - x >= mx and len(sub) == len(set(sub)):
               if y - x == mx and sub not in max_subs:
                   max_subs.append(sub)
               else:
                   max_subs, mx = [sub], y - x
   return max_subs</lang>
Output:

It gives the same output as function longest_substring() above.

Raku

Detects any character when checking for repeated characters, even multi-byte composite characters, control sequences and whitespace.

Note that there is no requirement that the substrings be unique, only that each has no repeated characters internally. <lang perl6>sub abbreviate ($_) { .chars > 80 ?? "(abbreviated)\n" ~ .substr(0,35) ~ ' ... ' ~ .substr(*-35) !! $_ }

sub longest ($string) {

  return 0 => [] unless $string.chars;
  my ($start, $end, @substr) = 0, 0;
  while ++$end < $string.chars {
      my $sub = $string.substr($start, $end - $start);
      if $sub.contains: my $next = $string.substr: $end, 1 {
          @substr[$end - $start].push: $sub;
          $start += 1 + $sub.index($next);
      }
  }
  @substr[$end - $start].push: $string.substr($start);
  @substr.pairs.tail

}

  1. Standard tests

say "\nOriginal string: {abbreviate $_}\nLongest substring(s) chars: ", .&longest

for flat qww<xyzyabcybdfd xyzyab zzzzz a >,

  1. multibyte Unicode

'👒🎩🎓👩‍👩‍👦‍👦🧢🎓👨‍👧‍👧👒👩‍👩‍👦‍👦🎩🎓👒🧢',

  1. check a file

slurp 'unixdict.txt';</lang>

Output:
Original string: xyzyabcybdfd
Longest substring(s) chars: 5 => [zyabc cybdf]

Original string: xyzyab
Longest substring(s) chars: 4 => [zyab]

Original string: zzzzz
Longest substring(s) chars: 1 => [z z z z z]

Original string: a
Longest substring(s) chars: 1 => [a]

Original string: 
Longest substring(s) chars: 0 => []

Original string: 👒🎩🎓👩‍👩‍👦‍👦🧢🎓👨‍👧‍👧👒👩‍👩‍👦‍👦🎩🎓👒🧢
Longest substring(s) chars: 6 => [🧢🎓👨‍👧‍👧👒👩‍👩‍👦‍👦🎩]

Original string: (abbreviated)
10th
1st
2nd
3rd
4th
5th
6th
7th
8t ... rian
zounds
zucchini
zurich
zygote

Longest substring(s) chars: 15 => [mbowel
disgrunt]

REXX

<lang rexx>/*REXX pgm finds the longest substring (in a given string) without a repeating character*/ parse arg $ /*obtain optional argument from the CL.*/ if $== | $=="," then $= 'xyzyabcybdfd' /*Not specified? Then use the default.*/ L= length($) /*L: the length of the given string.*/ maxL= 0 /*MAXL: max length substring (so far).*/ @.= /*array to hold the max len substrings.*/

    do   j=1  for L;    b= substr($, j, 1)      /*search for max length substrings.    */
    x=                                          /*X:   the substring, less the 1st char*/
       do k=j+1  to L;  x= x || substr($, k, 1) /*search for the max length substrings.*/
       if \okx(x)  then iterate j               /*Are there an replications?  Skip it. */
       _= length(x);  if _<maxL  then iterate   /*is length less then the current max? */
       @._= @._  x;   maxL= _                   /*add this substring to the max list.  */
       end   /*k*/
    end      /*j*/

say 'longest substring's(words(@.maxL)) "in " $ ' ───► ' @.maxL exit 0 /*stick a fork in it, we're all done. */ /*──────────────────────────────────────────────────────────────────────────────────────*/ s: if arg(1)==1 then return arg(3); return word(arg(2) 's', 1) /*simple pluralizer*/ /*──────────────────────────────────────────────────────────────────────────────────────*/ OKx: procedure; parse arg y; do r=1 for length(y)-1 /*look for duplicate chars.*/

                                  if pos(substr(y, r, 1), y, r+1)>0  then return 0
                                  end   /*r*/;                            return 1</lang>
output   when using the default input:
longest substrings in  xyzyabcybdfd  ───►   zyabc cybdf

Ring

<lang ring> see "working..." + nl see "Longest substrings without repeating characters are:" + nl str = "xyzyabcybdfd" see "Input: " + str + nl lenStr = len(str) strOk = [] lenOk = 0

for n = 1 to lenStr

   for m = 1 to lenStr-n+1
       temp = substr(str,n,m)
       add(strOk,temp)
   next

next

for n = len(strOk) to 1 step -1

   if len(strOK[n]) = 1
      del(strOK,n)
   ok

next

for n = 1 to len(strOK)

   for m = 1 to len(strOK)-1
       if len(strOK[m+1]) > len(strOK[m])
          temp = strOK[m]
          strOK[m] = strOK[m+1]
          strOK[m+1] = temp
       ok
   next

next

for n = 1 to len(strOK)

   flag = 1
   for m = 1 to len(strOK[n])
       for p = m+1 to len(strOK[n])
           if strOK[n][m] = strOK[n][p]
              flag = 0
              exit
           ok
       next
   next
   if flag = 1
      if len(strOK[n]) >= lenOk
         see "len = " + len(strOK[n]) + " : " + strOK[n] + nl
         lenOK = len(strOK[n])
      ok
   ok

next

see "done..." + nl </lang>

Output:
working...
Longest substrings without repeating characters are:
Input: xyzyabcybdfd
len = 5 : zyabc
len = 5 : cybdf
done...

Wren

Library: Wren-seq

<lang ecmascript>import "/seq" for Lst

var substrings = Fn.new { |s|

   var n = s.count
   if (n == 0) return [""]
   var ss = []
   for (i in 0...n) {
       for (len in 1..n-i) ss.add(s[i...i+len])
   }
   return ss

}

System.print("The longest substring(s) of the following without repeating characters are:\n") var strs = ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", "", ] for (s in strs) {

   var longest = []
   var longestLen = 0
   for (ss in substrings.call(s)) {
       if (Lst.distinct(ss.toList).count == ss.count) { 
           if (ss.count >= longestLen) {
               if (ss.count > longestLen) {
                   longest.clear()
                   longestLen = ss.count
               }
               longest.add(ss.join())
           }
       }
   }
   longest = Lst.distinct(longest)
   System.print("String = '%(s)'")
   System.print(longest)
   System.print("Length = %(longest[0].count)\n")

}</lang>

Output:
The longest substring(s) of the following without repeating characters are:

String = 'xyzyabcybdfd'
[zyabc, cybdf]
Length = 5

String = 'xyzyab'
[zyab]
Length = 4

String = 'zzzzz'
[z]
Length = 1

String = 'a'
[a]
Length = 1

String = ''
[]
Length = 0