Longest substrings without repeating characters

From Rosetta Code
Revision as of 12:55, 29 May 2021 by Thundergnat (talk | contribs) (→‎{{header|Raku}}: more memory efficient for long strings.)
Longest substrings without repeating characters is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Given a string,   find the   longest substring   without any repeating characters.


Julia

Works on any array, treating a character string as a character array. <lang julia>function alluniquehead(arr)

   len = length(arr)
   if len > 1
       for i in 2:len
           if arr[i] in @view arr[1:i-1]
               return arr[1:i-1]
           end
       end
   end
   return arr[:]

end

function maxuniques(arr)

   length(arr) < 2 && return arr
   amax = [alluniquehead(@view arr[i:end]) for i in 1:length(arr)]
   maxlen = maximum(map(length, amax))
   return filter(a -> length(a) == maxlen, amax)

end

for s in ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", "α⊆϶α϶", "", [1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0]]

   uarray = maxuniques(collect(s))
   !isempty(uarray) && length(uarray[1]) > 1 && uarray[1][1] isa Char && (uarray = String.(uarray))
   println("\"$s\" => ", uarray)

end

</lang>

Output:
"xyzyabcybdfd" => ["zyabc", "cybdf"]
"xyzyab" => ["zyab"]
"zzzzz" => [['z'], ['z'], ['z'], ['z'], ['z']]
"a" => ['a']
"α⊆϶α϶" => ["α⊆϶", "⊆϶α"]
"" => Char[]
"[1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0]" => [[3, 4, 1, 2, 5, 6], [2, 5, 6, 1, 7, 8]]

Python

The following algorithm works but is not terribly efficient for long strings. <lang python>def longest_substring(s = "xyzyab"):

   substr = [s[x:y] for x in range(len(s)) for y in range(x+1, len(s) + 1)]
   no_reps = []
   for sub in substr:
       if len(sub) == len(set(sub)) and sub not in no_reps:
           no_reps.append(sub)
   max_len = max(len(sub) for sub in no_reps) if no_reps else 0
   max_subs = [sub for sub in no_reps if len(sub) == max_len]
   return max_subs

if __name__ == '__main__':

   for s in ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", "α⊆϶α϶", "",
             [1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0]]:
       print(f"{s} => {longest_substring(s)}")</lang>
Output:
xyzyabcybdfd => ['zyabc', 'cybdf']
xyzyab => ['zyab']
zzzzz => ['z']
a => ['a']
α⊆϶α϶ => ['α⊆϶', '⊆϶α']
 => []
[1, 2, 3, 4, 1, 2, 5, 6, 1, 7, 8, 1, 0] => [[3, 4, 1, 2, 5, 6], [2, 5, 6, 1, 7, 8]]

Python: Some optimisation

The following algorithm only accrues the longest so far. <lang python>def longest_substring2(s = "xyzyab"):

   max_subs, mx = [], 0
   for x in range(len(s)):
       for y in range(x+1, len(s) + 1):
           sub = s[x:y]
           if y - x >= mx and len(sub) == len(set(sub)):
               if y - x == mx and sub not in max_subs:
                   max_subs.append(sub)
               else:
                   max_subs, mx = [sub], y - x
   return max_subs</lang>
Output:

It gives the same output as function longest_substring() above.

Raku

Detects any character when checking for repeated characters, even multi-byte composite characters, control sequences and whitespace.

Note that there is no requirement that the substrings be unique, only that each has no repeated characters internally.

Not going to bother handling arrays since an array is not a string, and the task description specifically says 'Given a string'. <lang perl6>sub abbreviate ($_) { .chars > 80 ?? "(abbreviated)\n" ~ .substr(0,35) ~ ' ... ' ~ .substr(*-35) !! $_ }

sub longest ($string) {

  return 0 => [] unless $string.chars;
  my ($start, $end, @substr) = 0, 0;
  while ++$end < $string.chars {
      my $sub = $string.substr($start, $end - $start);
      if $sub.contains: my $next = $string.substr: $end, 1 {
          @substr[$end - $start].push($sub) if $end - $start >= @substr.end;
          $start += 1 + $sub.index($next);
      }
  }
  @substr[$end - $start].push: $string.substr($start);
  @substr.pairs.tail

}

  1. Testing

say "\nOriginal string: {abbreviate $_}\nLongest substring(s) chars: ", .&longest

  1. Standard tests

for flat qww< xyzyabcybdfd xyzyab zzzzz a >,

  1. multibyte Unicode

< 👒🎩🎓👩‍👩‍👦‍👦🧢🎓👨‍👧‍👧👒👩‍👩‍👦‍👦🎩🎓👒🧢 α⊆϶α϶ >,

  1. check a file

slurp 'unixdict.txt';</lang>

Output:
Original string: xyzyabcybdfd
Longest substring(s) chars: 5 => [zyabc cybdf]

Original string: xyzyab
Longest substring(s) chars: 4 => [zyab]

Original string: zzzzz
Longest substring(s) chars: 1 => [z z z z z]

Original string: a
Longest substring(s) chars: 1 => [a]

Original string: 
Longest substring(s) chars: 0 => []

Original string: 👒🎩🎓👩‍👩‍👦‍👦🧢🎓👨‍👧‍👧👒👩‍👩‍👦‍👦🎩🎓👒🧢
Longest substring(s) chars: 6 => [🧢🎓👨‍👧‍👧👒👩‍👩‍👦‍👦🎩]

Original string: α⊆϶α϶
Longest substring(s) chars: 3 => [α⊆϶ ⊆϶α]

Original string: (abbreviated)
10th
1st
2nd
3rd
4th
5th
6th
7th
8t ... rian
zounds
zucchini
zurich
zygote

Longest substring(s) chars: 15 => [mbowel
disgrunt]

REXX

<lang rexx>/*REXX pgm finds the longest substring (in a given string) without a repeating character*/ parse arg $ /*obtain optional argument from the CL.*/ if $== | $=="," then $= 'xyzyabcybdfd' /*Not specified? Then use the default.*/ L= length($) /*L: the length of the given string.*/ maxL= 0 /*MAXL: max length substring (so far).*/ @.= /*array to hold the max len substrings.*/

    do   j=1  for L;    b= substr($, j, 1)      /*search for max length substrings.    */
    x=                                          /*X:   the substring, less the 1st char*/
       do k=j+1  to L;  x= x || substr($, k, 1) /*search for the max length substrings.*/
       if \okx(x)  then iterate j               /*Are there an replications?  Skip it. */
       _= length(x);  if _<maxL  then iterate   /*is length less then the current max? */
       @._= @._  x;   maxL= _                   /*add this substring to the max list.  */
       end   /*k*/
    end      /*j*/

say 'longest substring's(words(@.maxL)) "in " $ ' ───► ' @.maxL exit 0 /*stick a fork in it, we're all done. */ /*──────────────────────────────────────────────────────────────────────────────────────*/ s: if arg(1)==1 then return arg(3); return word(arg(2) 's', 1) /*simple pluralizer*/ /*──────────────────────────────────────────────────────────────────────────────────────*/ OKx: procedure; parse arg y; do r=1 for length(y)-1 /*look for duplicate chars.*/

                                  if pos(substr(y, r, 1), y, r+1)>0  then return 0
                                  end   /*r*/;                            return 1</lang>
output   when using the default input:
longest substrings in  xyzyabcybdfd  ───►   zyabc cybdf

Ring

<lang ring> see "working..." + nl see "Longest substrings without repeating characters are:" + nl str = "xyzyabcybdfd" see "Input: " + str + nl lenStr = len(str) strOk = [] lenOk = 0

for n = 1 to lenStr

   for m = 1 to lenStr-n+1
       temp = substr(str,n,m)
       add(strOk,temp)
   next

next

for n = len(strOk) to 1 step -1

   if len(strOK[n]) = 1
      del(strOK,n)
   ok

next

for n = 1 to len(strOK)

   for m = 1 to len(strOK)-1
       if len(strOK[m+1]) > len(strOK[m])
          temp = strOK[m]
          strOK[m] = strOK[m+1]
          strOK[m+1] = temp
       ok
   next

next

for n = 1 to len(strOK)

   flag = 1
   for m = 1 to len(strOK[n])
       for p = m+1 to len(strOK[n])
           if strOK[n][m] = strOK[n][p]
              flag = 0
              exit
           ok
       next
   next
   if flag = 1
      if len(strOK[n]) >= lenOk
         see "len = " + len(strOK[n]) + " : " + strOK[n] + nl
         lenOK = len(strOK[n])
      ok
   ok

next

see "done..." + nl </lang>

Output:
working...
Longest substrings without repeating characters are:
Input: xyzyabcybdfd
len = 5 : zyabc
len = 5 : cybdf
done...

Wren

Library: Wren-seq

<lang ecmascript>import "/seq" for Lst

var substrings = Fn.new { |s|

   var n = s.count
   if (n == 0) return [""]
   var ss = []
   for (i in 0...n) {
       for (len in 1..n-i) ss.add(s[i...i+len])
   }
   return ss

}

System.print("The longest substring(s) of the following without repeating characters are:\n") var strs = ["xyzyabcybdfd", "xyzyab", "zzzzz", "a", "", ] for (s in strs) {

   var longest = []
   var longestLen = 0
   for (ss in substrings.call(s)) {
       if (Lst.distinct(ss.toList).count == ss.count) { 
           if (ss.count >= longestLen) {
               if (ss.count > longestLen) {
                   longest.clear()
                   longestLen = ss.count
               }
               longest.add(ss.join())
           }
       }
   }
   longest = Lst.distinct(longest)
   System.print("String = '%(s)'")
   System.print(longest)
   System.print("Length = %(longest[0].count)\n")

}</lang>

Output:
The longest substring(s) of the following without repeating characters are:

String = 'xyzyabcybdfd'
[zyabc, cybdf]
Length = 5

String = 'xyzyab'
[zyab]
Length = 4

String = 'zzzzz'
[z]
Length = 1

String = 'a'
[a]
Length = 1

String = ''
[]
Length = 0