Split a character string based on change of character: Difference between revisions

Content added Content deleted

Inline

Revision as of 17:20, 31 December 2016

Task

Split a (character) string into comma (plus a blank) delimited strings based on a change of character (left to right).

Show the output here (use the 1st example below).

Blanks should be treated as any other character (except they are problematic to display clearly). The same applies to commas.

For instance, the string:

 gHHH5YY++///\

should be split and show:

 g, HHH, 5, YY, ++, ///, \

AppleScript

Translation of: JavaScript

<lang AppleScript>on run

   -- Joins the characters of one run back into a single string
   set joinChars to curry(intercalate)'s lambda("")
   
   -- Runs of identical adjacent characters, then each run as a string
   set lstRuns to group("gHHH5YY++///\\")
   set lstStrings to map(joinChars, lstRuns)
   
   intercalate(", ", lstStrings)
   
   --> "g, HHH, 5, YY, ++, ///, \\"

end run

-- GENERIC FUNCTIONS

-- group :: Eq a => [a] -> [[a]]
-- Partition xs into runs of adjacent items that are equal to each other.
on group(xs)

   -- Equality test handed to groupBy
   script eq
       on lambda(a, b)
           a = b
       end lambda
   end script
   
   groupBy(eq, xs)

end group

-- groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
-- Partition xs into runs: each run is its first item x followed by the
-- unbroken stretch of following items y for which f(x, y) is true.
-- Returns {} for an empty input.
on groupBy(f, xs)

   set lng to length of xs
   if lng > 0 then
       set x to item 1 of xs
       set mf to mReturn(f)
       
       -- Predicate comparing each later item with the head of the current run
       script matchPrevious
           on lambda(y)
               mf's lambda(x, y)
           end lambda
       end script
       
       set {ys, zs} to span(matchPrevious, tail(xs))
       set lstGroup to {{x} & ys}
       
       -- Recurse on whatever follows the current run
       if zs ≠ {} then
           lstGroup & groupBy(mf, zs)
       else
           lstGroup
       end if
   else
       {}
   end if

end groupBy

-- Span of unbroken predicate matches at left, returned with remainder
-- span :: (a -> Bool) -> [a] -> ([a],[a])
on span(f, xs)

   set lng to length of xs
   set i to 0
   -- Count the leading items that satisfy f
   tell mReturn(f)
       repeat while i < lng and lambda(item (i + 1) of xs)
           set i to i + 1
       end repeat
   end tell
   
   -- i = 0: nothing matched; i = lng: everything matched
   if i > 0 then
       if i < lng then
           {items 1 thru i of xs, (items (i + 1) thru -1 of xs)}
       else
           {xs, {}}
       end if
   else
       {{}, xs}
   end if

end span

-- intercalate :: Text -> [Text] -> Text
-- Join the items of lstText into one text, with strText between items.
on intercalate(strText, lstText)

   -- Swap in strText as the delimiter, remembering the previous one
   set {dlm, my text item delimiters} to {my text item delimiters, strText}
   set strJoined to lstText as text
   -- Restore the previous delimiters
   set my text item delimiters to dlm
   return strJoined

end intercalate

-- map :: (a -> b) -> [a] -> [b]
-- Build a new list by applying f to every item of xs.
-- f is called as lambda(item, index, xs).
on map(f, xs)

   tell mReturn(f)
       set lng to length of xs
       set lst to {}
       repeat with i from 1 to lng
           set end of lst to lambda(item i of xs, i, xs)
       end repeat
       return lst
   end tell

end map

-- curry :: (Script|Handler) -> Script
-- Wrap a two-argument f so that it can be applied one argument at a time:
-- curry(f)'s lambda(a) returns a script whose lambda(b) calls f with (a, b).
on curry(f)

   script
       on lambda(a)
           script
               on lambda(b)
                   lambda(a, b) of mReturn(f)
               end lambda
           end script
       end lambda
   end script

end curry

-- Lift 2nd class handler function into 1st class script wrapper
-- mReturn :: Handler -> Script
on mReturn(f)

   if class of f is script then
       -- Already a script object: pass it through unchanged
       f
   else
       -- Wrap the handler so that it can be called as lambda(...)
       script
           property lambda : f
       end script
   end if

end mReturn

-- tail :: [a] -> [a]
-- Everything after the first item; {} when xs has fewer than two items.
on tail(xs)

   if length of xs > 1 then
       items 2 thru -1 of xs
   else
       {}
   end if

end tail</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Haskell

<lang Haskell>import Data.List (group, intercalate)

-- | Print the sample string split into runs of identical characters,
-- separated by a comma and a blank.
main :: IO ()
main = putStrLn $ intercalate ", " (group "gHHH5YY++///\\")</lang>

Output:

g, HHH, 5, YY, ++, ///, \

JavaScript

ES6

Translation of: Haskell

<lang JavaScript>(() => {

   'use strict';

   // GENERIC FUNCTIONS

   // Runs of adjacent, strictly equal items
   // group :: Eq a => [a] -> [[a]]
   const group = xs => groupBy((x, y) => x === y, xs);

   // Runs of items, each run being a head item plus the unbroken stretch
   // of following items y for which f(head, y) holds
   // groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
   const groupBy = (f, xs) => {
       const runs = [];
       let rest = xs;

       while (rest.length > 0) {
           const head = rest[0],
               [same, others] = span(y => f(head, y), rest.slice(1));

           runs.push(same.length ? [head].concat(same) : [head]);
           rest = others;
       }

       return runs;
   };

   // Longest prefix whose items all satisfy f, paired with what is left
   //  span :: (a -> Bool) -> [a] -> ([a],[a])
   const span = (f, xs) => {
       const total = xs.length;
       let splitAt = 0;

       while (splitAt < total && f(xs[splitAt])) splitAt += 1;

       return [xs.slice(0, splitAt), xs.slice(splitAt)];
   };

   // Items of xs joined into one string, with s between neighbours
   // intercalate :: String -> [a] -> String
   const intercalate = (s, xs) => {
       return xs.join(s);
   };

   // TEST
   return intercalate(", ", group("gHHH5YY++///\\".split())
       .map(x => x.join()));

   // -> "g, HHH, 5, YY, ++, ///, \\"

})();</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Perl 6

Works with: Rakudo version 2016.12

Accept a string at the command line to split; if none provided, use default.

<lang perl6>my $string = @*ARGS[0] // < gHHH5YY++///\ >;   # first command-line argument, else the default
put 'Orginal: ', $string;
# (.)$0* matches one character plus any repeats of it; :g collects every such run
my @runs = $string ~~ m:g/(.)$0*/;
put ' Split: ', join ', ', @runs;</lang>

Output with default string:

Orginal: gHHH5YY++///\
  Split: g, HHH, 5, YY, ++, ///, \

Note that Perl 6 works with Unicode natively and handles combiners and zero width characters correctly.

Output with string fffn⃗n⃗n⃗»»» ℵℵ☄☄☃☃̂☃☄☄:

(For those of you with crappy browsers, that's: {f} x 3, {Zero-width non-breaking space} x 3, {n with combining over arrow} x 3, {Space} x 2, {Right-pointing double angle quotation mark} x 3, {Alef symbol} x 2, {Comet} x 2, {Snowman} x 1, {Snowman with combining circumflex accent} x 1, {Snowman} x 1, {Comet} x 2.)

Orginal: fffn⃗n⃗n⃗»»»  ℵℵ☄☄☃☃̂☃☄☄
  Split: fff, , n⃗n⃗n⃗, »»»,   , ℵℵ, ☄☄, ☃, ☃̂, ☃, ☄☄

Python

<lang python>import itertools

try: input = raw_input except: pass

s = input() groups = [] for _, g in itertools.groupby(s):

   groups.append(.join(g))

print(' input string: %s' % s) print(' output string: %s' % ', '.join(groups))</lang>

Output:

when using the default input

      input string:  gHHH5YY++///\
     output string:  g, HHH, 5, YY, ++, ///, \

REXX

<lang rexx>/*REXX program splits a string based on change of character ───► a comma delimited list.*/
parse arg str .                                  /*obtain optional arguments from the CL*/
if str=='' | str==","  then str= 'gHHH5YY++///\' /*Not specified?  Then use the default.*/
p=left(str, 1)                                   /*placeholder for the "previous" string*/
$=                                               /*     "       "    "     output      "   */

     do j=1  for length(str);      @=substr(str, j, 1)    /*obtain a char from string. */
     if @\==left(p, 1)   then do;  $=$',' @;  p=;  end    /*different then previous?   */
                         else      $=$ || @               /*a replicated character.    */
     p=p || @                                             /*append char to current list*/
     end   /*j*/

say ' input string: ' str
say ' output string: ' $                         /*stick a fork in it,  we're all done. */</lang> output when using the default input:

      input string:  gHHH5YY++///\
     output string:  g, HHH, 5, YY, ++, ///, \