Split a character string based on change of character: Difference between revisions

Content added Content deleted

Inline

Revision as of 07:58, 1 January 2017

Task

Split a (character) string into comma (plus a blank) delimited strings based on a change of character (left to right).

Show the output here (use the 1st example below).

Blanks should be treated as any other character (except they are problematic to display clearly). The same applies to commas.

For instance, the string:

 gHHH5YY++///\

should be split and show:

 g, HHH, 5, YY, ++, ///, \

AppleScript

Translation of: JavaScript

<lang AppleScript>on run
   -- Break the sample text into runs of identical characters, turn each
   -- run (a list of characters) back into a string, then comma-delimit.
   set charRuns to group("gHHH5YY++///\\")
   set runStrings to map(curry(intercalate)'s lambda(""), charRuns)
   
   return intercalate(", ", runStrings)
   
   --> "g, HHH, 5, YY, ++, ///, \\"
end run

-- GENERIC FUNCTIONS

-- group :: Eq a => [a] -> [[a]]
-- Splits xs into runs of adjacent equal elements.
on group(xs)
   script eq
       on lambda(a, b)
           -- AppleScript text comparison ignores case by default; compare
           -- case-sensitively so that "h" and "H" start separate runs.
           considering case
               return a = b
           end considering
       end lambda
   end script
   
   groupBy(eq, xs)
end group

-- groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
-- Splits xs into runs of adjacent elements. An element joins the current
-- run when f(first element of that run, element) is true; otherwise it
-- starts a new run. Returns {} for empty input.
on groupBy(f, xs)
   set mf to mReturn(f)
   
   -- Fold step. The accumulator a is a record:
   --   active : the run currently being built
   --   sofar  : the runs already completed
   script enGroup
       on lambda(a, x)
           set h to cond(length of (active of a) > 0, item 1 of active of a, missing value)
           
           if h is not missing value and mf's lambda(h, x) then
               -- Wrap x in a list so that an x which is itself a list is
               -- appended as one element rather than spliced in.
               {active:(active of a) & {x}, sofar:sofar of a}
           else
               {active:{x}, sofar:(sofar of a) & {active of a}}
           end if
       end lambda
   end script
   
   if length of xs > 0 then
       -- Seed the fold with the first element, then append the final run.
       set dct to foldl(enGroup, {active:{item 1 of xs}, sofar:{}}, tail(xs))
       sofar of dct & cond(length of (active of dct) > 0, {active of dct}, {})
   else
       {}
   end if
end groupBy

-- foldl :: (a -> b -> a) -> a -> [b] -> a
-- Left fold: starting from startValue, replaces the accumulator with
-- f(accumulator, element, index, xs) for each element of xs in order,
-- and returns the final accumulator.
on foldl(f, startValue, xs)
   tell mReturn(f)
       set v to startValue
       set lng to length of xs
       repeat with i from 1 to lng
           set v to lambda(v, item i of xs, i, xs)
       end repeat
       return v
   end tell
end foldl

-- cond :: Bool -> a -> a -> a
-- Returns f when bool is true, otherwise g.
-- Both f and g are ordinary arguments, so both are evaluated before the call.
on cond(bool, f, g)
   if bool then
       f
   else
       g
   end if
end cond

-- intercalate :: Text -> [Text] -> Text
-- Joins the items of lstText into one text, separated by strText.
on intercalate(strText, lstText)
   -- Swap in strText as the delimiter, remembering the previous value
   -- so it can be restored after the join.
   set {dlm, my text item delimiters} to {my text item delimiters, strText}
   set strJoined to lstText as text
   set my text item delimiters to dlm
   return strJoined
end intercalate

-- map :: (a -> b) -> [a] -> [b]
-- Returns a new list holding f(element, index, xs) for each element of xs,
-- in the original order.
on map(f, xs)
   tell mReturn(f)
       set lng to length of xs
       set lst to {}
       repeat with i from 1 to lng
           set end of lst to lambda(item i of xs, i, xs)
       end repeat
       return lst
   end tell
end map

-- curry :: (Script|Handler) -> Script
-- Wraps a two-argument function so its arguments are supplied one at a
-- time: curry(f)'s lambda(a)'s lambda(b) evaluates f(a, b).
on curry(f)
   script
       on lambda(a)
           script
               on lambda(b)
                   lambda(a, b) of mReturn(f)
               end lambda
           end script
       end lambda
   end script
end curry

-- Lift 2nd class handler function into 1st class script wrapper
-- mReturn :: Handler -> Script
-- A script object is returned unchanged; anything else is stored as the
-- lambda property of a new anonymous script.
on mReturn(f)
   if class of f is script then
       f
   else
       script
           property lambda : f
       end script
   end if
end mReturn

-- tail :: [a] -> [a]
-- Everything after the first item of xs; {} when xs has fewer than two items.
on tail(xs)
   if length of xs > 1 then
       items 2 thru -1 of xs
   else
       {}
   end if
end tail</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Haskell

<lang Haskell>import Data.List (group, intercalate)

-- | Split a string into runs of identical adjacent characters and join the
-- runs with @", "@.
--
-- >>> splitOnChange "gHHH5YY++///\\"
-- "g, HHH, 5, YY, ++, ///, \\"
splitOnChange :: String -> String
splitOnChange = intercalate ", " . group

-- | Print the split form of the task's sample string.
main :: IO ()
main = putStrLn $ splitOnChange "gHHH5YY++///\\"</lang>

Output:

g, HHH, 5, YY, ++, ///, \

JavaScript

ES6

Translation of: Haskell

<lang JavaScript>(() => {

   'use strict';

   // GENERIC FUNCTIONS

   // group :: Eq a => [a] -> a
   const group = xs => groupBy((a, b) => a === b, xs);

   // groupBy :: (a -> a -> Bool) -> [a] -> a
   const groupBy = (f, xs) => {
       const dct = xs.slice(1)
           .reduce((a, x) => {
               const
                   h = a.active.length > 0 ? a.active[0] : undefined,
                   blnGroup = h !== undefined && f(h, x);

               return {
                   active: blnGroup ? a.active.concat(x) : [x],
                   sofar: blnGroup ? a.sofar : a.sofar.concat([a.active])
               };
           }, {
               active: xs.length > 0 ? [xs[0]] : [],
               sofar: []
           });
       return dct.sofar.concat(dct.active.length > 0 ? [dct.active] : []);
   };

   // intercalate :: String -> [a] -> String
   const intercalate = (s, xs) => xs.join(s);

   // TEST
   return intercalate(", ", group("gHHH5YY++///\\".split())
       .map(x => x.join()));

   // -> "g, HHH, 5, YY, ++, ///, \\"

})();</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Perl 6

Works with: Rakudo version 2016.12

Accept a string at the command line to split; if none provided, use default.

<lang perl6># Use the first command-line argument, or the task's sample string when none is given.
my $string = @*ARGS[0] // < gHHH5YY++///\ >;
put 'Orginal: ', $string;
# (.)$0* matches one character followed by any number of repeats of it;
# the label is padded so it lines up with the line above, as in the output shown.
put '  Split: ', join ', ', $string ~~ m:g/(.)$0*/;</lang>

Output with default string:

Orginal: gHHH5YY++///\
  Split: g, HHH, 5, YY, ++, ///, \

Note that Perl 6 works with Unicode natively and handles combiners and zero width characters correctly.

Output with string fffn⃗n⃗n⃗»»» ℵℵ☄☄☃☃̂☃☄☄:

For those of you with crappy browsers, that's: {f} x 3, {Zero-width non-breaking space} x 3, {n with combining over arrow} x 3, {Space} x 2, {Right-pointing double angle quotation mark} x 3, {Alef symbol} x 2, {Comet} x 2, {Snowman} x 1, {Snowman with combining circumflex accent} x 1, {Snowman} x 1, {Comet} x 2

Orginal: fffn⃗n⃗n⃗»»»  ℵℵ☄☄☃☃̂☃☄☄
  Split: fff, , n⃗n⃗n⃗, »»»,   , ℵℵ, ☄☄, ☃, ☃̂, ☃, ☄☄

Python

<lang python>import itertools

try: input = raw_input except: pass

s = input() groups = [] for _, g in itertools.groupby(s):

   groups.append(.join(g))

print(' input string: %s' % s) print(' output string: %s' % ', '.join(groups))</lang>

Output:

when using the default input

      input string:  gHHH5YY++///\
     output string:  g, HHH, 5, YY, ++, ///, \

REXX

<lang rexx>/*REXX program splits a string based on change of character ───► a comma delimited list.*/
parse arg str .                                  /*obtain optional arguments from the CL*/
if str=='' | str==","  then str= 'gHHH5YY++///\' /*Not specified?  Then use the default.*/
p=left(str, 1)                                   /*placeholder for the "previous" string*/
$=                                               /*placeholder for the  output    string*/

      do j=1  for length(str);      @=substr(str, j, 1)    /*obtain a char from string. */
      if @\==left(p, 1)   then do;  $=$',' @;  p=;  end    /*different then previous?   */
                          else      $=$ || @               /*a replicated character.    */
      p=p || @                                             /*append char to current list*/
      end   /*j*/

say '      input string: '   str                 /*labels padded to match shown output. */
say '     output string: '   $                   /*stick a fork in it, we're all done.  */</lang> output when using the default input:

      input string:  gHHH5YY++///\
     output string:  g, HHH, 5, YY, ++, ///, \

zkl

<lang zkl>// Build a single text in which runs of identical adjacent characters of str are separated by ", ".
// NOTE(review): str[0] is read unconditionally; presumably callers never pass an empty string — confirm.
fcn group(str){

  C,out := str[0],Sink(C);   // C: character of the current run; out: a sink created from that first character
  foreach c in (str[1,*]){ out.write(if(c==C) c else String(", ",C=c)) }   // same character: write it; different: write ", " plus the new character and make it the current one
  out.close();   // presumably yields the accumulated text as the function result — confirm against the Sink documentation

} group("gHHH5YY++///\\").println();</lang>

Output:

g, HHH, 5, YY, ++, ///, \