Split a character string based on change of character

From Rosetta Code
Revision as of 01:43, 21 January 2017 by rosettacode>Purple24 (Added Ruby)
Split a character string based on change of character is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.
Task

Split a (character) string into comma (plus a blank) delimited strings based on a change of character   (left to right).

Show the output here   (use the 1st example below).


Blanks should be treated as any other character   (except they are problematic to display clearly).   The same applies to commas.


For instance, the string:

 gHHH5YY++///\ 

should be split and show:

 g, HHH, 5, YY, ++, ///, \ 



ALGOL 68

Works with: ALGOL 68G version Any - tested with release 2.8.3.win32

<lang algol68>BEGIN

   # returns s with ", " inserted at every change of character,        #
   # scanning left to right; an empty string is returned unchanged     #
   PROC split on characters = ( STRING s )STRING:
        IF s = "" THEN
           # empty string - nothing to split #
           ""
        ELSE
           # worst case: every character differs from its neighbour, so #
           # each character may be preceded by ", " - 3 times the       #
           # length of s is therefore always enough room                #
           [ 3 * ( ( UPB s - LWB s ) + 1 ) ]CHAR result;
           INT  r pos  := LWB result;
           # s char is the character of the run currently being copied  #
           CHAR s char := s[ LWB s ];
           FOR s pos FROM LWB s TO UPB s DO
               IF s char /= s[ s pos ] THEN
                   # change of character - insert ", " #
                   result[ r pos     ] := ",";
                   result[ r pos + 1 ] := " ";
                   r pos +:= 2;
                   s char := s[ s pos ]
               FI;
               result[ r pos ] := s[ s pos ];
               r pos +:= 1
           OD;
           # return the used portion of the result #
           result[ LWB result : r pos - 1 ]
        FI ; # split on characters #
   print( ( split on characters( "gHHH5YY++///\" ), newline ) )

END</lang>

Output:
g, HHH, 5, YY, ++, ///, \

AppleScript

Translation of: JavaScript

<lang AppleScript>on run
    -- Break the sample string into runs of equal characters
    set charRuns to group("gHHH5YY++///\\")
    
    -- intercalate partially applied to "": glues the characters of one run together
    set concatChars to curry(intercalate)'s lambda("")
    set runStrings to map(concatChars, charRuns)
    
    -- Join the runs with ", "; this last value is the result of the script
    intercalate(", ", runStrings)
    
    --> "g, HHH, 5, YY, ++, ///, \\"
end run


-- GENERIC FUNCTIONS

-- group :: Eq a => [a] -> [[a]]
-- Splits xs into groups of consecutive equal items by delegating to groupBy
-- with a plain equality test.
on group(xs)
    -- Equality predicate wrapped in a script so it can be passed as a value
    script eq
        on lambda(a, b)
            a = b
        end lambda
    end script
    
    groupBy(eq, xs)
end group

-- groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
-- Splits xs into consecutive groups. Each item is compared, via f, with the
-- first item of the group being built: it joins that group when f returns
-- true, and otherwise starts a new group.
on groupBy(f, xs)
    set mf to mReturn(f)
    
    -- Fold step. The accumulator is a record:
    --   active : the group currently being built
    --   sofar  : the groups already completed
    script enGroup
        on lambda(a, x)
            set h to cond(length of (active of a) > 0, item 1 of active of a, missing value)
            
            if h is not missing value and mf's lambda(h, x) then
                {active:(active of a) & x, sofar:sofar of a}
            else
                {active:{x}, sofar:(sofar of a) & {active of a}}
            end if
        end lambda
    end script
    
    if length of xs > 0 then
        -- Seed the fold with the first item, then append the last open group
        set dct to foldl(enGroup, {active:{item 1 of xs}, sofar:{}}, tail(xs))
        sofar of dct & cond(length of (active of dct) > 0, {active of dct}, {})
    else
        {}
    end if
end groupBy

-- foldl :: (a -> b -> a) -> a -> [b] -> a
-- Left fold: threads an accumulator, starting at startValue, through the
-- items of xs from first to last and returns the final accumulator.
on foldl(f, startValue, xs)
    tell mReturn(f)
        set v to startValue
        set lng to length of xs
        repeat with i from 1 to lng
            -- The index and the whole list are passed as extra arguments
            set v to lambda(v, item i of xs, i, xs)
        end repeat
        return v
    end tell
end foldl

-- cond :: Bool -> a -> a -> a
-- Returns f when bool is true, otherwise g.
-- Both f and g are already evaluated by the time the handler is called.
on cond(bool, f, g)
    if bool then
        f
    else
        g
    end if
end cond

-- intercalate :: Text -> [Text] -> Text
-- Joins the items of lstText into one text, with strText between them.
on intercalate(strText, lstText)
    -- Swap in strText as the delimiter, remembering the previous one
    set {dlm, my text item delimiters} to {my text item delimiters, strText}
    set strJoined to lstText as text
    -- Restore the previous delimiter
    set my text item delimiters to dlm
    return strJoined
end intercalate

-- map :: (a -> b) -> [a] -> [b]
-- Returns a new list holding the result of applying f to each item of xs, in order.
on map(f, xs)
    tell mReturn(f)
        set lng to length of xs
        set lst to {}
        repeat with i from 1 to lng
            -- The index and the whole list are passed as extra arguments
            set end of lst to lambda(item i of xs, i, xs)
        end repeat
        return lst
    end tell
end map

-- curry :: (Script|Handler) -> Script
-- Turns a two-argument function f into a script whose lambda(a) returns a
-- second script; that script's lambda(b) calls f with (a, b).
on curry(f)
    script
        on lambda(a)
            script
                on lambda(b)
                    lambda(a, b) of mReturn(f)
                end lambda
            end script
        end lambda
    end script
end curry

-- Lift 2nd class handler function into 1st class script wrapper
-- mReturn :: Handler -> Script
-- A script argument is returned unchanged; anything else is wrapped in a
-- script whose lambda property is f.
on mReturn(f)
    if class of f is script then
        f
    else
        script
            property lambda : f
        end script
    end if
end mReturn

-- tail :: [a] -> [a]
-- Returns every item of xs after the first, or an empty list when xs has
-- fewer than two items.
on tail(xs)
    if length of xs > 1 then
        items 2 thru -1 of xs
    else
        {}
    end if
end tail</lang>

Output:
g, HHH, 5, YY, ++, ///, \

BBC BASIC

<lang bbcbasic>REM >split
REM Print the sample string split at every change of character
PRINT FN_split( "gHHH5YY++///\" )
END

REM Returns s$ with ", " inserted wherever the character changes
DEF FN_split( s$ )
LOCAL c$, d$, split$, i%
REM c$ holds the character of the current run
c$ = LEFT$( s$, 1 )
split$ = ""
FOR i% = 1 TO LEN s$
  d$ = MID$( s$, i%, 1 )
  IF d$ <> c$ THEN
    REM change of character: start a new run
    split$ += ", "
    c$ = d$
  ENDIF
  split$ += d$
NEXT
= split$</lang>

Output:
g, HHH, 5, YY, ++, ///, \

Elixir

<lang elixir>split = fn str ->
  # Echo the input, then print it again with ", " between runs of equal graphemes.
  IO.puts " input string: #{str}"

  joined =
    str
    |> String.graphemes()
    |> Enum.chunk_by(fn grapheme -> grapheme end)
    |> Enum.map(&Enum.join/1)
    |> Enum.join(", ")

  # The value of this last IO.puts is what the function returns
  IO.puts "output string: #{joined}"
end

split.("gHHH5YY++///\\")</lang>

Output:
 input string: gHHH5YY++///\
output string: g, HHH, 5, YY, ++, ///, \

Haskell

<lang Haskell>import Data.List (group, intercalate)

-- | Print the sample string with ", " between its runs of equal characters.
main :: IO ()
main = putStrLn $ intercalate ", " (group "gHHH5YY++///\\")</lang>

Output:
g, HHH, 5, YY, ++, ///, \

JavaScript

ES6

Translation of: Haskell

<lang JavaScript>(() => {

   'use strict';
   // GENERIC FUNCTIONS
   // group :: Eq a => [a] -> [[a]]
   // Groups adjacent elements of xs that are strictly equal (===).
   const group = xs => {
       const strictlyEqual = (a, b) => a === b;
       return groupBy(strictlyEqual, xs);
   };
   // groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
   // Walks xs from left to right. Each element is compared, via f, with the
   // first element of the group being built: it is appended to that group
   // when f is truthy, and otherwise opens a new group.
   const groupBy = (f, xs) => {
       const rest = xs.slice(1);
       let current = xs.length > 0 ? [xs[0]] : [];
       let finished = [];
       rest.forEach(x => {
           const head = current.length > 0 ? current[0] : undefined;
           if (head !== undefined && f(head, x)) {
               current = current.concat(x);
           } else {
               finished = finished.concat([current]);
               current = [x];
           }
       });
       // Append the group still open when the input ran out
       return finished.concat(current.length > 0 ? [current] : []);
   };
   // intercalate :: String -> [a] -> String
   // Joins the elements of xs into one string, with s between them.
   const intercalate = (s, xs) => {
       const joined = xs.join(s);
       return joined;
   };
   // TEST
   return intercalate(", ", group("gHHH5YY++///\\".split())
       .map(x => x.join()));
   // -> "g, HHH, 5, YY, ++, ///, \\"

})();</lang>

Output:
g, HHH, 5, YY, ++, ///, \

Lua

Note that the backslash must be quoted as a double backslash as Lua uses C-like escape sequences. <lang Lua>function charSplit (inStr)
    -- Returns inStr with ", " inserted wherever one character differs from
    -- the one before it. Pieces are collected in a table and joined at the end.
    local previous = inStr:sub(1, 1)
    local pieces = { previous }
    for index = 2, #inStr do
        local current = inStr:sub(index, index)
        if current ~= previous then
            pieces[#pieces + 1] = ", "
            previous = current
        end
        pieces[#pieces + 1] = current
    end
    return table.concat(pieces)
end

print(charSplit("gHHH5YY++///\\"))</lang>

Output:
g, HHH, 5, YY, ++, ///, \

Perl 6

Works with: Rakudo version 2016.12

<lang perl6>sub group-chars ($str) {
    # Each match is one character followed by any number of repeats of it,
    # so comb yields the runs of identical characters in order.
    $str.comb(/ (.) $0* /)
}

# Testing:

for Q[gHHH5YY++///\], Q[fffn⃗n⃗n⃗»»» ℵℵ☄☄☃☃̂☃☄☄] -> $string {

   put 'Original: ', $string;
   put '   Split: ', group-chars($string).join(', ');

}</lang>

Output:
Original: gHHH5YY++///\
   Split: g, HHH, 5, YY, ++, ///, \
Original: fffn⃗n⃗n⃗»»»  ℵℵ☄☄☃☃̂☃☄☄
   Split: fff, , n⃗n⃗n⃗, »»»,   , ℵℵ, ☄☄, ☃, ☃̂, ☃, ☄☄

The second test-case shows that Perl 6 works with strings at the Unicode grapheme level, and handles combiners and zero-width characters correctly. For readers whose browsers cannot render it, that string consists of:

  • {LATIN SMALL LETTER F} x 3
  • {ZERO WIDTH NO-BREAK SPACE} x 3
  • {LATIN SMALL LETTER N COMBINING RIGHT ARROW ABOVE} x 3
  • {RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK} x 3
  • {SPACE} x 2,
  • {ALEF SYMBOL} x 2,
  • {COMET} x 2,
  • {SNOWMAN} x 1,
  • {SNOWMAN COMBINING CIRCUMFLEX ACCENT} x 1
  • {SNOWMAN} x 1,
  • {COMET} x 2

Python

<lang python>import itertools

# Python 2 compatibility: use raw_input when it exists. On Python 3 the name
# is undefined, so the NameError is ignored and the builtin input is kept.
try:
    input = raw_input
except NameError:
    pass

# Read one line, then collect each run of identical consecutive characters
# as a string.
s = input()
groups = [''.join(g) for _, g in itertools.groupby(s)]

print(' input string: %s' % s) print(' output string: %s' % ', '.join(groups))</lang>

Output:

  when using the default input

      input string:  gHHH5YY++///\
     output string:  g, HHH, 5, YY, ++, ///, \

Racket

Translation of: Python

<lang racket>#lang racket

;; split-strings-on-change : string -> (listof string)
;; Splits s into its runs of identical consecutive characters, left to right.
;; group-by is not used here: it puts ALL equal characters in one group, even
;; non-adjacent ones, so "aabaa" would give ("aaaa" "b") instead of three runs.
(define (split-strings-on-change s)
  (let loop ([chars (string->list s)] [runs '()])
    (if (null? chars)
        (reverse runs)
        ;; Peel off the longest prefix equal to the first remaining character
        (let-values ([(run remaining)
                      (splitf-at chars (lambda (c) (char=? c (car chars))))])
          (loop remaining (cons (list->string run) runs))))))

;; The sample is a here-string so the backslash needs no escaping
(displayln (string-join (split-strings-on-change #<<<
gHHH5YY++///\
<
                                                 )
                        ", "))</lang>
Output:
g, HHH, 5, YY, ++, ///, \

REXX

<lang rexx>/*REXX program splits a string based on change of character ───► a comma delimited list.*/
parse arg str .                                  /*obtain optional arguments from the CL*/
if str=='' | str==","  then str= 'gHHH5YY++///\' /*Not specified?  Then use the default.*/
p=left(str, 1)                                   /*placeholder for the "previous" string*/
$=                                               /*accumulator for the output string.   */

      do j=1  for length(str);      @=substr(str, j, 1)    /*obtain a char from string. */
      if @\==left(p, 1)   then do;  $=$',' @;  p=;  end    /*different than previous?   */
                          else      $=$ || @               /*a replicated character.    */
      p=p || @                                             /*append char to current list*/
      end   /*j*/

say ' input string: ' str
say ' output string: ' $                         /*stick a fork in it, we're all done. */</lang> output   when using the default input:

      input string:  gHHH5YY++///\
     output string:  g, HHH, 5, YY, ++, ///, \

Ruby

<lang ruby>def split(str)
  # Prints str, then prints and returns it with ", " between runs of
  # identical consecutive characters.
  puts " input string: #{str}"
  runs = str.chars.slice_when { |left, right| left != right }
  joined = runs.map(&:join).join(", ")
  puts "output string: #{joined}"
  joined
end

split("gHHH5YY++///\\")</lang>

Output:
 input string: gHHH5YY++///\
output string: g, HHH, 5, YY, ++, ///, \

Sidef

<lang ruby>func group(str) {
   # Collects successive regex matches from str into a list.
   # The pattern is one character, (.), followed by zero or more
   # back-references to it, so each match is a run of identical characters.

   gather {
       # NOTE(review): the /g match in the loop condition presumably resumes
       # after the previous match on each pass - confirm against the Sidef docs
       while (var match = (str =~ /((.)\g{-1}*)/g)) {
           # match[0] is presumably the outer capture (the whole run) - TODO confirm
           take(match[0])
       }
   }

}

say group(ARGV[0] \\ 'gHHH5YY++///\\').join(', ')</lang>

Output:
g, HHH, 5, YY, ++, ///, \

zkl

<lang zkl>fcn group(str){
   // Builds a copy of str with ", " written wherever the character changes.

   // C is the character of the current run; out is a Sink created from it
   C,out := str[0],Sink(C);
   // For each later character: write it as-is when it equals C, otherwise
   // write ", " followed by the character, updating C in the same expression
   foreach c in (str[1,*]){ out.write(if(c==C) c else String(", ",C=c)) }
   // NOTE(review): close() presumably returns the accumulated string - confirm
   out.close();

} group("gHHH5YY++///\\").println();</lang>

Output:
g, HHH, 5, YY, ++, ///, \