Split a character string based on change of character: Difference between revisions

Content added Content deleted

Inline

Revision as of 04:03, 21 January 2017

Task

Split a (character) string into comma (plus a blank) delimited strings based on a change of character (left to right).

Show the output here (use the 1st example below).

Blanks should be treated as any other character (except they are problematic to display clearly). The same applies to commas.

For instance, the string:

 gHHH5YY++///\

should be split and show:

 g, HHH, 5, YY, ++, ///, \

ALGOL 68

Works with: ALGOL 68G version Any - tested with release 2.8.3.win32

<lang algol68>BEGIN

   # returns s with ", " inserted wherever the character changes        #
   # (scanning left to right); an empty string is returned unchanged    #
   PROC split on characters = ( STRING s )STRING:
        IF s = "" THEN
           # empty string #
           ""
        ELSE
           # worst case: every character differs from its predecessor,  #
           # so allow for 3 result characters per character of s        #
           [ 3 * ( ( UPB s - LWB s ) + 1 ) ]CHAR result;
           INT  r pos  := LWB result;
           CHAR s char := s[ LWB s ];
           # the loop declares its own s pos - no separate INT is needed #
           FOR s pos FROM LWB s TO UPB s DO
               IF s char /= s[ s pos ] THEN
                   # change of character - insert ", " #
                   result[ r pos     ] := ",";
                   result[ r pos + 1 ] := " ";
                   r pos +:= 2;
                   s char := s[ s pos ]
               FI;
               result[ r pos ] := s[ s pos ];
               r pos +:= 1
           OD;
           # return the used portion of the result #
           result[ LWB result : r pos - 1 ]
        FI ; # split on characters #

   print( ( split on characters( "gHHH5YY++///\" ), newline ) )

END</lang>

Output:

g, HHH, 5, YY, ++, ///, \

AppleScript

Translation of: JavaScript

<lang AppleScript>on run
    -- Break the sample text into runs of identical characters,
    -- turn each run (a list of characters) back into a string,
    -- then join the runs with ", ".
    set runs to group("gHHH5YY++///\\")
    set joinChars to curry(intercalate)'s lambda("")
    set pieces to map(joinChars, runs)
    
    intercalate(", ", pieces)
    
    --> "g, HHH, 5, YY, ++, ///, \\"
end run

-- GENERIC FUNCTIONS

-- Split a list (or text) into sublists of adjacent equal items.
-- group :: Eq a => [a] -> [[a]]
on group(xs)
    script eq
        on lambda(a, b)
            -- AppleScript text comparison ignores case by default;
            -- "a" and "A" are different characters for this task.
            considering case
                a = b
            end considering
        end lambda
    end script
    
    groupBy(eq, xs)
end group

-- Split xs into sublists of adjacent items; an item joins the current
-- sublist when f(first item of that sublist, item) is true.
-- groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
on groupBy(f, xs)
    set mf to mReturn(f)
    
    -- Fold step: `active` is the group being built, `sofar` the finished groups.
    script enGroup
        on lambda(a, x)
            set h to cond(length of (active of a) > 0, item 1 of active of a, missing value)
            
            if h is not missing value and mf's lambda(h, x) then
                -- wrap x so that a list-valued item is appended, not spliced
                {active:(active of a) & {x}, sofar:sofar of a}
            else
                {active:{x}, sofar:(sofar of a) & {active of a}}
            end if
        end lambda
    end script
    
    if length of xs > 0 then
        set dct to foldl(enGroup, {active:{item 1 of xs}, sofar:{}}, tail(xs))
        sofar of dct & cond(length of (active of dct) > 0, {active of dct}, {})
    else
        {}
    end if
end groupBy

-- Left fold: combine startValue with each item of xs in turn.
-- The folding function is also passed the 1-based index and the whole list.
-- foldl :: (a -> b -> a) -> a -> [b] -> a
on foldl(f, startValue, xs)
    tell mReturn(f)
        set v to startValue
        set lng to length of xs
        repeat with i from 1 to lng
            set v to lambda(v, item i of xs, i, xs)
        end repeat
        return v
    end tell
end foldl

-- Return f when bool is true, otherwise g.
-- Both f and g are evaluated by the caller before the choice is made.
-- cond :: Bool -> a -> a -> a
on cond(bool, f, g)
    if bool then
        f
    else
        g
    end if
end cond

-- Join the items of lstText into one text, separated by strText.
-- The previous text item delimiters are restored before returning.
-- intercalate :: Text -> [Text] -> Text
on intercalate(strText, lstText)
    set {dlm, my text item delimiters} to {my text item delimiters, strText}
    set strJoined to lstText as text
    set my text item delimiters to dlm
    return strJoined
end intercalate

-- Apply f to every item of xs and collect the results in a new list.
-- f is also passed the 1-based index and the whole list.
-- map :: (a -> b) -> [a] -> [b]
on map(f, xs)
    tell mReturn(f)
        set lng to length of xs
        set lst to {}
        repeat with i from 1 to lng
            set end of lst to lambda(item i of xs, i, xs)
        end repeat
        return lst
    end tell
end map

-- Turn a two-argument function into a script whose lambda(a) returns
-- another script whose lambda(b) calls the original with (a, b).
-- curry :: (Script|Handler) -> Script
on curry(f)
    script
        on lambda(a)
            script
                on lambda(b)
                    lambda(a, b) of mReturn(f)
                end lambda
            end script
        end lambda
    end script
end curry

-- Lift 2nd class handler function into 1st class script wrapper
-- A script object is returned unchanged; a handler is wrapped in a
-- script whose `lambda` property is that handler.
-- mReturn :: Handler -> Script
on mReturn(f)
    if class of f is script then
        f
    else
        script
            property lambda : f
        end script
    end if
end mReturn

-- All items of xs after the first; {} when xs has fewer than two items.
-- tail :: [a] -> [a]
on tail(xs)
    if length of xs > 1 then
        items 2 thru -1 of xs
    else
        {}
    end if
end tail</lang>

Output:

g, HHH, 5, YY, ++, ///, \

BBC BASIC

<lang bbcbasic>REM >split
PRINT FN_split( "gHHH5YY++///\" )
END

REM Returns s$ with ", " inserted wherever the character changes
REM (scanning left to right). An empty string is returned unchanged.
DEF FN_split( s$ )
LOCAL c$, d$, split$, i%
REM c$ holds the character of the run currently being copied
c$ = LEFT$( s$, 1 )
split$ = ""
FOR i% = 1 TO LEN s$
  d$ = MID$( s$, i%, 1 )
  IF d$ <> c$ THEN
    REM change of character - start a new run
    split$ += ", "
    c$ = d$
  ENDIF
  split$ += d$
NEXT
= split$</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Elixir

<lang elixir>split = fn str ->
          # Echo the input, then print it again with ", " between runs of
          # identical graphemes. The result is whatever IO.puts returns.
          IO.puts " input string: #{str}"
          runs = Enum.chunk_by(String.graphemes(str), fn ch -> ch end)
          joined = Enum.map_join(runs, ", ", fn run -> Enum.join(run) end)
          IO.puts "output string: #{joined}"
        end

split.("gHHH5YY++///\\")</lang>

Output:

 input string: gHHH5YY++///\
output string: g, HHH, 5, YY, ++, ///, \

Haskell

<lang Haskell>import Data.List (group, intercalate)

-- | Print the sample string with @", "@ inserted between runs of
-- identical characters: 'group' splits the string into those runs and
-- 'intercalate' joins them back together.
main :: IO ()
main = putStrLn $ intercalate ", " (group "gHHH5YY++///\\")</lang>

Output:

g, HHH, 5, YY, ++, ///, \

JavaScript

ES6

Translation of: Haskell

<lang JavaScript>(() => {

   'use strict';

   // GENERIC FUNCTIONS

   // Split a list into sublists of adjacent, strictly equal items.
   // group :: Eq a => [a] -> [[a]]
   const group = xs => {
       const strictlyEqual = (a, b) => a === b;
       return groupBy(strictlyEqual, xs);
   };

   // Split xs into sublists of adjacent items. An item joins the current
   // sublist when f(first item of that sublist, item) is truthy;
   // otherwise it starts a new sublist. An empty list gives [].
   // groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
   const groupBy = (f, xs) => {
       const groups = [];
       xs.forEach(x => {
           // Test for "no group yet" on the group itself, not on its first
           // item, so that undefined items can still be grouped together.
           const current = groups[groups.length - 1];
           if (current !== undefined && f(current[0], x)) {
               current.push(x);
           } else {
               groups.push([x]);
           }
       });
       return groups;
   };

   // Join the items of xs into one string, placing s between them.
   // intercalate :: String -> [a] -> String
   const intercalate = (s, xs) => {
       const joined = xs.join(s);
       return joined;
   };

   // TEST
   return intercalate(", ", group("gHHH5YY++///\\".split())
       .map(x => x.join()));

   // -> "g, HHH, 5, YY, ++, ///, \\"

})();</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Lua

Note that the backslash must be quoted as a double backslash as Lua uses C-like escape sequences. <lang Lua>function charSplit (inStr)
    -- Collect the output as a list of pieces and concatenate once at the end.
    -- prev is the character of the run currently being copied.
    local prev = inStr:sub(1, 1)
    local pieces = { prev }
    for pos = 2, #inStr do
        local ch = inStr:sub(pos, pos)
        if ch ~= prev then
            -- change of character: start a new run
            pieces[#pieces + 1] = ", "
            prev = ch
        end
        pieces[#pieces + 1] = ch
    end
    return table.concat(pieces)
end

print(charSplit("gHHH5YY++///\\"))</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Perl 6

Works with: Rakudo version 2016.12

<lang perl6># Break $str into its runs of identical characters: each match is one
# character followed by any number of repeats of that same character.
sub group-chars ($str) {
    $str.comb(/ (.) $0* /)
}

# Testing:

for Q[gHHH5YY++///\], Q[fffn⃗n⃗n⃗»»» ℵℵ☄☄☃☃̂☃☄☄] -> $string {

   put 'Original: ', $string;
   put '   Split: ', group-chars($string).join(', ');

}</lang>

Output:

Original: gHHH5YY++///\
   Split: g, HHH, 5, YY, ++, ///, \
Original: fffn⃗n⃗n⃗»»»  ℵℵ☄☄☃☃̂☃☄☄
   Split: fff, , n⃗n⃗n⃗, »»»,   , ℵℵ, ☄☄, ☃, ☃̂, ☃, ☄☄

The second test-case is to show that Perl 6 works with strings on the Unicode grapheme level, and handles combiners and zero width characters correctly. For those of you with crappy browsers, that string consists of:

{LATIN SMALL LETTER F} x 3
{ZERO WIDTH NO-BREAK SPACE} x 3
{LATIN SMALL LETTER N COMBINING RIGHT ARROW ABOVE} x 3
{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK} x 3
{SPACE} x 2
{ALEF SYMBOL} x 2
{COMET} x 2
{SNOWMAN} x 1,
{SNOWMAN COMBINING CIRCUMFLEX ACCENT} x 1
{SNOWMAN} x 1,
{COMET} x 2

Python

<lang python>import itertools

try: input = raw_input except: pass

s = input() groups = [] for _, g in itertools.groupby(s):

   groups.append(.join(g))

print(' input string: %s' % s) print(' output string: %s' % ', '.join(groups))</lang>

Output:

when using the default input

      input string:  gHHH5YY++///\
     output string:  g, HHH, 5, YY, ++, ///, \

Racket

Translation of: Python

<lang racket>#lang racket

;; Split s into a list of strings, one per run of identical adjacent
;; characters. Built with a right fold so that only neighbouring
;; characters are merged: "aba" gives ("a" "b" "a"), whereas group-by
;; would collect both a's into a single group.
(define (split-strings-on-change s)
  (map list->string
       (foldr (lambda (c runs)
                (if (and (pair? runs) (char=? c (caar runs)))
                    (cons (cons c (car runs)) (cdr runs))
                    (cons (list c) runs)))
              '()
              (string->list s))))

;; The sample is written as a here-string (terminator "<") so that the
;; trailing backslash needs no escaping.
(displayln (string-join (split-strings-on-change #<<<
gHHH5YY++///\
<
                                                 )
                        ", "))</lang>

Output:

g, HHH, 5, YY, ++, ///, \

REXX

<lang rexx>/*REXX program splits a string based on change of character ───► a comma delimited list.*/
parse arg str .                                  /*obtain optional arguments from the CL*/
if str=='' | str==","  then str= 'gHHH5YY++///\' /*Not specified?  Then use the default.*/
p=left(str, 1)                                   /*placeholder for the "previous" string*/
$=                                               /*     "       "   "   output      "   */

      do j=1  for length(str);      @=substr(str, j, 1)    /*obtain a char from string. */
      if @\==left(p, 1)   then do;  $=$',' @;  p=;  end    /*different then previous?   */
                          else      $=$ || @               /*a replicated character.    */
      p=p || @                                             /*append char to current list*/
      end   /*j*/

say ' input string: ' str
say ' output string: ' $                         /*stick a fork in it,  we're all done. */</lang>
output when using the default input:

      input string:  gHHH5YY++///\
     output string:  g, HHH, 5, YY, ++, ///, \

Ruby

<lang ruby>def split(str)
  # Echo the input, print it again with ", " between runs of identical
  # characters, and return that joined string.
  puts " input string: #{str}"
  runs = str.chars.slice_when { |prev, cur| prev != cur }
  s = runs.map(&:join).join(", ")
  puts "output string: #{s}"
  s
end

split("gHHH5YY++///\\")</lang>

Output:

 input string: gHHH5YY++///\
output string: g, HHH, 5, YY, ++, ///, \

Sidef

<lang ruby>func group(str) {

    # Repeatedly match (global flag) against str and collect, via
    # gather/take, the first capture of every match.
    # NOTE(review): the pattern looks like "one character followed by
    # repeats of that same character", i.e. one run per match - confirm
    # how \g{-1} resolves in Sidef's regex engine.
    gather {
        while (var match = (str =~ /((.)\g{-1}*)/g)) {
            take(match[0])
        }
    }

}

# Group the first command-line argument, or the sample string when
# presumably no argument is given (\\ looks like a defined-or - confirm),
# and print the pieces joined with ", ".
say group(ARGV[0] \\ 'gHHH5YY++///\\').join(', ')</lang>

Output:

g, HHH, 5, YY, ++, ///, \

zkl

<lang zkl>fcn group(str){

   // C holds the character of the run currently being written; out is
   // created from that first character.
   // NOTE(review): presumably Sink(C) is a string sink seeded with C - confirm.
   C,out := str[0],Sink(C);
   // For each remaining character: write it as is while it equals C,
   // otherwise write ", " followed by the character, which becomes the new C.
   foreach c in (str[1,*]){ out.write(if(c==C) c else String(", ",C=c)) }
   // The value of closing the sink is the function's result.
   out.close();

} group("gHHH5YY++///\\").println();</lang>

Output:

g, HHH, 5, YY, ++, ///, \

@@ Line 1: / Line 1: @@
 [[Category: String manipulation]]
 [[Category:Simple]]
-{{draft task}}
+{{task}}
 <!--  this problem is also known as  "splitsville"  elsewhere.  !-->
 <!--  I imagine this Rosetta Code task will lead to quite a few code-golf solutions.  !-->