Content deleted Content added

Inline

Revision as of 18:30, 7 March 2022

Task

Split a (character) string into comma (plus a blank) delimited strings based on a change of character (left to right).

Show the output here (use the 1st example below).

Blanks should be treated as any other character (except they are problematic to display clearly). The same applies to commas.

For instance, the string:

 gHHH5YY++///\

should be split and show:

 g, HHH, 5, YY, ++, ///, \

Other tasks related to string operations:

Metrics

Counting

Remove/replace

Anagrams/Derangements/shuffling

Find/Search/Determine

Formatting

Song lyrics/poems/Mad Libs/phrases

Tokenize

Sequences

11l

Translation of: C++

<lang 11l>F split(input, delim)

  V res = ‘’
  L(ch) input
     I !res.empty & ch != res.last
        res ‘’= delim
     res ‘’= ch
  R res

print(split(‘gHHH5YY++///\’, ‘, ’))</lang>

Output:

g, HHH, 5, YY, ++, ///, \

<lang 8080asm>        org     100h
        jmp     demo
        ;;; Split the '$'-terminated string under DE wherever the
        ;;; character changes, writing the result (also '$'-terminated)
        ;;; to the buffer under HL.
        ;;; On entry to sploop, A holds the character about to be stored.
split:  ldax    d               ; A = first input character
sploop: mov     m,a             ; Write current character to the output
        cpi     '$'             ; Was that the CP/M string terminator?
        rz                      ; Yes - the terminator is stored, so finish
        mov     b,a             ; B = character just written ("previous")
        inx     d               ; Advance input pointer
        inx     h               ; Advance output pointer
        ldax    d               ; A = next input character
        cmp     b               ; Still inside the same run?
        jz      sploop          ; Yes - copy it without a separator
        cpi     '$'             ; Otherwise, if it is the end of the string,
        jz      sploop          ; copy the terminator without a separator
        mvi     m,','           ; A new run starts: emit ", " first
        inx     h
        mvi     m,' '
        inx     h
        jmp     sploop
        ;;; Demo code
demo:   lxi     d,string
        lxi     h,out
        call    split           ; Build the split string at 'out'
        lxi     d,out
        mvi     c,9             ; BDOS function 9: print '$'-terminated string
        jmp     5
string: db      'gHHH5YY++///',5Ch,'$'
out:    equ     $               ; Output buffer starts right after the program</lang>

Output:

g, HHH, 5, YY, ++, ///, \

8086 Assembly

<lang asm>        cpu     8086
        org     100h
section .text
        jmp     demo
        ;;; Split the '$'-terminated string at DS:SI wherever the
        ;;; character changes, writing the '$'-terminated result to ES:DI.
        ;;; Uses the string instructions, so it relies on the direction
        ;;; flag being clear on entry (assumed; the code never sets it).
        ;;; On entry to .store, AL holds the character about to be written.
split:  lodsb                   ; AL = first input character
.store: stosb                   ; Write current character to the output
        cmp     al,'$'          ; Terminator written?
        je      .done           ; Then the result is complete
        mov     ah,al           ; AH = character just written ("previous")
        lodsb                   ; AL = next input character
        cmp     al,ah           ; Still inside the same run?
        je      .store          ; Yes - copy it without a separator
        cmp     al,'$'          ; End of input?
        je      .store          ; Copy the terminator without a separator
        mov     dl,al           ; Park the new character; AX is needed below
        mov     ax,', '         ; AL = ',' and AH = ' ': the two-byte separator
        stosw                   ; Emit ", " in one store
        mov     al,dl           ; Bring the new character back
        jmp     .store
.done:  ret
        ;;; Demo code
demo:   mov     si,string
        mov     di,buf
        call    split           ; Build the split string in buf
        mov     dx,buf
        mov     ah,9            ; DOS function 9: print '$'-terminated string
        int     21h
        ret
section .data
string: db      'gHHH5YY++///\$'
section .bss
buf:    resb    32</lang>

Output:

g, HHH, 5, YY, ++, ///, \

AArch64 Assembly

Works with: as version Raspberry Pi 3B version Buster 64 bits

<lang AArch64 Assembly> /* ARM assembly AARCH64 Raspberry PI 3B */ /* program splitcar64.s */

/*******************************************/ /* Constantes file */ /*******************************************/ /* for this file see task include a file in language AArch64 assembly*/ .include "../includeConstantesARM64.inc"

/*********************************/ /* Initialized data */ /*********************************/ .data szCarriageReturn: .asciz "\n" szString1: .asciz "gHHH5YY++///\\" /* IMPORTANT REMARK for compiler as The way to get special characters into a string is to escape these characters: precede them with a backslash ‘\’ character. For example ‘\\’ represents one backslash: the first \ is an escape which tells as to interpret the second character literally as a backslash (which prevents as from recognizing the second \ as an escape character).

/

/*********************************/ /* UnInitialized data */ /*********************************/ .bss sBuffer: .skip 100

/*********************************/ /* code section */ /*********************************/ .text .global main main: // entry of program

   ldr x0,qAdrszString1           // input string address
   ldr x1,qAdrsBuffer             // output buffer address
   bl split 

   ldr x0,qAdrsBuffer
   bl affichageMess               // display message
   ldr x0,qAdrszCarriageReturn
   bl affichageMess

100: // standard end of the program

   mov x0,0                       // return code
   mov x8,EXIT                    // request to exit program
   svc 0                          // perform the system call

qAdrszString1: .quad szString1 qAdrszCarriageReturn: .quad szCarriageReturn qAdrsBuffer: .quad sBuffer

/******************************************************************/ /* generate value */ /******************************************************************/ /* x0 contains the address of input string */ /* x1 contains the address of output buffer */

split:

   // x0 = address of the NUL-terminated input string
   // x1 = address of the output buffer (the caller must make it large enough)
   // Copies the input to the buffer, inserting ", " at every change of character.
   // Register use: x2 = previous character, x3 = current character,
   //               x4 = input index, x5 = output index.
   stp x1,lr,[sp,-16]!            // save registers
   mov x4,0                       // index into input string
   mov x5,0                       // index into output buffer
   ldrb w2,[x0,x4]                // x2 = first character
   cbz x2,4f                      // empty string -> only write the terminator
   strb w2,[x1,x5]                // copy first character to the buffer
   add x5,x5,1                    // advance output index
   add x4,x4,1                    // advance input index as well: without this the
                                  // loop re-reads the first character and emits it twice

1:

   ldrb w3,[x0,x4]                // x3 = current character
   cbz x3,4f                      // NUL -> end of input
   cmp x2,x3                      // same as previous character ?
   bne 2f
   strb w3,[x1,x5]                // same run -> just copy it
   b 3f

2:

   mov x2,','                    // new run -> emit ", " first
   strb w2,[x1,x5]
   add x5,x5,1
   mov x2,' '
   strb w2,[x1,x5]
   add x5,x5,1
   strb w3,[x1,x5]               // then the character that starts the new run
   mov x2,x3                     // it becomes the previous character

3:

   add x5,x5,1                   // advance both indices
   add x4,x4,1
   b 1b                          // and loop

4:

   strb wzr,[x1,x5]              // terminate the output; wzr is used because x3 has
                                 // not been loaded when the input string is empty

100:

   ldp x1,lr,[sp],16             // restore the 2 saved registers
   ret                           // return to address lr x30

/********************************************************/ /* File Include fonctions */ /********************************************************/ /* for this file see task include a file in language AArch64 assembly */ .include "../includeARM64.inc" </lang>

Output:

 g, HHH, 5, YY, ++, ///, \

Action!

<lang Action!>PROC Split(CHAR ARRAY s)

 BYTE i
 CHAR curr,last

 i=1 last=s(1)
 Put('")
 WHILE i<=s(0)
 DO
   curr=s(i)
   IF curr#last THEN
     Print(", ")
   FI
   Put(curr)
   last=curr
   i==+1
 OD
 Put('")

RETURN

PROC Test(CHAR ARRAY s)

 PrintF("Input: ""%S""%E",s)
 Print("Split: ") Split(s)
 PutE() PutE()

RETURN

PROC Main()

 Test("gHHH5YY++///\")
 Test("gHHH   5++,,,///\")

RETURN</lang>

Output:

Screenshot from Atari 8-bit computer

Input: "gHHH5YY++///\"
Split: "g, HHH, 5, YY, ++, ///, \"

Input: "gHHH   5++,,,///\"
Split: "g, HHH,    , 5, ++, ,,,, ///, \"

Ada

<lang ada>with Ada.Text_IO;

procedure Split is

   --  Print S on one line, inserting ", " wherever a character differs
   --  from the one before it.  Blanks and commas in S are treated like
   --  any other character.  An empty S prints an empty line.
   procedure Print_Tokens (S : String) is
   begin
      for I in S'Range loop
         --  A separator is needed before every character that starts a
         --  new run, i.e. every character except the first that differs
         --  from its predecessor.
         if I > S'First and then S (I) /= S (I - 1) then
            Ada.Text_IO.Put (", ");
         end if;
         Ada.Text_IO.Put (S (I));
      end loop;
      Ada.Text_IO.New_Line;
   end Print_Tokens;

begin

   --  The example string required by the task (Ada string literals have
   --  no escape sequences, so the backslash stands for itself).
   Print_Tokens ("gHHH5YY++///\");

end Split;</lang>

ALGOL 68

Works with: ALGOL 68G version Any - tested with release 2.8.3.win32

<lang algol68>BEGIN

   # returns s with ", " added between each change of character #
   PROC split on characters = ( STRING s )STRING:
        IF s = "" THEN
           # empty string #
           ""
        ELSE
           # allow for 3 times as many characters as in the string #
           # this would handle a string of unique characters       #
           [ 3 * ( ( UPB s - LWB s ) + 1 ) ]CHAR result;
           INT  r pos  := LWB result;
           INT  s pos  := LWB s;
           CHAR s char := s[ LWB s ];
           FOR s pos FROM LWB s TO UPB s DO
               IF s char /= s[ s pos ] THEN
                   # change of character - insert ", " #
                   result[ r pos     ] := ",";
                   result[ r pos + 1 ] := " ";
                   r pos +:= 2;
                   s char := s[ s pos ]
               FI;
               result[ r pos ] := s[ s pos ];
               r pos +:= 1
           OD;
           # return the used portion of the result #
           result[ 1 : r pos - 1 ]
        FI ; # split on characters #

   print( ( split on characters( "gHHH5YY++///\" ), newline ) )

END</lang>

Output:

g, HHH, 5, YY, ++, ///, \

ANSI BASIC

<lang ansibasic>REM >split DECLARE EXTERNAL FUNCTION FN_split$

PRINT FN_split$( "gHHH5YY++///\" ) END

EXTERNAL FUNCTION FN_split$( s$ ) LET c$ = s$(1:1) LET split$ = "" FOR i = 1 TO LEN(s$)

 LET d$ = s$(i:i)
 IF d$ <> c$ THEN
   LET split$ = split$ & ", "
   LET c$ = d$
 END IF
 LET split$ = split$ & d$

NEXT i LET FN_split$ = split$ END FUNCTION</lang>

Output:

g, HHH, 5, YY, ++, ///, \

APL

Works with: Dyalog APL

<lang APL>split ← 2↓∘∊(⊂', '),¨(⊢≠¯1⌽⊢)⊂⊢</lang>

Output:

      split 'gHHH5YY++///\'
g, HHH, 5, YY, ++, ///, \

AppleScript

Functional

Translation of: JavaScript

<lang AppleScript>intercalate(", ", ¬

   map(curry(intercalate)'s |λ|(""), ¬
       group("gHHH5YY++///\\")))

--> "g, HHH, 5, YY, ++, ///, \\"

-- GENERIC FUNCTIONS ---------------------------------------------------------- -- curry :: (Script|Handler) -> Script on curry(f)

   script
       on |λ|(a)
           script
               on |λ|(b)
                   |λ|(a, b) of mReturn(f)
               end |λ|
           end script
       end |λ|
   end script

end curry

-- foldl :: (a -> b -> a) -> a -> [b] -> a on foldl(f, startValue, xs)

   tell mReturn(f)
       set v to startValue
       set lng to length of xs
       repeat with i from 1 to lng
           set v to |λ|(v, item i of xs, i, xs)
       end repeat
       return v
   end tell

end foldl

-- group :: Eq a => [a] -> a on group(xs)

   script eq
       on |λ|(a, b)
           a = b
       end |λ|
   end script
   
   groupBy(eq, xs)

end group

-- groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
-- Partitions xs into runs of consecutive items: an item joins the current
-- run when f(first item of the run, item) is true, otherwise it starts a
-- new run. Returns a list of lists; an empty xs gives an empty list.
on groupBy(f, xs)

   set mf to mReturn(f)
   
   -- Fold step. The accumulator is a record:
   --   active : the run currently being built
   --   sofar  : the list of runs already completed
   script enGroup
       on |λ|(a, x)
           if length of (active of a) > 0 then
               set h to item 1 of active of a
           else
               set h to missing value
           end if
           
           if h is not missing value and mf's |λ|(h, x) then
               {active:(active of a) & x, sofar:sofar of a}
           else
               {active:{x}, sofar:(sofar of a) & {active of a}}
           end if
       end |λ|
   end script
   
   if length of xs > 0 then
       tell foldl(enGroup, {active:{item 1 of xs}, sofar:{}}, tail(xs))
           -- The last run must be appended as ONE element, so it is wrapped
           -- in a list before concatenation. Concatenating it unwrapped
           -- would splice its items into the result individually (e.g.
           -- "abb" would come out as a, b, b instead of a, bb).
           -- The active run always holds at least one item here, because the
           -- fold is seeded with the first item of xs.
           its sofar & {its active}
       end tell
   else
       {}
   end if

end groupBy

-- intercalate :: Text -> [Text] -> Text on intercalate(strText, lstText)

   set {dlm, my text item delimiters} to {my text item delimiters, strText}
   set strJoined to lstText as text
   set my text item delimiters to dlm
   return strJoined

end intercalate

-- map :: (a -> b) -> [a] -> [b] on map(f, xs)

   tell mReturn(f)
       set lng to length of xs
       set lst to {}
       repeat with i from 1 to lng
           set end of lst to |λ|(item i of xs, i, xs)
       end repeat
       return lst
   end tell

end map

-- Lift 2nd class handler function into 1st class script wrapper -- mReturn :: Handler -> Script on mReturn(f)

   if class of f is script then
       f
   else
       script
           property |λ| : f
       end script
   end if

end mReturn

-- tail :: [a] -> [a] on tail(xs)

   if length of xs > 1 then
       items 2 thru -1 of xs
   else
       {}
   end if

end tail</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Straightforward

(Also case-sensitive.) <lang applescript>on splitAtCharacterChanges(input)

   set len to (count input)
   if (len < 2) then return input
   set chrs to input's characters
   set currentChr to beginning of chrs
   considering case
       repeat with i from 2 to len
           set thisChr to item i of chrs
           if (thisChr is not currentChr) then
               set item i of chrs to ", " & thisChr
               set currentChr to thisChr
           end if
       end repeat
   end considering
   set astid to AppleScript's text item delimiters
   set AppleScript's text item delimiters to ""
   set output to chrs as text
   set AppleScript's text item delimiters to astid
   
   return output

end splitAtCharacterChanges

-- Test code: splitAtCharacterChanges("gHHH5YY++///\\")</lang>

Output:

ASObjC

<lang applescript>use AppleScript version "2.4" -- OS X 10.10 (Yosemite) or later use framework "Foundation"

on splitAtCharacterChanges(input)

   tell (current application's class "NSMutableString"'s stringWithString:(input)) to ¬
       return (its stringByReplacingOccurrencesOfString:("(.)\\1*+(?!$)") withString:("$0, ") ¬
           options:(current application's NSRegularExpressionSearch) range:({0, its |length|()})) as text

end splitAtCharacterChanges

-- Test code: splitAtCharacterChanges("gHHH5YY++///\\")</lang>

Output:

ARM Assembly

Works with: as version Raspberry Pi

<lang ARM Assembly> /* ARM assembly Raspberry PI */ /* program splitcar.s */

/************************************/ /* Constantes */ /************************************/ .equ STDOUT, 1 @ Linux output console .equ EXIT, 1 @ Linux syscall .equ WRITE, 4 @ Linux syscall

/*********************************/ /* Initialized data */ /*********************************/ .data szCarriageReturn: .asciz "\n" szString1: .asciz "gHHH5YY++///\\" /* IMPORTANT REMARK for compiler as The way to get special characters into a string is to escape these characters: precede them with a backslash ‘\’ character. For example ‘\\’ represents one backslash: the first \ is an escape which tells as to interpret the second character literally as a backslash (which prevents as from recognizing the second \ as an escape character).

/

/*********************************/ /* UnInitialized data */ /*********************************/ .bss sBuffer: .skip 100

/*********************************/ /* code section */ /*********************************/ .text .global main main: @ entry of program

   ldr r0,iAdrszString1                          @ input string address
   ldr r1,iAdrsBuffer                            @ output buffer address
   bl split

   ldr r0,iAdrsBuffer
   bl affichageMess                              @ display message
   ldr r0,iAdrszCarriageReturn
   bl affichageMess

100: @ standard end of the program

   mov r0, #0                                    @ return code
   mov r7, #EXIT                                 @ request to exit program
   svc #0                                        @ perform the system call

iAdrszString1: .int szString1 iAdrszCarriageReturn: .int szCarriageReturn iAdrsBuffer: .int sBuffer

/******************************************************************/ /* generate value */ /******************************************************************/ /* r0 contains the address of input string */ /* r1 contains the address of output buffer */

split:

   @ r0 = address of the NUL-terminated input string
   @ r1 = address of the output buffer (the caller must make it large enough)
   @ Copies the input to the buffer, inserting ", " at every change of character.
   @ Register use: r2 = previous character, r3 = current character,
   @               r4 = input index, r5 = output index.
   push {r1-r5,lr}                           @ save registers
   mov r4,#0                                 @ index into input string
   mov r5,#0                                 @ index into output buffer
   ldrb r2,[r0,r4]                           @ r2 = first character
   cmp r2,#0                                 @ empty string -> only write the terminator
   beq 3f
   strb r2,[r1,r5]                           @ copy first character to the buffer
   add r5,#1                                 @ advance output index
   add r4,#1                                 @ advance input index as well: without this the
                                             @ loop re-reads the first character and emits it twice

1:

   ldrb r3,[r0,r4]                           @ r3 = current character
   cmp r3,#0                                 @ NUL -> end of input
   beq 3f
   cmp r2,r3                                 @ same as previous character ?
   streqb r3,[r1,r5]                         @ same run -> just copy it
   beq 2f

   mov r2,#','                               @ new run -> emit ", " first
   strb r2,[r1,r5]
   add r5,#1
   mov r2,#' '
   strb r2,[r1,r5]
   add r5,#1
   strb r3,[r1,r5]                           @ then the character that starts the new run
   mov r2,r3                                 @ it becomes the previous character

2:

   add r5,#1                                 @ advance both indices
   add r4,#1
   b 1b                                      @ and loop

3:

   mov r3,#0                                 @ r3 has not been loaded when the input is empty,
   strb r3,[r1,r5]                           @ so write an explicit zero terminator

100:

   pop {r1-r5,lr}                            @ restore registers
   bx lr                                     @ return

/******************************************************************/ /* display text with size calculation */ /******************************************************************/ /* r0 contains the address of the message */ affichageMess:

   push {r0,r1,r2,r7,lr}                          @ save registers
   mov r2,#0                                      @ length counter

1: @ loop length calculation

   ldrb r1,[r0,r2]                                @ read byte at start address + index 
   cmp r1,#0                                      @ zero byte -> end of message 
   addne r2,r2,#1                                 @ otherwise count it 
   bne 1b                                         @ and loop 
                                                  @ here r2 contains the length of the message 
   mov r1,r0                                      @ message address in r1 
   mov r0,#STDOUT                                 @ write to the standard output 
   mov r7, #WRITE                                 @ Linux "write" system call number 
   svc #0                                         @ perform the system call 
   pop {r0,r1,r2,r7,lr}                           @ restore registers 
   bx lr                                          @ return

/* output : g, HHH, 5, YY, ++, ///, \ */ </lang>

Arturo

<lang rebol>parts: [] current: "" loop split {gHHH5YY++///\} 'ch [

   if? or? empty? current
           contains? current ch -> 'current ++ ch
   else [
       'parts ++ current
       current: new ch
   ]

] 'parts ++ current print parts</lang>

Output:

g HHH 5 YY ++ /// \

AutoHotkey

<lang AutoHotkey>Split_Change(str){ for i, v in StrSplit(str) res .= (v=prev) ? v : (res?", " :"") v , prev := v return res }</lang> Examples:<lang AutoHotkey>str := "gHHH5YY++///\" MsgBox % Split_Change(str)</lang>

Outputs:

g, HHH, 5, YY, ++, ///, \

RegEx Version

<lang AutoHotkey>Split_Change(str){ return RegExReplace(str, "(.)\1*(?!$)", "$0, ") }</lang> Examples:<lang AutoHotkey>str := "gHHH5YY++///\" MsgBox % Split_Change(str)</lang>

Outputs:

g, HHH, 5, YY, ++, ///, \

AWK

<lang AWK># syntax: GAWK -f SPLIT_A_CHARACTER_STRING_BASED_ON_CHANGE_OF_CHARACTER.AWK

BEGIN {

   str = "gHHH5YY++///\\"
   printf("old: %s\n",str)
   printf("new: %s\n",split_on_change(str))
   exit(0)

} function split_on_change(str, c,i,new_str) {

   new_str = substr(str,1,1)
   for (i=2; i<=length(str); i++) {
     c = substr(str,i,1)
     if (substr(str,i-1,1) != c) {
       new_str = new_str ", "
     }
     new_str = new_str c
   }
   return(new_str)

} </lang>

Output:

old: gHHH5YY++///\
new: g, HHH, 5, YY, ++, ///, \

BaCon

Literal strings in BaCon are passed to the C compiler as they are; a backslash therefore needs to be escaped. <lang freebasic>txt$ = "gHHH5YY++///\\"

c$ = LEFT$(txt$, 1)

FOR x = 1 TO LEN(txt$)

   d$ = MID$(txt$, x, 1)
   IF d$ <> c$ THEN
       PRINT ", ";
       c$ = d$
   END IF
   PRINT d$;

NEXT</lang>

Output:

g, HHH, 5, YY, ++, ///, \

BASIC256

<lang freebasic>function split$(instring$)

   if length(instring$) < 2 then return instring$
   ret$ = left(instring$,1)
       for i = 2 to length(instring$)

if mid(instring$,i,1) <> mid(instring$, i-1, 1) then ret$ += ", " ret$ += mid(instring$, i, 1) next i

   return ret$

end function

print split$("gHHH5YY++///\")</lang>

BBC BASIC

<lang bbcbasic>REM >split PRINT FN_split( "gHHH5YY++///\" ) END

DEF FN_split( s$ ) LOCAL c$, split$, d$, i% c$ = LEFT$( s$, 1 ) split$ = "" FOR i% = 1 TO LEN s$

 LET d$ = MID$( s$, i%, 1 )
 IF d$ <> c$ THEN
   split$ += ", "
   c$ = d$
 ENDIF
 split$ += d$

NEXT = split$</lang>

Output:

g, HHH, 5, YY, ++, ///, \

C

<lang c>#include <stdio.h>
#include <stdlib.h>
#include <string.h>

char *split(char *str);

int main(void)
{
    /* Let the compiler size the array so the terminating NUL is included;
       a fixed size of 13 leaves no room for it with this 13-character text. */
    char input[] = "gHHH5YY++///\\";
    char *result = split(input);

    if (result == NULL) {
        perror("split");
        return EXIT_FAILURE;
    }
    printf("%s\n", result);
    free(result);
    return EXIT_SUCCESS;
}

/*
 * Return a newly allocated copy of str with ", " inserted wherever the
 * character changes.  The caller owns the result and must free() it.
 * Returns NULL if memory cannot be allocated.
 */
char *split(char *str)
{
    size_t len = strlen(str);
    /* Worst case (every character differs from its neighbour) needs
       len characters + 2 * (len - 1) separator bytes + 1 NUL = 3 * len - 1.
       3 * len + 1 covers that and also the empty string, which still
       needs one byte for the NUL. */
    char *result = malloc(3 * len + 1);
    char *out = result;
    char last = *str;
    char *shrunk;

    if (result == NULL)
        return NULL;

    for (const char *c = str; *c; c++) {
        if (*c != last) {
            *out++ = ',';
            *out++ = ' ';
            last = *c;
        }
        *out++ = *c;
    }
    *out = '\0';

    /* Give back the unused tail, keeping room for the terminating NUL.
       If shrinking fails the original block is still valid, so return it. */
    shrunk = realloc(result, (size_t)(out - result) + 1);
    return shrunk != NULL ? shrunk : result;
}</lang>

Output:

g, HHH, 5, YY, ++, ///, \

C#

<lang csharp>using System; using System.Linq; using System.Collections.Generic;

public class Program {

   // Entry point: splits the task's example string into runs of equal
   // characters and prints them separated by ", ".
   public static void Main() {
       // Verbatim string literal: the backslash stands for itself.
       string s = @"gHHH5YY++///\";
       Console.WriteLine(s.RunLengthSplit().Delimit(", "));
   }

}

public static class Extensions {

   /// <summary>
   /// Lazily yields the maximal runs of identical consecutive characters
   /// in <paramref name="source"/>, in order. An empty string yields nothing.
   /// </summary>
   public static IEnumerable<string> RunLengthSplit(this string source) {
       int start = 0; // index of the first character of the current run
       for (int i = 1; i <= source.Length; i++) {
           // The run ends at the end of the string or when the character changes.
           if (i == source.Length || source[i] != source[start]) {
               yield return source.Substring(start, i - start);
               start = i;
           }
       }
   }

   /// <summary>
   /// Joins the items of <paramref name="source"/> with
   /// <paramref name="separator"/> between them; a null separator is
   /// treated as the empty string.
   /// </summary>
   public static string Delimit<T>(this IEnumerable<T> source, string separator = "") => string.Join(separator ?? "", source);

}</lang>

Output:

g, HHH, 5, YY, ++, ///, \

C++

<lang cpp>// Solution for http://rosettacode.org/wiki/Split_a_character_string_based_on_change_of_character

#include <iostream>
#include <string>

// Returns a copy of input with delim inserted wherever the character changes.
// res.back() is always the most recently copied input character, because a
// character is appended after every delimiter, so comparing against it
// detects a change of run even when delim itself ends in that character.
auto split(const std::string& input, const std::string& delim){
    std::string res;
    for(auto ch : input){
        if(!res.empty() && ch != res.back())
            res += delim;
        res += ch;
    }
    return res;
}

int main(){
    // Two blanks after the '5', matching the three-blank field in the output below.
    std::cout << split("gHHH5  ))YY++,,,///\\", ", ") << std::endl;
}</lang>

Output:

g, HHH, 5,   , )), YY, ++, ,,,, ///, \

Clojure

<lang clojure>(defn print-cchanges [s]

 (println (clojure.string/join ", " (map first (re-seq #"(.)\1*" s)))))

(print-cchanges "gHHH5YY++///\\") </lang>

Output:

g, HHH, 5, YY, ++, ///, \

CLU

<lang clu>% Split a string based on a change of character split_on_change = iter (s: string) yields (string)

   part: string := ""
   for c: char in string$chars(s) do
       if ~string$empty(part) 
       cand part[string$size(part)] ~= c then 
           yield(part)
           part := ""
       end
       part := part || string$c2s(c)
   end
   yield(part)

end split_on_change

start_up = proc ()

   po: stream := stream$primary_output()
   str: string := "gHHH5YYY++///\\" % \\ escapes, as in C
   rslt: string := ""
   first: bool := true
   
   for part: string in split_on_change(str) do
       if first then first := false
       else rslt := rslt || ", "
       end
       rslt := rslt || part
   end
   stream$putl(po, rslt)

end start_up</lang>

Output:

g, HHH, 5, YYY, ++, ///, \

COBOL

<lang cobol>       identification division.
      program-id. split-ch.
      data division.
      1 split-str pic x(30) value space.
      88 str-1 value "gHHH5YY++///\".
      88 str-2 value "gHHH5  ))YY++,,,///\".
      1 binary.
       2 ptr pic 9(4) value 1.
       2 str-start pic 9(4) value 1.
       2 delim-len pic 9(4) value 1.
       2 split-str-len pic 9(4) value 0.
       2 trash-9 pic 9(4) value 0.
      1 delim-char pic x value space.
      1 delim-str pic x(6) value space.
      1 trash-x pic x.
      procedure division.
          display "Requested string"
          set str-1 to true
          perform split-init-and-go
          display space
          display "With spaces and commas"
          set str-2 to true
          perform split-init-and-go
          stop run
          .

      split-init-and-go.
          move 1 to ptr
          move 0 to split-str-len
          perform split
          .

      split.
          perform get-split-str-len
          display split-str (1:split-str-len)
          perform until ptr > split-str-len
              move ptr to str-start
              move split-str (ptr:1) to delim-char
              unstring split-str (1:split-str-len)
                  delimited all delim-char
                  into trash-x delimiter delim-str
                  pointer ptr
              end-unstring
              subtract str-start from ptr giving delim-len
              move split-str (str-start:delim-len)
                  to delim-str (1:delim-len)
              display delim-str (1:delim-len) with no advancing
              if ptr <= split-str-len
                  display ", " with no advancing
              end-if
          end-perform
          display space
          .

      get-split-str-len.
          inspect function reverse (split-str) tallying
              trash-9 for leading space
              split-str-len for characters after space
          .

      end program split-ch.

</lang>

Output:

Requested string
gHHH5YY++///\
g, HHH, 5, YY, ++, ///, \

With spaces and commas
gHHH5  ))YY++,,,///\
g, HHH, 5,   , )), YY, ++, ,,,, ///, \

Common Lisp

<lang lisp>(defun split (string)

 (loop :for prev := nil :then c
    :for c :across string
    :do (format t "~:[~;, ~]~c" (and prev (char/= c prev)) c)))

(split "gHHH5YY++///\\") </lang>

Output:

g, HHH, 5, YY, ++, ///, \

Doing more work than what's being asked, the following solution builds a list of strings and then outputs it:

<lang lisp>(defun split (string)

 (flet ((make-buffer ()
          (make-array 0 :element-type 'character :adjustable t :fill-pointer t)))
   (loop with buffer = (make-buffer)
         with result
         for prev = nil then c
         for c across string
         when (and prev (char/= c prev))
           do (push buffer result)
              (setf buffer (make-buffer))
         do (vector-push-extend c buffer)
         finally (push buffer result)
                 (format t "~{~A~^, ~}"(nreverse result)))))

(split "gHHH5YY++///\\")</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Cowgol

<lang cowgol>include "cowgol.coh";

sub split(in: [uint8], buf: [uint8]): (out: [uint8]) is

   out := buf;
   loop
       [buf] := [in];
       if [in] == 0 then break; end if;
       if [in] != [@next in] and [@next in] != 0 then
           [buf+1] := ',';
           [buf+2] := ' ';
           buf := buf+2;
       end if;
       buf := buf+1;
       in := in+1;
   end loop;

end sub;

var buf: uint8[32];

print(split("gHHH5YY++//\\", &buf[0])); print_nl();</lang>

Output:

g, HHH, 5, YY, ++, //, \

D

<lang D>import std.stdio;

void main() {

   auto source = "gHHH5YY++///\\";

   char prev = source[0];
   foreach(ch; source) {
       if (prev != ch) {
           prev = ch;
           write(", ");
       }
       write(ch);
   }
   writeln();

}</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Dyalect

<lang dyalect>func String.smartSplit() {

   var c
   var str = ""
   var last = this.len() - 1

   for n in 0..last {
       if c && this[n] != c {
           str += ", "
       }
       c = this[n]
       str += c
   }

str

}

print("gHHH5YY++///\\".smartSplit())</lang>

Output:

g, HHH, 5, YY, ++, ///, \

EasyLang

<lang>a$ = "gHHH5YY++///\\" a$[] = strchars a$ cp$ = a$[0] for c$ in a$[]

 if c$ <> cp$
   s$ &= ", "
   cp$ = c$
 .
 s$ &= c$

. print s$</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Elixir

<lang elixir>split = fn str ->

         IO.puts " input string: #{str}"
         String.graphemes(str)
         |> Enum.chunk_by(&(&1))
         |> Enum.map_join(", ", &Enum.join &1)
         |> fn s -> IO.puts "output string: #{s}" end.()
       end

split.("gHHH5YY++///\\")</lang>

Output:

 input string: gHHH5YY++///\
output string: g, HHH, 5, YY, ++, ///, \

F#

<lang fsharp>open System.Text.RegularExpressions let splitRuns s = Regex("""(.)\1*""").Matches(s) |> Seq.cast<Match> |> Seq.map (fun m -> m.Value) |> Seq.toList printfn "%A" (splitRuns """gHHH5YY++///\""")</lang>

Output:

["g"; "HHH"; "5"; "YY"; "++"; "///"; "\"]

Factor

<lang factor>USE: splitting.monotonic "gHHH5YY++///\\" "aaabbccccdeeff" [ [ = ] monotonic-split ", " join print ] bi@</lang>

Output:

g, HHH, 5, YY, ++, ///, \
aaa, bb, cccc, d, ee, ff

Forth

Works with: Gforth version 0.7.3

<lang Forth>CREATE A 0 ,    \ A holds the address of the next character to read

\ Fetch the character A points at, then advance A by one character.
: C@A+ ( -- c)  A @ C@ [ 1 CHARS ]L A +! ;

\ Print the string c-addr u, inserting ", " at every change of character.
\ Loop invariant on the stack: ( remaining-count previous-char ).
: SPLIT. ( c-addr u --) SWAP A ! A @ C@

  BEGIN OVER WHILE
    C@A+  TUCK  <> IF ." , " THEN   
    DUP EMIT  SWAP 1- SWAP
  REPEAT  2DROP ;   \ drop both the exhausted count and the last character

\ Show a string and its split form.
: TEST ( c-addr u --) OVER OVER

  ." input: " TYPE CR
  ." split: " SPLIT. CR ;

s" gHHH5YY++///\" TEST
s" gHHH5  ))YY++,,,///\" TEST
BYE</lang>

Output:

input: gHHH5YY++///\
split: g, HHH, 5, YY, ++, ///, \
input: gHHH5  ))YY++,,,///\
split: g, HHH, 5,   , )), YY, ++, ,,,, ///, \

Fortran

This is F77 style, except for the END SUBROUTINE SPLATTER which would be just END, which for F90 is also allowable outside of the MODULE protocol. Linking the start/stop markers by giving the same name is helpful, especially when the compiler checks for this. The $ symbol at the end of a FORMAT code sequence is a common F77 extension, meaning "do not finish the line" so that a later output will follow on. This is acceptable to F90 and is less blather than adding the term ,ADVANCE = "NO" inside a WRITE statement that would otherwise be required. Output is to I/O unit 6 which is the modern default for "standard output". The format code is A meaning "any number of characters" rather than A1 for "one character" so as to accommodate not just the single character from TEXT but also the two characters of ", " for the splitter between sequences. Alas, there is no provision to change fount or colour for this, to facilitate the reader's attempts to parse the resulting list especially when the text includes commas or spaces of its own. By contrast, with quoted strings, the standard protocol is to double contained quotes.

An alternative method would be to prepare the entire output in a CHARACTER variable then write that, but this means answering the maddening question "how long is a piece of string?" for that variable, though later Fortran has arrangements whereby a text variable is resized to suit on every assignment, as in TEMP = TEMP // more - but this means repeatedly copying the text to the new manifestation of the variable. Still another approach would be to prepare an array of fingers to each split point (as in Phrase_reversals#Fortran) so that the final output would be a single WRITE using that array, and again, how big must the array be? At most, as big as the number of characters in TEXT. With F90, subroutines can declare arrays of a size determined on entry, with something like INTEGER A(LEN(TEXT))

If the problem were to be solved by writing a "main line" only, there would have to be a declaration of the text variable there but since a subroutine can receive a CHARACTER variable of any size (the actual size is passed as a secret parameter), this can be dodged.

For this example a DO-loop stepping along the text is convenient, but in a larger context it would probably be most useful to work along the text with fingers L1 and L2 marking the start and finish positions of each sequence. <lang Fortran> SUBROUTINE SPLATTER(TEXT) !Print a comma-separated list. Repeated characters constitute one item. Can't display the inserted commas in a different colour so as not to look like any commas in TEXT.

      CHARACTER*(*) TEXT	!The text.
      INTEGER L	!A finger.
      CHARACTER*1 C	!A state follower.
       IF (LEN(TEXT).LE.0) RETURN	!Prevent surprises in the following..
       C = TEXT(1:1)			!Syncopation: what went before.
       DO L = 1,LEN(TEXT)	!Step through the text.
         IF (C.NE.TEXT(L:L)) THEN	!A change of character?
           C = TEXT(L:L)			!Yes. This is the new normal.
           WRITE (6,1) ", "			!Set off from what went before. This is not from TEXT.
         END IF			!So much for changes.
         WRITE (6,1) C			!Roll the current character. (=TEXT(L:L))
   1     FORMAT (A,$)			!The $ sez: do not end the line.
       END DO			!On to the next character.
       WRITE (6,1)	!Thus end the line. No output item means that the $ is not reached, so the line is ended.
     END SUBROUTINE SPLATTER	!TEXT with spaces, or worse, commas, will produce an odd-looking list.

     PROGRAM POKE
     CALL SPLATTER("gHHH5YY++///\")	!The example given.
     END</lang>

Unfortunately, the syntax highlighter has failed to notice the terminating quote character, presumably because the preceding backslash might be an "escape sequence" trigger, a facility not used in Fortran text literals except possibly as a later modernist option.

Output:

g, HHH, 5, YY, ++, ///, \

FreeBASIC

<lang freebasic>' Returns instring with ", " inserted wherever the character changes.
' Strings shorter than two characters cannot contain a change and are
' returned as they are.
function split( instring as string ) as string

   if len(instring) < 2 then return instring
   dim as string ret = left(instring,1)
   for i as uinteger = 2 to len(instring)
       ' a character that differs from its predecessor starts a new run
       if mid(instring,i,1)<>mid(instring, i - 1, 1) then ret += ", "
       ret += mid(instring, i, 1)
   next i
   return ret

end function</lang>

Go

Treating "character" as a byte: <lang go>package main

import (

   "fmt"
   "strings"

)

func main() {

   fmt.Println(scc(`gHHH5YY++///\`))

}

func scc(s string) string {

   if len(s) < 2 {
       return s
   }
   var b strings.Builder
   p := s[0]
   b.WriteByte(p)
   for _, c := range []byte(s[1:]) {
       if c != p {
           b.WriteString(", ")
       }
       b.WriteByte(c)
       p = c
   }
   return b.String()

}</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Haskell

<lang Haskell>import Data.List (group, intercalate)

main :: IO () main = putStrLn $ intercalate ", " (group "gHHH5YY++///\\")</lang>

Output:

g, HHH, 5, YY, ++, ///, \

or as a hand-written fold: <lang haskell>import Data.List (intercalate) import Data.Bool (bool)

charGroups :: String -> [String] charGroups =

 let go (a, b) (s, groups)
       | a == b = (b : s, groups)
       | otherwise =
           ( [a],
             bool s [b] (null s) : groups
           )
  in uncurry (:) . foldr go ([], []) . (zip <*> tail)

main :: IO () main =

 putStrLn $ intercalate ", " $ charGroups "gHHH5YY++///\\"</lang>

g, HHH, 5, YY, ++, ///, \

or in terms of span: <lang haskell>import Data.List (intercalate)

charGroups :: String -> [String] charGroups [] = [] charGroups (c : cs) =

 let (xs, ys) = span (c ==) cs
  in (c : xs) : charGroups ys

main :: IO () main =

 putStrLn $ intercalate ", " $ charGroups "gHHH5YY++///\\"</lang>

Output:

g, HHH, 5, YY, ++, ///, \

IS-BASIC

<lang IS-BASIC>100 LET S$="gHHH5YY++///\" 110 PRINT S$(1); 120 FOR I=2 TO LEN(S$) 130 IF S$(I)<>S$(I-1) THEN PRINT ", "; 140 PRINT S$(I); 150 NEXT 160 PRINT</lang>

J

Solution: <lang j>splitChars=: (1 ,~ 2 ~:/\ ]) <;.2 ] delimitChars=: ', ' joinstring splitChars</lang> Example Usage: <lang j> delimitChars 'gHHH5YY++///\' g, HHH, 5, YY, ++, ///, \</lang>

Java

<lang Java>package org.rosettacode;

import java.util.ArrayList; import java.util.List;

/**

* This class provides a main method that will, for each arg provided,
* transform a String into a list of sub-strings, where each contiguous
* series of characters is made into a String, then the next, and so on,
* and then it will output them all separated by a comma and a space.
*/

public class SplitStringByCharacterChange {

   public static void main(String... args){
       for (String string : args){
           
           List<String> resultStrings = splitStringByCharacter(string);
           String output = formatList(resultStrings);
           System.out.println(output);
       }
   }
   
   /**
    * @param string String - String to split
    * @return List<\String> - substrings of contiguous characters
    */
   public static List<String> splitStringByCharacter(String string){
       
       List<String> resultStrings = new ArrayList<>();
       StringBuilder currentString = new StringBuilder();
       
       for (int pointer = 0; pointer < string.length(); pointer++){
           
           currentString.append(string.charAt(pointer));
           
           if (pointer == string.length() - 1 
                   || currentString.charAt(0) != string.charAt(pointer + 1)) {
               resultStrings.add(currentString.toString());
               currentString = new StringBuilder();
           }
       }
       
       return resultStrings;
   }
   
   /**
    * @param list List<\String> - list of strings to format as a comma+space-delimited string
    * @return String
    */
   public static String formatList(List<String> list){
       
       StringBuilder output = new StringBuilder();
       
       for (int pointer = 0; pointer < list.size(); pointer++){
           output.append(list.get(pointer));
           
           if (pointer != list.size() - 1){
               output.append(", ");
           }
       }
       
       return output.toString();
   }

}</lang>

Output:

g, HHH, 5, YY, ++, ///, \

JavaScript

ES6

Translation of: Haskell

<lang JavaScript>(() => {

   "use strict";

   // ----------- SPLIT ON CHARACTER CHANGES ------------
   const main = () =>
       group("gHHH5YY++///\\")
       .map(x => x.join(""))
       .join(", ");

   // --------------------- GENERIC ---------------------

   // group :: [a] -> [[a]]
   const group = xs =>
       // A list of lists, each containing only
       // elements equal under (===), such that the
       // concatenation of these lists is xs.
       groupBy(a => b => a === b)(xs);

   // groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
   const groupBy = eqOp =>
       // A list of lists, each containing only elements
       // equal under the given equality operator,
       // such that the concatenation of these lists is xs.
       xs => 0 < xs.length ? (() => {
           const [h, ...t] = xs;
           // Fold over the tail, carrying (finished groups, current group);
           // each x is compared with the first element of the current group.
           const [groups, g] = t.reduce(
               ([gs, a], x) => eqOp(x)(a[0]) ? (
                   Tuple(gs)([...a, x])
               ) : Tuple([...gs, a])([x]),
               Tuple([])([h])
           );

           // Append the group that was still open when the input ran out.
           return [...groups, g];
       })() : [];

   // Tuple (,) :: a -> b -> (a, b)
   const Tuple = a =>
       b => ({
           type: "Tuple",
           "0": a,
           "1": b,
           length: 2,
           *[Symbol.iterator]() {
               for (const k in this) {
                   if (!isNaN(k)) {
                       yield this[k];
                   }
               }
           }
       });

   // MAIN ---
   return main();

})();</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Or, in terms of a general `span` function: <lang javascript>(() => {

   "use strict";

   // -------- STRING SPLIT ON CHARACTER CHANGES --------

   // charGroups :: String -> [String]
   const charGroups = s =>
       // The characters of s split at each point where
       // consecutive characters differ.
       // The empty case yields an empty list (not an empty string), so the
       // result is always an array and callers can chain .join() safely.
       0 < s.length ? (() => {
           const
               c = s[0],
               [xs, ys] = span(x => c === x)([
                   ...s.slice(1)
               ]);

           // The leading run is joined here; the runs of the remainder
           // come back from the recursive call already joined.
           return [
               [c, ...xs].join(""), ...charGroups(ys)
           ];
       })() : [];

   // ---------------------- TEST -----------------------
   // main :: IO()
   const main = () =>
       charGroups("gHHH5YY++///\\")
       .join(", ");

   // --------------------- GENERIC ---------------------

   // span :: (a -> Bool) -> [a] -> ([a], [a])
   const span = p =>
       // Longest prefix of xs consisting of elements which
       // all satisfy p, tupled with the remainder of xs.
       xs => {
           const i = xs.findIndex(x => !p(x));

           return -1 !== i ? (
               [xs.slice(0, i),
                   xs.slice(i)
               ]
           ) : [xs, []];
       };

   // MAIN ---
   return main();

})();</lang>

Output:

g, HHH, 5, YY, ++, ///, \

jq

<lang jq># input: a string
# output: a stream of runs (maximal substrings of one repeated character)
def runs:
  # init: length of the leading run, i.e. the index of the first codepoint
  # that differs from the first one. Indexing past the end yields null,
  # which never equals a codepoint, so the scan always stops.
  def init:
    explode as $s
    | $s[0] as $i
    | (1 | until( $s[.] != $i; .+1));
  if length == 0 then empty
  elif length == 1 then .
  else init as $n | .[0:$n], (.[$n:] | runs)
  end;

"gHHH5YY++///\\" | [runs] | join(", ")</lang>

Output:

Using the -r ("raw output") command-line option of jq:

g, HHH, 5, YY, ++, ///, \

Jsish

Showing off a little unit testing...

Starting with <lang javascript>#!/usr/bin/env jsish

'Split a string based on change of character, in Jsish';

function splitOnChange(str:string):string {

   if (str.length < 2) return str;
   var last = str[0];
   var result = last;
   for (var pos = 1; pos < str.length; pos++) {
       result += ((last == str[pos]) ? last : ', ' + str[pos]);
       last = str[pos];
   }
   return result;

} provide('splitOnChange', 1.0);

/* literal backslash needs escaping during initial processing */

splitOnChange('gHHH5YY++///\\');
splitOnChange('a');
splitOnChange('ab');
splitOnChange('aaa');
splitOnChange('aaaba');
splitOnChange('gH HH5YY++//,/\\');</lang>

Then

prompt$ jsish -u -update true splitOnChange.jsi
Created splitOnChange.jsi

Giving

<lang javascript>#!/usr/bin/env jsish

'Split a string based on change of character, in Jsish';

function splitOnChange(str:string):string {

   if (str.length < 2) return str;
   var last = str[0];
   var result = last;
   for (var pos = 1; pos < str.length; pos++) {
       (last == str[pos]) ? result += last : result += ', ' + str[pos];
       last = str[pos];
   }
   return result;

} provide('splitOnChange', 1.0);

/* literal backslash needs escaping during initial processing */

splitOnChange('gHHH5YY++///\\');
splitOnChange('a');
splitOnChange('ab');
splitOnChange('aaa');
splitOnChange('aaaba');
splitOnChange('gH HH5YY++//,/\\');

/*

!EXPECTSTART!

'Split a string based on change of character, in Jsish' splitOnChange('gHHH5YY++///\') ==> g, HHH, 5, YY, ++, ///, \ splitOnChange('a') ==> a splitOnChange('ab') ==> a, b splitOnChange('aaa') ==> aaa splitOnChange('aaaba') ==> aaa, b, a splitOnChange('gH HH5YY++//,/\') ==> g, H, , HH, 5, YY, ++, //, ,, /, \

!EXPECTEND!

*/</lang>

Which tests as:

prompt$ jsish -u splitOnChange.jsi
[PASS] splitOnChange.jsi

And then satisfying the task of showing the one result, using the script as a module:

Output:

prompt$ jsish
Jsish interactive: see 'help [cmd]'.  \ cancels > input.  ctrl-c aborts running script.
# require('splitOnChange');
1
# puts(splitOnChange('gHHH5YY++///\\'));
g, HHH, 5, YY, ++, ///, \

Julia

<lang julia># v0.6
using IterTools   # provides groupby, which groups consecutive equal elements

str = "gHHH5YY++///\\"
sep = map(join, groupby(identity, str))   # one string per run of equal characters
println("string: $str\nseparated: ", join(sep, ", "))</lang>

Output:

string: gHHH5YY++///\
separated: g, HHH, 5, YY, ++, ///, \

Kotlin

<lang scala>// version 1.0.6

/** Returns [s] with ", " inserted wherever two neighbouring characters differ. */
fun splitOnChange(s: String): String {
    if (s.length < 2) return s

    val out = StringBuilder()
    for ((index, ch) in s.withIndex()) {
        // A separator goes in front of every character that starts a new run.
        if (index > 0 && ch != s[index - 1]) out.append(", ")
        out.append(ch)
    }
    return out.toString()
}

fun main(args: Array<String>) {

   val s = """gHHH5YY++///\"""
   println(splitOnChange(s))

}</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Lambdatalk

<lang scheme> {def mysplit

{def mysplit.r
 {lambda {:w :i}
  {if {> :i {W.length :w}}
   then 
   else {if {not {W.equal? {W.get :i :w} {W.get {+ :i 1} :w}}}
         then ____ else} {W.get {+ :i 1} :w}{mysplit.r :w {+ :i 1}}}}}
{lambda {:w}
 {S.replace ____ by in {mysplit.r #:w 0}}}}

-> mysplit

{mysplit gHHH5YY++///\} -> g HHH 5 YY ++ /// \ </lang>

Lua

Note that the backslash must be quoted as a double backslash as Lua uses C-like escape sequences. <lang Lua>function charSplit (inStr)

   local outStr, nextChar = inStr:sub(1, 1)
   for pos = 2, #inStr do
       nextChar = inStr:sub(pos, pos)
       if nextChar ~= outStr:sub(#outStr, #outStr) then
           outStr = outStr .. ", "
       end
       outStr = outStr .. nextChar
   end
   return outStr

end

print(charSplit("gHHH5YY++///\\"))</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Alternative: Simply scan difference in reverse order and insert delimiter in place, the loop counter i will not update with length of s. <lang lua>function splitdiff(s)

 for i=#s,2,-1 do
   if s:sub(i,i)~=s:sub(i-1,i-1) then
     s = s:sub(1,i-1)..', '.. s:sub(i,-1)
   end
 end
 return s

end</lang>

Ksh

<lang ksh>#!/bin/ksh

# Split a character string based on change of character

# Variables:

str='gHHH5YY++///\' delim=', '

# Functions:
# Function _splitonchg(str, delim) - return str split by delim at char change

function _splitonchg { typeset _str ; _str="$1" typeset _delim ; _delim="$2" typeset _i _splitstr ; integer _i

for ((_i=1; _i<${#_str}+1; _i++)); do if [[ "${_str:$((_i-1)):1}" != "${_str:${_i}:1}" ]]; then _splitstr+="${_str:$((_i-1)):1}${_delim}" else _splitstr+="${_str:$((_i-1)):1}" fi done echo "${_splitstr%"${_delim}"*}" }

######

main #

######

print "Original: ${str}" print " Split: $(_splitonchg "${str}" "${delim}")" </lang>

Output:

Original: gHHH5YY++///\

Split: g, HHH, 5, YY, ++, ///, \

M2000 Interpreter

Stack New opens a new stack object as the current stack and keeps the old one; after the block finishes executing, the old stack becomes the current stack again. The Data statement pushes to the bottom of the stack (we read from the top, so using Data gives FIFO order). Letter$ pops a string, or raises an error if no string is found at the top of the stack.

<lang M2000 Interpreter> Module PrintParts(splitthis$) {

     Def string m$, p$
     Def long c
     Stack New {
           if len(splitthis$)=0 then exit
           For i=1 to len(splitthis$)
                 p$=mid$(splitthis$,i,1)
                 if m$<>p$ then {
                       if c>0 then data string$(m$, c)
                       m$=p$
                       c=1
                 } else c++
           Next i
           if c>0 then data string$(m$, c)
           While stack.size>1 {
                 Print letter$+", ";
           }
           If not empty then Print letter$
     }

} PrintParts "gHHH5YY++///\" </lang>

Maple

Added an additional backslash to escape the \ character at the end. <lang Maple>splitChange := proc(str::string) local start,i,len; start := 1; len := StringTools:-Length(str); for i from 2 to len do if str[i] <> str[start] then printf("%s, ", str[start..i-1]); start := i: end if; end do; printf("%s", str[start..len]); end proc; splitChange("gHHH5YY++///\\");</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Mathematica /Wolfram Language

The backslash (\) must be escaped with another backslash when defining the string. <lang Mathematica>StringJoin@@Riffle[StringCases["gHHH5YY++///\\", p : (x_) .. -> p], ", "]</lang>

Output:

g, HHH, 5, YY, ++, ///, \

MiniScript

<lang MiniScript>s = "gHHH5YY++///\" output = [] lastLetter = s[0] for letter in s

   if letter != lastLetter then output.push ", "
   output.push letter
   lastLetter = letter

end for print output.join("")</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Modula-2

<lang modula2>MODULE CharacterChange; FROM Terminal IMPORT Write,WriteString,WriteLn,ReadChar;

PROCEDURE Split(str : ARRAY OF CHAR); VAR

   i : CARDINAL;
   c : CHAR;

BEGIN

   FOR i:=0 TO HIGH(str) DO
       IF i=0 THEN
           c := str[i]
       ELSIF str[i]#c THEN
           c := str[i];
           WriteLn;
       END;
       Write(c)
   END

END Split;

CONST EX = "gHHH5YY++///\"; BEGIN

   Split(EX);

   ReadChar

END CharacterChange.</lang>

Output:

g
HHH
5
YY
++
///
\

Nim

<lang nim>proc splitOnDiff(str: string): string =

 result = ""

 if str.len < 1: return result

 var prevChar: char = str[0]

 for idx in 0 ..< str.len:
   if str[idx] != prevChar:
     result &= ", "
     prevChar = str[idx]

   result &= str[idx]

assert splitOnDiff("""X""") == """X""" assert splitOnDiff("""XX""") == """XX""" assert splitOnDiff("""XY""") == """X, Y""" assert splitOnDiff("""gHHH5YY++///\""") == """g, HHH, 5, YY, ++, ///, \"""

echo splitOnDiff("""gHHH5YY++///\""")</lang>

Output:

g, HHH, 5, YY, ++, ///, \

ooRexx

<lang oorexx>Parse Arg str .                          /* obtain optional argument from the CL  */
If str=='' Then str='gHHH5YY++///\'      /* Not specified? Then use the default.  */
i=1                                      /* start position of the current run     */
ol=''                                    /* output built so far                   */
Do Forever
  /* No length limit is passed to verify, so a run of any size is scanned to its end. */
  j=verify(str,substr(str,i,1),'N',i)    /* find first character that's different */
  If j=0 Then Do                         /* End of string reached                 */
    ol=ol||substr(str,i)                 /* the final substring                   */
    Leave
    End
  ol=ol||substr(str,i,j-i)', '           /* add substring and delimiter           */
  i=j
  End

Say ol</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Pascal

<lang pascal>program SplitChars; {$IFDEF FPC}

 {$MODE DELPHI}{$COPERATORS ON}

{$ENDIF} const

 TestString =  'gHHH5YY++///\';

{ Returns S with ', ' inserted wherever two neighbouring characters differ.
  An empty S yields an empty result. }
function SplitAtChars(const S: String):String;
var
  i : integer;
  lastChar : Char;
begin
  result := '';
  if length(S) > 0 then
  begin
    lastChar := S[1];
    result := lastChar;
    for i := 2 to length(S) do
    begin
      if S[i] <> lastChar then
      begin
        lastChar := S[i];             { a new run starts at position i }
        result := result + ', ';
      end;
      result := result + lastChar;    { lastChar equals S[i] at this point }
    end;
  end;
end;

BEGIN

 writeln(SplitAtChars(TestString));

end.</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Perl

<lang perl>use strict; use warnings; use feature 'say'; use utf8; binmode(STDOUT, ':utf8');

for my $string (q[gHHH5YY++///\\], q[fffn⃗n⃗n⃗»»» ℵℵ☄☄☃☃̂☃🤔🇺🇸🤦♂️👨‍👩‍👧‍👦]) {

   my @S;            # one entry per run of identical graphemes
   my $last = '';    # previous grapheme; empty before the first match
   # \X matches one extended grapheme cluster, so combining sequences
   # are compared as a unit.
   while ($string =~ /(\X)/g) {
       if ($last eq $1) { $S[-1] .= $1 } else { push @S, $1 }
       $last = $1;
   }
   say "Orginal: $string\n  Split: 「" . join('」, 「', @S) . "」\n";

}</lang>

Output:

Orginal: gHHH5YY++///\
  Split: 「g」, 「HHH」, 「5」, 「YY」, 「++」, 「///」, 「\」

Orginal: fffn⃗n⃗n⃗»»»  ℵℵ☄☄☃☃̂☃🤔🇺🇸🤦♂️👨‍👩‍👧‍👦
  Split: 「fff」, 「」, 「n⃗n⃗n⃗」, 「»»»」, 「  」, 「ℵℵ」, 「☄☄」, 「☃」, 「☃̂」, 「☃」, 「🤔」, 「🇺🇸」, 「🤦♂️」, 「👨‍👩‍👧‍👦」

Phix

function split_on_change(string s)
    string res = ""
    if length(s) then
        integer prev = s[1]
        for i=1 to length(s) do
            integer ch = s[i]
            if ch!=prev then
                res &= ", "
                prev = ch
            end if
            res &= ch
        end for
    end if
    return res
end function
 
puts(1,split_on_change(`gHHH5YY++///\`))

Output:

g, HHH, 5, YY, ++, ///, \

PicoLisp

<lang PicoLisp>(de splitme (Str)

  (let (Str (chop Str)  Fin)
     (glue
        ", "
        (make
           (for X Str
              (if (= X (car Fin))
                 (conc Fin (cons X))
                 (link (setq Fin (cons X))) ) ) ) ) ) )

(prinl (splitme "gHHH5YY++///\\"))</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Pike

<lang Pike> string input = "gHHH5YY++///\\"; // \ needs escaping string last_char; foreach(input/1, string char) {

   if(last_char && char != last_char)
       write(", ");
   write(char);
   last_char = char;

}

</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Plain English

To make sense of this example, you must understand riders. A rider is a simple abstraction for efficiently parsing strings. A rider is a record with an original substring, a source substring, and a token substring.

After executing the following code, for example: <lang plainenglish>Put "abcdef" into a string. Slap a rider on the string.</lang>

The rider looks like this: <lang plainenglish>Original: "abcdef" Source: "abcdef" Token: ""</lang>

Now when we Bump the rider., it looks like this: <lang plainenglish>Original: "abcdef" Source: "bcdef" Token: "a"</lang>

Another bump, and: <lang plainenglish>Original: "abcdef" Source: "cdef" Token: "ab"</lang>

Now let's say we have a complete token and want to start a new one. We can Position the rider's token on the rider's source. and now the rider looks like this:

<lang plainenglish>Original: "abcdef" Source: "cdef" Token: ""</lang>

And that's all there is to it.

<lang plainenglish>To run: Start up. Split "gHHH5YY++///\" into some string things by change of character. Write the string things on the console. Destroy the string things. Wait for the escape key. Shut down.

To split a string into some string things by change of character: If the string's length is less than 2, add the string to the string things; exit. Slap a rider on the string. Loop. Move the rider (change of character rules). Add the rider's token to the string things. If the rider's source is blank, exit. Repeat.

To move a rider (change of character rules): Position the rider's token on the rider's source. Loop. If the rider's source is blank, exit. If the rider's token is blank, bump the rider; repeat. Put the rider's token's last plus 1 into a byte pointer. If the rider's token's last's target is not the byte pointer's target, exit. Bump the rider. Repeat.

To write some string things to a console; To write some string things on a console: Get a string thing from the string things. Loop. If the string thing is nil, write "" on the console; exit. Write the string thing's string on the console without advancing. If the string thing's next is not nil, write ", " on the console without advancing. Put the string thing's next into the string thing. Repeat.</lang>

Output:

g, HHH, 5, YY, ++, ///, \

PowerShell

Translation of: BBC BASIC

<lang PowerShell> function Split-String ([string]$String) {

    # Returns $String with ", " inserted wherever two neighbouring characters differ.
    # An empty input yields an empty string instead of failing in Substring(0,1).
    if ($String.Length -eq 0) { return '' }

    [string]$c = $String.Substring(0,1)
    [string]$splitString = $c

    for ($i = 1; $i -lt $String.Length; $i++)
    {
        [string]$d = $String.Substring($i,1)

        # -cne compares case-sensitively; plain -ne would treat "a" and "A" as one run.
        if ($d -cne $c)
        {
            $splitString += ", "
            $c = $d
        }

        $splitString += $d
    }

    $splitString

} </lang> <lang PowerShell> Split-String "gHHH5YY++///\" </lang>

Output:

g, HHH, 5, YY, ++, ///, \

PureBasic

<lang purebasic>Procedure splitstring(s$)

 Define *p.Character = @s$,
        c_buf.c = *p\c  
 While *p\c
   If *p\c = c_buf      
     Print(Chr(c_buf))
   Else
     Print(", ")
     c_buf = *p\c
     Continue
   EndIf
   *p + SizeOf(Character)
 Wend

EndProcedure

If OpenConsole()

 splitstring("gHHH5YY++///\")
 Input()

EndIf</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Python

Python3.6+

Using [itertools.groupby]. <lang python>from itertools import groupby

def splitter(text):
    """Return text with ', ' inserted at every change of character.

    groupby yields one group per run of consecutive equal characters;
    each group is glued back into a string before the runs are joined.
    """
    return ', '.join(''.join(group) for key, group in groupby(text))

if __name__ == '__main__':

   txt = 'gHHH5YY++///\\'      # Note backslash is the Python escape char.
   print(f'Input: {txt}\nSplit: {splitter(txt)}')</lang>

Output:

Input: gHHH5YY++///\
Split: g, HHH, 5, YY, ++, ///, \

Python: Using zip

<lang python>def splitterz(text):

   return (.join(x + ( if x == nxt else ', ') 
           for x, nxt in zip(txt, txt[1:] + txt[-1])))

if __name__ == '__main__':

   txt = 'gHHH5YY++///\\'
   print(splitterz(txt))</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Python2

<lang python>import itertools

try: input = raw_input except: pass

s = input() groups = [] for _, g in itertools.groupby(s):

   groups.append(.join(g))

print(' input string: %s' % s) print(' output string: %s' % ', '.join(groups))</lang>

Output:

when using the default input

      input string:  gHHH5YY++///\
     output string:  g, HHH, 5, YY, ++, ///, \

Quackery

<lang Quackery>[ dup size 2 <

 iff size done
 behead swap
 [] nested join
 witheach
   [ over != if 
     [ drop i^ 1+
       conclude ] ] ] is $run  ( $ --> n )

[ dup size 2 < if done

 dup $run split
 dup [] = 
 iff drop done
 dip [ $ ", " join ]
 recurse join ]       is runs$ ( $ --> $ )

</lang> Testing in Quackery shell.

/O> $ "gHHH5YY++///\" runs$ echo$
... 
g, HHH, 5, YY, ++, ///, \
Stack empty.

Racket

Translation of: Python

<lang racket>#lang racket
;; Split s into its maximal runs of one repeated character.
;; Only *consecutive* equal characters belong to the same run, so
;; "aabaa" gives ("aa" "b" "aa").
(define (split-strings-on-change s)
  (let loop ([chars (string->list s)] [runs '()])
    (if (null? chars)
        (reverse runs)
        ;; splitf-at peels off the longest prefix equal to the first character.
        (let-values ([(run rest)
                      (splitf-at chars (lambda (c) (char=? c (car chars))))])
          (loop rest (cons (list->string run) runs))))))

(displayln (string-join (split-strings-on-change #<<< gHHH5YY++///\ <

                                                )
                       ", "))</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Raku

(formerly Perl 6)

Works with: Rakudo version 2017.05

<lang perl6>sub group-chars ($str) { $str.comb: / (.) $0* / }

Testing:

for Q[gHHH5YY++///\], Q[fffn⃗n⃗n⃗»»» ℵℵ☄☄☃☃̂☃🤔🇺🇸🤦‍♂️👨‍👩‍👧‍👦] -> $string {

   put 'Original: ', $string;
   put '   Split: ', group-chars($string).join(', ');

}</lang>

Output:

Original: gHHH5YY++///\
   Split: g, HHH, 5, YY, ++, ///, \
Original: fffn⃗n⃗n⃗»»»  ℵℵ☄☄☃☃̂☃🤔🇺🇸🤦‍♂️👨‍👩‍👧‍👦
   Split: fff, , n⃗n⃗n⃗, »»»,   , ℵℵ, ☄☄, ☃, ☃̂, ☃, 🤔, 🇺🇸, 🤦‍♂️, 👨‍👩‍👧‍👦

The second test-case is to show that Raku works with strings on the Unicode grapheme level, handles whitespace, combiners, and zero width characters up to Unicode Version 13.0 correctly. (Raku generally tracks updates to the Unicode spec and typically lags no more than a month behind.) For those of you with browsers unable to display the second string, it consists of:

{LATIN SMALL LETTER F} x 3
{ZERO WIDTH NO-BREAK SPACE} x 3
{LATIN SMALL LETTER N, COMBINING RIGHT ARROW ABOVE} x 3
{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK} x 3
{SPACE} x 2,
{ALEF SYMBOL} x 2,
{COMET} x 2,
{SNOWMAN} x 1,
{SNOWMAN, COMBINING CIRCUMFLEX ACCENT} x 1
{SNOWMAN} x 1,
{THINKING FACE} x 1
{REGIONAL INDICATOR SYMBOL LETTER U, REGIONAL INDICATOR SYMBOL LETTER S} x 1
{FACE PALM, ZERO WIDTH JOINER, MALE SIGN, VARIATION SELECTOR-16} x 1
{MAN, ZERO WIDTH JOINER, WOMAN, ZERO WIDTH JOINER, GIRL, ZERO WIDTH JOINER, BOY} x 1

REXX

version 1

<lang rexx>/*REXX program splits a string based on change of character ───► a comma delimited list.*/
parse arg str                                    /*obtain optional arguments from the CL*/
if str==''  then str= 'gHHH5YY++///\'            /*Not specified?  Then use the default.*/
p=left(str, 1)                                   /*placeholder for the "previous" string*/
$=                                               /*     "       "   "    output      "  */
     do j=1  for length(str);  @=substr(str,j,1) /*obtain a character from the string.  */
     if @\==p  then $=$', '                      /*Not replicated char? Append delimiter*/
     p=@;           $=$ || @                     /*append a character to the  $  string.*/
     end   /*j*/                                 /* [↓]  keep peeling chars until done. */

say ' input string: ' str                        /*display the original string & output.*/
say ' output string: ' $                         /*stick a fork in it,  we're all done. */</lang>

output when using the default input:

          input string:  gHHH5YY++///\
         output string:  g, HHH, 5, YY, ++, ///, \

version 2

<lang rexx>/* REXX */
Parse arg str                            /*obtain optional arguments from the CL*/
if str=='' then str= 'gHHH5YY++///\'     /*Not specified? Then use the default.*/
input=str
x=''                                     /* the run currently being collected   */
cp=''                                    /* previous character                  */
result=''                                /* output built so far                 */
/* Strict comparison: a remainder made only of blanks must still be processed. */
Do While str\==''
  Parse Var str c +1 str                 /* peel off the next character         */
  If c==cp Then x=x||c
  Else Do
    If x>>'' Then                        /* nothing to flush before first char  */
      result=result||x', '
    x=c
    End
  cp=c
  End

result=result||x
say ' input string: ' input
say ' output string: ' result
</lang>

Output:

      input string:  gHHH5YY++///\
     output string:  g, HHH, 5, YY, ++, ///, \

Ring

<lang ring> see split("gHHH5YY++///\")

func split(s )

    c =left (s, 1)
    split = ""
    for i = 1 to len(s)
        d = substr(s, i, 1)
        if d != c
           split = split + ", "
           c = d 
        ok
        split = split + d 
    next
    return split

</lang> Output:

g, HHH, 5, YY, ++, ///, \

Ruby

<lang ruby>def split(str)

 puts " input string: #{str}"
 s = str.chars.chunk(&:itself).map{|_,a| a.join}.join(", ")
 puts "output string: #{s}"
 s

end

split("gHHH5YY++///\\")</lang>

Output:

 input string: gHHH5YY++///\
output string: g, HHH, 5, YY, ++, ///, \

Rust

<lang Rust>fn splitter(string: &str) -> String {

   let chars: Vec<_> = string.chars().collect();
   let mut result = Vec::new();
   let mut last_mismatch = 0;
   for i in 0..chars.len() {
       if chars.len() == 1 {
           return chars[0..1].iter().collect();
       }
       if i > 0 && chars[i-1] != chars[i] {
           let temp_result: String = chars[last_mismatch..i].iter().collect();
           result.push(temp_result);
           last_mismatch = i;
       }
       if i == chars.len() - 1 {
           let temp_result: String = chars[last_mismatch..chars.len()].iter().collect();
           result.push(temp_result);
       }
   }
   result.join(", ")

}

fn main() {

   let test_string = "g";
   println!("input string: {}", test_string);
   println!("output string: {}", splitter(test_string));

   let test_string = "";
   println!("input string: {}", test_string);
   println!("output string: {}", splitter(test_string));

   let test_string = "gHHH5YY++///\\";
   println!("input string: {}", test_string);
   println!("output string: {}", splitter(test_string));

}</lang>

Output:

input string: g
output string: g
input string: 
output string: 
input string: gHHH5YY++///\
output string: g, HHH, 5, YY, ++, ///, \

Alternate using IterTools

<lang Rust>use itertools::Itertools;

pub fn split_text(s: &str) -> Vec<String> {

   let mut r = Vec::new();
   for (_, group) in &s.chars().into_iter().group_by(|e| *e) {
       r.push(group.map(|e| e.to_string()).join(""));
   }
   r

}

[cfg(test)]

mod tests {

   use super::*;

   #[test]
   fn test_splitting_text() {
       assert_eq!(split_text("gHHH5YY++///\\"), vec!["g", "HHH", "5", "YY", "++", "///", "\\"]);
       assert!(split_text("").is_empty());
   }

} </lang>

Scala

<lang Scala>// Split a (character) string into comma (plus a blank) delimited strings // based on a change of character (left to right). // See https://rosettacode.org/wiki/Split_a_character_string_based_on_change_of_character#Scala

// Prefixes ", " to every character that differs from its predecessor.
// No guard letter is appended, so the result never ends in a separator
// and an empty input stays empty.
def runLengthSplit(s: String): String =
  s.indices
    .map(i => (if (i > 0 && s(i) != s(i - 1)) ", " else "") + s(i))
    .mkString

println(runLengthSplit("""gHHH5YY++///\"""))</lang>

Output:

See it running in your browser on ScalaFiddle (JavaScript)

or by Scastie (JVM).

<lang Scala> def runLengthSplit(s:String):List[String] = {

 def recursiveSplit(acc:List[String], rest:String): List[String] = rest match {
   case "" => acc
   case _ => {
     val (h, t) = rest.span(_ == rest.head)
     recursiveSplit(acc :+ h, t)
   }
 }

 recursiveSplit(Nil, s)

}

val result = runLengthSplit("""gHHH5YY++///\""") println(result.mkString(",")) </lang>

Output:

g,HHH,5,YY,++,///,\

Sed

<lang sed># \(.\)\1* matches one maximal run of a repeated character and appends ", " to it;
# the second command removes the separator left after the final run.
echo 'gHHH5YY++///\' | sed 's/\(.\)\1*/&, /g;s/, $//'</lang> Output:

g, HHH, 5, YY, ++, ///, \

Sidef

<lang ruby>func group(str) {

   gather {
       while (var match = (str =~ /((.)\g{-1}*)/g)) {
           take(match[0])
       }
   }

}

say group(ARGV[0] \\ 'gHHH5YY++///\\').join(', ')</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Standard ML

<lang sml>(*

* Head-Tail implementation of grouping
*)

fun group' ac nil = [ac]

 | group'     nil (y::ys) = group' [y] ys
 | group' (x::ac) (y::ys) = if x=y then group' (y::x::ac) ys else (x::ac) :: group' [y] ys

fun group xs = group' nil xs

fun groupString str = String.concatWith ", " (map implode (group (explode str)))</lang>

Output:

- groupString "gHHH5YY++///\\";
val it = "g, HHH, 5, YY, ++, ///, \\" : string

Swift

<lang swift>public extension String {

 func splitOnChanges() -> [String] {
   guard !isEmpty else {
     return []
   }

   var res = [String]()
   var workingChar = first!
   var workingStr = "\(workingChar)"

   for char in dropFirst() {
     if char != workingChar {
       res.append(workingStr)
       workingStr = "\(char)"
       workingChar = char
     } else {
       workingStr += String(char)
     }
   }

   res.append(workingStr)

   return res
 }

}

print("gHHH5YY++///\\".splitOnChanges().joined(separator: ", "))</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Tailspin

<lang tailspin> composer splitEquals

 <reps> <nextReps>*
 rule reps: <'(.)\1*'>
 rule nextReps: <reps> -> \(', ' ! $ ! \)

end splitEquals

'gHHH5YY++///\' -> splitEquals -> !OUT::write </lang>

Output:

g, HHH, 5, YY, ++, ///, \

tbas

Translation of: BBC BASIC

<lang basic>SUB SPLITUNIQUE$(s$) DIM c$, d$, split$, i% c$ = LEFT$(s$, 1) split$ = "" FOR i% = 1 TO LEN(s$) d$ = MID$(s$, i%, 1) IF d$ <> c$ THEN split$ = split$ + ", " c$ = d$ END IF split$ = split$ + d$ NEXT RETURN split$ END SUB

PRINT SPLITUNIQUE$("gHHH5YY++///\") END</lang>

Tcl

This is most concise with regular expressions. Note well the two steps: it could be achieved in one very clever regexp, but being that clever is usually a bad idea (for both readability and performance, in this case).

<lang Tcl>set string "gHHH5YY++///\\"

regsub -all {(.)\1*} $string {\0, } string regsub {, $} $string {} string puts $string</lang>

Output:

g, HHH, 5, YY, ++, ///, \

VBA

<lang vb> Option Explicit

Sub Split_string_based_on_change_character() Dim myArr() As String, T As String

Const STRINPUT As String = "gHHH5YY++///\" Const SEP As String = ", "

   myArr = Split_Special(STRINPUT)
   T = Join(myArr, SEP)
   Debug.Print Left(T, Len(T) - Len(SEP))

End Sub

Function Split_Special(Ch As String) As String() 'return an array of Strings Dim tb, i&, st As String, cpt As Long, R() As String

   tb = Split(StrConv(Ch, vbUnicode), Chr(0))
   st = tb(LBound(tb))
   ReDim R(cpt)
   R(cpt) = st
   For i = 1 To UBound(tb)
       If tb(i) = st Then
           R(cpt) = R(cpt) & st
       Else
           st = tb(i)
           cpt = cpt + 1
           ReDim Preserve R(cpt)
           R(cpt) = st
       End If
   Next
   Split_Special = R

End Function </lang>

Output:

g, HHH, 5, YY, ++, ///, \

Wren

<lang ecmascript>var split = Fn.new { |s|
    // Returns the runs of identical consecutive characters in s,
    // joined by ", ". An empty string yields an empty string.
    if (s.isEmpty) return ""

    var runs = []
    var prev = s[0]   // most recently seen character
    var run = prev    // run being accumulated
    for (ch in s.skip(1)) {
        if (ch != prev) {
            // Character changed: close the current run and start a new one.
            runs.add(run)
            run = ch
        } else {
            run = run + ch
        }
        prev = ch
    }
    runs.add(run)
    return runs.join(", ")
}

var s = "gHHH5YY++///\\"
System.print(split.call(s))</lang>

Output:

g, HHH, 5, YY, ++, ///, \

XLISP

<lang lisp>(defun delimit (s)
    ; Returns s with ", " inserted at every change of character.
    ; The helper walks the remaining characters (pending), appending to the
    ; result list (built); prev is the character seen on the previous step.
    (defun delim (pending built prev)
        (if (null pending)
            built
            (delim (cdr pending)
                   (append built
                           (if (equal (car pending) prev)
                               ; same run: copy the character only
                               (list (car pending))
                               ; new run: comma, space, then the character
                               (list #\, #\Space (car pending)) ) )
                   (car pending) ) ) )
    (list->string (delim (string->list s) '() (car (string->list s)))) )

(display (delimit "gHHH5YY++///\\")) ;; NB. The "\" character needs to be escaped</lang>

Output:

g, HHH, 5, YY, ++, ///, \

XPL0

<lang XPL0>string 0;       \change to zero-terminated convention
char Str;
int  I;
[Str:= "gHHH5YY++///\";
I:= 0;
while Str(I) do         \loop until the terminating zero is reached
        [ChOut(0, Str(I));
        \emit the delimiter when the next character differs and is not the terminator
        if Str(I+1)#Str(I) & Str(I+1)#0 then Text(0, ", ");
        I:= I+1;
        ];
]</lang>

Output:

g, HHH, 5, YY, ++, ///, \

Yabasic

<lang freebasic>sub esplit$(instring$)
    rem Returns instring$ with ", " inserted at every change of character.
    rem Strings shorter than two characters are returned unchanged.
    if len(instring$) < 2 then
        return instring$
    endif

    out$ = left$(instring$, 1)
    for pos = 2 to len(instring$)
        rem Compare each character with its left neighbour.
        if mid$(instring$, pos, 1) <> mid$(instring$, pos - 1, 1) then
            out$ = out$ + ", "
        endif
        out$ = out$ + mid$(instring$, pos, 1)
    next pos
    return out$
end sub

print esplit$("gHHH5YY++///\\")</lang>

Z80 Assembly

<lang z80>PrintChar       equ &BB5A       ;Amstrad CPC BIOS call
Terminator      equ 0           ;marks the end of a string

        org &8000

        ld hl,StringA           ;HL walks the input string
NextChar:
        ld a,(hl)               ;A = current char
        cp Terminator           ;end of string already?
        ret z                   ;then there is nothing left to print
        ld e,a                  ;E = current char, kept while we peek ahead
        inc hl
        ld a,(hl)               ;A = following char
        cp Terminator           ;is the current char the last one?
        jp z,LastChar           ;then print it without a trailing ", "

        dec hl                  ;undo the peek so no char is skipped
        cp e                    ;Z set when following char == current char

        push af                 ;keep the compare result across the print
        ld a,e
        call PrintChar          ;the current char is printed in either case
        pop af                  ;flags from "cp e" are back

        jr z,NoDelimiter        ;same run: no comma or space
        ld a,','
        call PrintChar
        ld a,' '
        call PrintChar
NoDelimiter:
        inc hl                  ;advance to the following char
        jp NextChar
LastChar:
        ld a,e                  ;final char of the string
        jp PrintChar            ;tail call: PrintChar's return ends the program

ReturnToBasic:
        ret

StringA:
        byte "gHHH5YY++///\",0</lang>

Output:

g, HHH, 5, YY, ++, ///, \

zkl

<lang zkl>fcn group(str){
   // Returns str with ", " inserted at every change of character.
   prev:=str[0];          // character of the run being copied
   out:=Sink(prev);       // the sink starts out holding the first character
   foreach ch in (str[1,*]){
      if(ch==prev) out.write(ch);
      else{ prev=ch; out.write(String(", ",ch)); }
   }
   out.close();
}
group("gHHH5YY++///\\").println();</lang>

Output:

g, HHH, 5, YY, ++, ///, \

ZX Spectrum Basic

<lang basic> 10 LET s$="gHHH5YY++///\"
 15 REM r$ collects the output, starting with the first character
 20 LET r$=s$(1)
 30 FOR k=2 TO LEN s$
 35 REM a character that differs from its left neighbour starts a new group
 40 IF s$(k)<>s$(k-1) THEN LET r$=r$+", "
 50 LET r$=r$+s$(k)
 60 NEXT k
 70 PRINT r$</lang>

Output:

g, HHH, 5, YY, ++, ///, \