Split a character string based on change of character: Difference between revisions
(Added Ruby) |
(promoted draft-task to a task.) |
||
Line 1: | Line 1: | ||
[[Category: String manipulation]] |
[[Category: String manipulation]] |
||
[[Category:Simple]] |
[[Category:Simple]] |
||
{{ |
{{task}} |
||
<!-- this problem is also known as "splitsville" elsewhere. !--> |
<!-- this problem is also known as "splitsville" elsewhere. !--> |
||
<!-- I imagine this Rosetta Code task will lead to quite a few code-golf solutions. !--> |
<!-- I imagine this Rosetta Code task will lead to quite a few code-golf solutions. !--> |
Revision as of 04:03, 21 January 2017
You are encouraged to solve this task according to the task description, using any language you may know.
- Task
Split a (character) string into comma (plus a blank) delimited strings based on a change of character (left to right).
Show the output here (use the 1st example below).
Blanks should be treated as any other character (except
they are problematic to display clearly). The same applies
to commas.
For instance, the string:
gHHH5YY++///\
should be split and show:
g, HHH, 5, YY, ++, ///, \
ALGOL 68
<lang algol68>BEGIN
    # builds a copy of s in which each run of identical characters is #
    # separated from the following run by ", "                        #
    PROC split on characters = ( STRING s )STRING:
         IF s = ""
         THEN
             # nothing to split #
             ""
         ELSE
             # worst case: every character differs from its neighbour, so #
             # each input character may need up to 3 output characters    #
             [ 3 * ( ( UPB s - LWB s ) + 1 ) ]CHAR buffer;
             INT  next free := LWB buffer;
             CHAR run char  := s[ LWB s ];
             FOR i FROM LWB s TO UPB s DO
                 CHAR c = s[ i ];
                 IF c /= run char
                 THEN
                     # a new run starts here, so emit the separator first #
                     buffer[ next free     ] := ",";
                     buffer[ next free + 1 ] := " ";
                     next free +:= 2;
                     run char   := c
                 FI;
                 buffer[ next free ] := c;
                 next free +:= 1
             OD;
             # only the filled part of the buffer is the result #
             buffer[ 1 : next free - 1 ]
         FI ; # split on characters #

    print( ( split on characters( "gHHH5YY++///\" ), newline ) )
END</lang>
- Output:
g, HHH, 5, YY, ++, ///, \
AppleScript
<lang AppleScript>on run
    
    -- Group the characters into runs, glue each run back into a string,
    -- then join the runs with ", ".
    intercalate(", ", ¬
        map(curry(intercalate)'s lambda(""), ¬
            group("gHHH5YY++///\\")))
    
    --> "g, HHH, 5, YY, ++, ///, \\"
    
end run


-- GENERIC FUNCTIONS

-- Runs of adjacent equal items.
-- group :: Eq a => [a] -> [[a]]
on group(xs)
    script eq
        on lambda(a, b)
            a = b
        end lambda
    end script
    
    groupBy(eq, xs)
end group

-- Runs of adjacent items for which f(first item of run, next item) holds.
-- groupBy :: (a -> a -> Bool) -> [a] -> [[a]]
on groupBy(f, xs)
    set mf to mReturn(f)
    
    -- Accumulator: {active: the run being built, sofar: the completed runs}
    script enGroup
        on lambda(a, x)
            set h to cond(length of (active of a) > 0, item 1 of active of a, missing value)
            
            if h is not missing value and mf's lambda(h, x) then
                {active:(active of a) & x, sofar:sofar of a}
            else
                {active:{x}, sofar:(sofar of a) & {active of a}}
            end if
        end lambda
    end script
    
    if length of xs > 0 then
        set dct to foldl(enGroup, {active:{item 1 of xs}, sofar:{}}, tail(xs))
        
        -- Flush the final run, which the fold leaves in 'active'
        sofar of dct & cond(length of (active of dct) > 0, {active of dct}, {})
    else
        {}
    end if
end groupBy

-- foldl :: (a -> b -> a) -> a -> [b] -> a
on foldl(f, startValue, xs)
    tell mReturn(f)
        set v to startValue
        set lng to length of xs
        repeat with i from 1 to lng
            set v to lambda(v, item i of xs, i, xs)
        end repeat
        return v
    end tell
end foldl

-- Both branches are evaluated before the call; cond only selects one.
-- cond :: Bool -> a -> a -> a
on cond(bool, f, g)
    if bool then
        f
    else
        g
    end if
end cond

-- Joins the texts with strText, restoring the previous delimiters afterwards.
-- intercalate :: Text -> [Text] -> Text
on intercalate(strText, lstText)
    set {dlm, my text item delimiters} to {my text item delimiters, strText}
    set strJoined to lstText as text
    set my text item delimiters to dlm
    return strJoined
end intercalate

-- map :: (a -> b) -> [a] -> [b]
on map(f, xs)
    tell mReturn(f)
        set lng to length of xs
        set lst to {}
        repeat with i from 1 to lng
            set end of lst to lambda(item i of xs, i, xs)
        end repeat
        return lst
    end tell
end map

-- Turns a two-argument handler into nested one-argument scripts.
-- curry :: (Script|Handler) -> Script
on curry(f)
    script
        on lambda(a)
            script
                on lambda(b)
                    lambda(a, b) of mReturn(f)
                end lambda
            end script
        end lambda
    end script
end curry

-- Lift 2nd class handler function into 1st class script wrapper 
-- mReturn :: Handler -> Script
on mReturn(f)
    if class of f is script then
        f
    else
        script
            property lambda : f
        end script
    end if
end mReturn

-- Everything after the first item; empty list when there is nothing left.
-- tail :: [a] -> [a]
on tail(xs)
    if length of xs > 1 then
        items 2 thru -1 of xs
    else
        {}
    end if
end tail</lang>
- Output:
g, HHH, 5, YY, ++, ///, \
BBC BASIC
<lang bbcbasic>REM >split
REM Print the sample string with ", " inserted at every change of character.
PRINT FN_split( "gHHH5YY++///\" )
END
:
REM FN_split: returns s$ with ", " inserted wherever a character
REM differs from the one before it.
DEF FN_split( s$ )
LOCAL c$, d$, split$, i%
REM c$ holds the character of the current run
c$ = LEFT$( s$, 1 )
split$ = ""
FOR i% = 1 TO LEN s$
  d$ = MID$( s$, i%, 1 )
  IF d$ <> c$ THEN
    REM a new run starts here: emit the separator and remember the new character
    split$ += ", "
    c$ = d$
  ENDIF
  split$ += d$
NEXT
= split$</lang>
- Output:
g, HHH, 5, YY, ++, ///, \
Elixir
<lang elixir># Prints the input string, then the same string with ", " inserted
# at every change of grapheme.
split = fn str ->
  IO.puts " input string: #{str}"

  String.graphemes(str)
  |> Enum.chunk_by(&(&1))                 # runs of identical graphemes
  |> Enum.map_join(", ", &Enum.join &1)   # each run back to a string, joined by ", "
  |> fn s -> IO.puts "output string: #{s}" end.()
end

split.("gHHH5YY++///\\")</lang>
- Output:
input string: gHHH5YY++///\ output string: g, HHH, 5, YY, ++, ///, \
Haskell
<lang Haskell>import Data.List (group, intercalate)

-- | Print the sample string with @", "@ inserted at every change of
-- character. 'group' yields the runs of adjacent equal characters and
-- 'intercalate' joins them with the separator.
main :: IO ()
main = putStrLn $ intercalate ", " (group "gHHH5YY++///\\")</lang>
- Output:
g, HHH, 5, YY, ++, ///, \
JavaScript
ES6
<lang JavaScript>(() => {
'use strict';
// GENERIC FUNCTIONS
// group :: Eq a => [a] -> a const group = xs => groupBy((a, b) => a === b, xs);
// groupBy :: (a -> a -> Bool) -> [a] -> a const groupBy = (f, xs) => { const dct = xs.slice(1) .reduce((a, x) => { const h = a.active.length > 0 ? a.active[0] : undefined, blnGroup = h !== undefined && f(h, x);
return { active: blnGroup ? a.active.concat(x) : [x], sofar: blnGroup ? a.sofar : a.sofar.concat([a.active]) }; }, { active: xs.length > 0 ? [xs[0]] : [], sofar: [] }); return dct.sofar.concat(dct.active.length > 0 ? [dct.active] : []); };
// intercalate :: String -> [a] -> String const intercalate = (s, xs) => xs.join(s);
// TEST return intercalate(", ", group("gHHH5YY++///\\".split()) .map(x => x.join()));
// -> "g, HHH, 5, YY, ++, ///, \\"
})();</lang>
- Output:
g, HHH, 5, YY, ++, ///, \
Lua
Note that the backslash must be quoted as a double backslash as Lua uses C-like escape sequences. <lang Lua>-- Returns inStr with ", " inserted wherever a character differs from
-- the character before it.
function charSplit (inStr)
    local previous = inStr:sub(1, 1)
    local pieces = {previous}
    for pos = 2, #inStr do
        local current = inStr:sub(pos, pos)
        -- a change of character starts a new run
        if current ~= previous then
            pieces[#pieces + 1] = ", "
        end
        pieces[#pieces + 1] = current
        previous = current
    end
    return table.concat(pieces)
end

print(charSplit("gHHH5YY++///\\"))</lang>
- Output:
g, HHH, 5, YY, ++, ///, \
Perl 6
<lang perl6># Splits $str into runs of identical characters: the regex captures one
# character and then matches as many repeats of that capture as follow it.
sub group-chars ($str) { $str.comb: / (.) $0* / }

# Testing:

# The second string spells its three ZERO WIDTH NO-BREAK SPACE characters as
# escapes so that they cannot be lost when the source is copied, and contains
# two spaces between the angle quotation marks and the alef symbols.
for Q[gHHH5YY++///\], "fff\x[FEFF]\x[FEFF]\x[FEFF]n⃗n⃗n⃗»»»  ℵℵ☄☄☃☃̂☃☄☄" -> $string {
    put 'Original: ', $string;
    put ' Split: ', group-chars($string).join(', ');
}</lang>
- Output:
Original: gHHH5YY++///\ Split: g, HHH, 5, YY, ++, ///, \ Original: fffn⃗n⃗n⃗»»» ℵℵ☄☄☃☃̂☃☄☄ Split: fff, , n⃗n⃗n⃗, »»», , ℵℵ, ☄☄, ☃, ☃̂, ☃, ☄☄
The second test case shows that Perl 6 works with strings at the Unicode grapheme level, and handles combining and zero-width characters correctly. If your browser cannot render these characters, the string consists of:
- {LATIN SMALL LETTER F} x 3
- {ZERO WIDTH NO-BREAK SPACE} x 3
- {LATIN SMALL LETTER N COMBINING RIGHT ARROW ABOVE} x 3
- {RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK} x 3
- {SPACE} x 2,
- {ALEF SYMBOL} x 2,
- {COMET} x 2,
- {SNOWMAN} x 1,
- {SNOWMAN COMBINING CIRCUMFLEX ACCENT} x 1
- {SNOWMAN} x 1,
- {COMET} x 2
Python
<lang python>import itertools
try: input = raw_input except: pass
s = input() groups = [] for _, g in itertools.groupby(s):
groups.append(.join(g))
print(' input string: %s' % s) print(' output string: %s' % ', '.join(groups))</lang>
- Output:
when using the default input
input string: gHHH5YY++///\ output string: g, HHH, 5, YY, ++, ///, \
Racket
<lang racket>#lang racket

;; group-adjacent : (listof char) -> (listof (listof char))
;; Collects consecutive equal characters into runs, preserving order.
;; `group-by` is not suitable here: it gathers all equal elements into one
;; group even when they are not adjacent, so "aba" would yield "aa" and "b".
(define (group-adjacent cs)
  (foldr (λ (c acc)
           (match acc
             ;; c continues the run at the front of the accumulator
             [(cons (and run (cons h _)) rest)
              #:when (char=? c h)
              (cons (cons c run) rest)]
             ;; otherwise c starts a new run
             [_ (cons (list c) acc)]))
         '()
         cs))

;; split-strings-on-change : string -> (listof string)
;; Splits s into substrings at every change of character.
(define (split-strings-on-change s)
  (map list->string (group-adjacent (string->list s))))

;; The here-string avoids having to escape the trailing backslash; it ends
;; at the line that consists only of the terminator "<".
(displayln (string-join (split-strings-on-change #<<<
gHHH5YY++///\
<
                                                 ) ", "))</lang>
- Output:
g, HHH, 5, YY, ++, ///, \
REXX
<lang rexx>/*REXX program splits a string based on change of character ───► a comma delimited list.*/
parse arg str .                                  /*obtain optional arguments from the CL*/
if str=='' | str==","  then str= 'gHHH5YY++///\' /*Not specified?  Then use the default.*/
p=left(str, 1)                                   /*placeholder for the "previous" string*/
$=                                               /*     "       "   "   output      "   */
      do j=1  for length(str);  @=substr(str, j, 1)      /*obtain a char from string.   */
      if @\==left(p, 1)  then do;  $=$',' @;  p=;  end   /*different than previous?     */
                         else $=$ || @                   /*a replicated character.      */
      p=p || @                                           /*append char to current list. */
      end   /*j*/
say ' input string: ' str
say ' output string: ' $                         /*stick a fork in it,  we're all done. */</lang> output when using the default input:
input string: gHHH5YY++///\ output string: g, HHH, 5, YY, ++, ///, \
Ruby
<lang ruby>def split(str)
puts " input string: #{str}" s = str.chars.chunk(&:itself).map{|_,a| a.join}.join(", ") puts "output string: #{s}" s
end
split("gHHH5YY++///\\")</lang>
- Output:
input string: gHHH5YY++///\ output string: g, HHH, 5, YY, ++, ///, \
Sidef
<lang ruby>func group(str) {
# Collects the first capture of every successive match of the regex against str.
# The regex captures one character (inner group) followed by any number of
# repeats of it via the relative backreference \g{-1}; the outer group is the run.
# NOTE(review): presumably the /g flag makes each loop pass resume after the
# previous match - confirm against the Sidef regex documentation.
gather { while (var match = (str =~ /((.)\g{-1}*)/g)) { take(match[0]) } }
}
# Uses the first command-line argument when given, else the sample string
# (assumes `\\` is Sidef's defined-or operator - TODO confirm).
say group(ARGV[0] \\ 'gHHH5YY++///\\').join(', ')</lang>
- Output:
g, HHH, 5, YY, ++, ///, \
zkl
<lang zkl>fcn group(str){
// C holds the character of the current run and out is a Sink seeded with it.
// For each remaining character: write it unchanged when it equals C, otherwise
// write ", " followed by the new character; the C=c inside String(...) also
// updates C to start the new run.
// NOTE(review): str[0] presumably fails on an empty string - confirm whether
// empty input needs to be supported.
C,out := str[0],Sink(C); foreach c in (str[1,*]){ out.write(if(c==C) c else String(", ",C=c)) } out.close();
} group("gHHH5YY++///\\").println();</lang>
- Output:
g, HHH, 5, YY, ++, ///, \