Tokenize a string
From Rosetta Code
You are encouraged to solve this task according to the task description, using any language you may know.
Separate the string "Hello,How,Are,You,Today" by commas into an array (or list) so that each element of it stores a different word. Display the words to the 'user', in the simplest manner possible, separated by a period. To simplify, you may display a trailing period.
[edit] ActionScript
// Text to tokenize.
var source:String = "Hello,How,Are,You,Today";
// split() cuts the text at every comma and hands back the pieces as an Array.
var words:Array = source.split(",");
// join() glues the pieces together again with a period between them.
var dotted:String = words.join(".");
trace(dotted);
// The same steps chained into one expression
trace("Hello,How,Are,You,Today".split(",").join("."));
[edit] Ada
with Ada.Strings.Fixed; use Ada.Strings.Fixed;
with Ada.Text_Io; use Ada.Text_Io;
-- Splits Source_String at each comma and prints every token followed by a period.
procedure Parse_Commas is
Source_String : String := "Hello,How,Are,You,Today";
-- Index_List(K) is the position in Source_String where the K-th token starts.
Index_List : array(1..256) of Natural;
Next_Index : Natural := 1;
begin
-- The first token starts at position 1.
Index_List(Next_Index) := 1;
while Index_List(Next_Index) < Source_String'Last loop
Next_Index := Next_Index + 1;
-- Look for the next comma from the start of the current token onward;
-- the following token begins one position after that comma.
Index_List(Next_Index) := 1 + Index(Source_String(Index_List(Next_Index - 1)..Source_String'Last), ",");
-- A value of 1 means Index returned 0 (no comma left). Pretend a comma sits just
-- past the end of the string so the slice below runs through Source_String'Last.
if Index_List(Next_Index) = 1 then
Index_List(Next_Index) := Source_String'Last + 2;
end if;
-- The token runs from its start up to the character before the comma.
Put(Source_String(Index_List(Next_Index - 1)..Index_List(Next_Index)-2) & ".");
end loop;
end Parse_Commas;
[edit] ALGOL 68
# Demonstrates two ways of splitting a STRING: on a substring and on a set of characters. #
main:(Output:
# Append operator: builds an array one element longer, copies the old contents and stores item last. #
OP +:= = (REF FLEX[]STRING in out, STRING item)VOID:(
[LWB in out: UPB in out+1]STRING new;
new[LWB in out: UPB in out]:=in out;
new[UPB new]:=item;
in out := new
);
PROC string split = (REF STRING beetles, STRING substr)[]STRING:(
""" Split beetles where substr is found """;
FLEX[1:0]STRING out;
INT start := 1, pos;
# pos receives the position of substr relative to the slice beetles[start:] #
WHILE string in string(substr, pos, beetles[start:]) DO
out +:= STRING(beetles[start:start+pos-2]);
start +:= pos + UPB substr - 1
OD;
# NOTE(review): when no separator was found start is still 1, so (assuming LWB beetles = 1) nothing is appended and the result is empty; confirm this is intended #
IF start > LWB beetles THEN
out +:= STRING(beetles[start:])
FI;
out
);
PROC char split = (REF STRING beetles, STRING chars)[]STRING: (
""" Split beetles where character is found in chars """;
FLEX[1:0]STRING out;
FILE beetlef;
associate(beetlef, beetles); # associate a FILE handle with a STRING #
make term(beetlef, chars); # make term: assign CSV string terminator #
# Reaching the end of the string jumps to the label below, which is how the read loop terminates #
PROC raise logical file end = (REF FILE f)BOOL: except logical file end;
on logical file end(beetlef, raise logical file end);
STRING solo;
DO
getf(beetlef, ($g$, solo));
out+:=solo;
getf(beetlef, ($x$)) # skip CHAR separator #
OD;
except logical file end:
SKIP;
out
);
STRING beetles := "John Lennon, Paul McCartney, George Harrison, Ringo Starr";
# Print every piece followed by a period, once per splitting method #
printf(($g"."$, string split(beetles, ", "),$l$));
printf(($g"."$, char split(beetles, ", "),$l$))
)
John Lennon.Paul McCartney.George Harrison.Ringo Starr.
John.Lennon..Paul.McCartney..George.Harrison..Ringo.Starr.
[edit] AutoHotkey
; Split the comma-separated text and show each word in its own message box.
string := "Hello,How,Are,You,Today"
; StringSplit stores the pieces in the pseudo-array string1, string2, ... and the piece count in string0.
stringsplit, string, string, `,
loop, % string0
{
; A_Index is the current iteration number, so string%A_Index% names the current piece.
msgbox % string%A_Index%
}
[edit] AWK
# Split a comma-separated string into an array and print every word
# followed by a period.
BEGIN {
    s = "Hello,How,Are,You,Today"
    # split() returns the number of pieces. Use it as an inclusive loop
    # bound so the last word is printed too, and so the script does not
    # depend on length(array), which not every awk supports.
    n = split(s, arr, ",")
    for (i = 1; i <= n; i++) {
        # Explicit format: a "%" inside the data is printed literally.
        printf "%s.", arr[i]
    }
    # Finish the output line.
    print ""
}
A more idiomatic way for AWK is
# Tokenize every input line: with FS set to ",", awk itself splits each
# record into the fields $1 .. $NF.
BEGIN { FS = "," }
{
    for (i = 1; i <= NF; i++)
        printf "%s.", $i    # explicit format: a "%" in a field stays literal
    print ""
}
which tokenizes each line of input; this is achieved by using "," as the field separator.
[edit] Batch File
@echo off
rem Usage: tokenize.cmd "Hello,How,Are,You,Today"
rem Prints the comma-separated words of the first argument joined by periods.
setlocal enabledelayedexpansion
call :tokenize %1 res
echo %res%
goto :eof

rem :tokenize <quoted list> <result variable name>
rem Stores the tokens of the list, joined by periods, in the named variable.
:tokenize
set str=%~1
:loop
rem FOR treats commas as delimiters, so %%i takes each token in turn.
rem Delayed expansion (!...!) reads the result variable as it grows.
for %%i in (%str%) do set %2=!%2!.%%i
rem Drop the leading period added in front of the first token.
set %2=!%2:~1!
goto :eof
Demo
>tokenize.cmd "Hello,How,Are,You,Today"
Hello.How.Are.You.Today
[edit] BASIC
Works with: QBasic
' Splits parseMe at commas into the array parsed() and prints the tokens
' separated by periods. Two passes: the first counts the tokens so the array
' can be dimensioned, the second extracts them.
DIM parseMe AS STRING
parseMe = "Hello,How,Are,You,Today"
' tmpLng1: position of the comma just found; tmpLng2: start of the current token
DIM tmpLng1 AS INTEGER, tmpLng2 AS INTEGER, parsedCount AS INTEGER
tmpLng2 = 1
' parsedCount is the highest array index used so far, hence -1 for "none yet"
parsedCount = -1
'count number of tokens
DO
tmpLng1 = INSTR(tmpLng2, parseMe, ",")
IF tmpLng1 THEN
parsedCount = parsedCount + 1
tmpLng2 = tmpLng1 + 1
ELSE
' no comma left: count the trailing token unless the string ended at a comma
IF tmpLng2 < (LEN(parseMe) + 1) THEN parsedCount = parsedCount + 1
EXIT DO
END IF
LOOP
IF parsedCount > -1 THEN
REDIM parsed(parsedCount) AS STRING
' restart from the beginning for the extraction pass
tmpLng2 = 1
parsedCount = -1
'parse
DO
tmpLng1 = INSTR(tmpLng2, parseMe, ",")
IF tmpLng1 THEN
parsedCount = parsedCount + 1
' token is the text between the previous start and the comma
parsed(parsedCount) = MID$(parseMe, tmpLng2, tmpLng1 - tmpLng2)
tmpLng2 = tmpLng1 + 1
ELSE
IF tmpLng2 < (LEN(parseMe) + 1) THEN
parsedCount = parsedCount + 1
' last token: everything from the current start to the end of the string
parsed(parsedCount) = MID$(parseMe, tmpLng2)
END IF
EXIT DO
END IF
LOOP
' print the first token bare, then every further token preceded by a period
PRINT parsed(0);
FOR L0 = 1 TO parsedCount
PRINT "."; parsed(L0);
NEXT
END IF
[edit] C
Works with: ANSI C
Library: POSIX
This example uses the strtok() function to separate the tokens. This function is destructive (replacing token separators with '\0'), so we have to make a copy of the string (using strdup()) before tokenizing. strdup() is not part of ANSI C, but is available on most platforms. It can easily be implemented with a combination of strlen(), malloc(), and strcpy().
#include<string.h>
#include<stdio.h>
#include<stdlib.h>
/*
 * Splits a comma-separated string with strtok() and prints every token
 * followed by a period.
 *
 * strtok() overwrites the separators with '\0', so it works on a private
 * copy of the (read-only) source string.
 *
 * Returns 0 on success, EXIT_FAILURE if the copy cannot be allocated.
 */
int main(void)
{
    enum { MAX_TOKENS = 5 };          /* capacity of the token array */
    char *a[MAX_TOKENS];
    const char *s = "Hello,How,Are,You,Today";
    int n = 0, nn;
    char *tok;
    char *ds = strdup(s);

    if (ds == NULL) {                 /* out of memory: nothing to tokenize */
        perror("strdup");
        return EXIT_FAILURE;
    }

    /* Store only real tokens, so n ends up as the exact token count and
       no NULL pointer is ever handed to printf below. */
    for (tok = strtok(ds, ","); tok != NULL && n < MAX_TOKENS; tok = strtok(NULL, ","))
        a[n++] = tok;

    for (nn = 0; nn < n; ++nn)
        printf("%s.", a[nn]);
    putchar('\n');

    free(ds);
    return 0;
}
[edit] C#
// Break the comma-separated text into its words.
// (Regex.Split("Hello,How,Are,You,Today", ","), from the
// System.Text.RegularExpressions namespace, would work as well.)
string str = "Hello,How,Are,You,Today";
string[] strings = str.Split(',');
// Write each word on its own line, followed by a period.
for (int i = 0; i < strings.Length; i++)
{
    Console.WriteLine(strings[i] + ".");
}
[edit] C++
Works with: ANSI C++
Library: STL
This is not the most efficient method as it involves redundant copies in the background, but it is very easy to use. In most cases it will be a good choice as long as it is not used as an inner loop in a performance critical system.
Note doxygen tags in comments before function, describing details of interface.
#include <string>
#include <vector>
/// \brief Break a string into the tokens that lie between delimiter characters.
///
/// \note A run of consecutive delimiters counts as one delimiter, so no
///       empty tokens are produced.
/// \note The delimiters themselves never appear in the result.
///
/// \param str    text to be tokenized
/// \param delims every character in this string acts as a delimiter
/// \return the tokens, in the order they occur in \p str
std::vector<std::string> tokenize_str(const std::string & str,
                                      const std::string & delims=", \t")
{
    using namespace std;

    vector<string> result;

    // Start of the first token: the first character that is not a delimiter.
    string::size_type begin = str.find_first_not_of(delims);

    while (begin != string::npos)
    {
        // The token ends at the next delimiter, or at the end of the string.
        const string::size_type end = str.find_first_of(delims, begin);

        if (end == string::npos)
        {
            result.push_back(str.substr(begin));
            begin = string::npos;   // nothing can follow the last token
        }
        else
        {
            result.push_back(str.substr(begin, end - begin));
            // Skip the whole run of delimiters to reach the next token.
            begin = str.find_first_not_of(delims, end);
        }
    }

    return result;
}
here is sample usage code:
#include <iostream>
// Tokenizes the sample string and prints each token followed by a period.
int main() {
    const std::string s("Hello,How,Are,You,Today");
    const std::vector<std::string> v(tokenize_str(s));

    // Walk the tokens with an iterator instead of an index.
    for (std::vector<std::string>::const_iterator it = v.begin(); it != v.end(); ++it)
        std::cout << *it << ".";
    std::cout << std::endl;
    return 0;
}
[edit] Common Lisp
There are libraries out there that handle splitting (e.g., SPLIT-SEQUENCE, and the more-general CL-PPCRE), but this is a simple one-off, too. When the words are written with write-with-periods, there is no final period after the last word.
(defun comma-split (string)
  "Return the list of substrings of STRING that lie between commas.
Empty substrings are kept, so \"a,,b\" yields (\"a\" \"\" \"b\")."
  (let ((pieces '())
        (start 0))
    (loop
      (let ((finish (position #\, string :start start)))
        ;; FINISH is NIL after the last comma; SUBSEQ then runs to the end.
        (push (subseq string start finish) pieces)
        (when (null finish)
          (return (nreverse pieces)))
        (setf start (1+ finish))))))
(defun write-with-periods (strings)
  "Print the elements of STRINGS to standard output with a period between
consecutive elements and none after the last. Returns NIL."
  (loop :for (word . more) :on strings
        :do (princ word)
            ;; Only emit the separator when another element follows.
            (when more (princ ".")))
  nil)
[edit] Clojure
;; Split the text with the regex #"," (a Java interop call to .split), put a
;; "." between the pieces with interpose, and concatenate them into one string.
(apply str (interpose "." (seq (.split #"," "Hello,How,Are,You,Today"))))
[edit] D
// Split the text at commas, join the pieces with periods and print the result.
writefln( "Hello,How,Are,You,Today".split(",").join(".") );
[edit] E
".".rjoin("Hello,How,Are,You,Today".split(","))
[edit] Erlang
-module(tok).
-export([start/0]).
%% Splits the sample string at commas and prints the words joined by
%% periods, followed by a newline. Returns ok.
start() ->
    Words = string:tokens("Hello,How,Are,You,Today", ","),
    Dotted = string:join(Words, "."),
    io:fwrite("~s~n", [Dotted]),
    ok.
[edit] Factor
"Hello,How,Are,You,Today" "," split "." join print
[edit] Forth
There is no standard string split routine, but it is easily written. The results are saved temporarily to the dictionary.
\ split: cut the string "str len" at every occurrence of "separator len".
\ Each token is stored in the dictionary as an ( addr len ) pair; the result
\ is the address of the first pair and the number of pairs.
\ NOTE(review): the dictionary space is released again before returning, so
\ the pairs are presumably valid only until the dictionary is next written.
: split ( str len separator len -- tokens count )
here >r 2swap
begin
2dup 2, \ save this token ( addr len )
2over search \ find next separator
while
dup negate here 2 cells - +! \ adjust last token length
2over nip /string \ start next search past separator
repeat
2drop 2drop
r> here over - ( tokens length )
dup negate allot \ reclaim dictionary
2 cells / ; \ turn byte length into token count
\ .tokens: print the first count-1 tokens each followed by a period, then the
\ last token without one.
: .tokens ( tokens count -- )
1 ?do dup 2@ type ." ." cell+ cell+ loop 2@ type ;
s" Hello,How,Are,You,Today" s" ," split .tokens \ Hello.How.Are.You.Today
[edit] Fortran
Works with: Fortran version 90 and later
! Splits str at each comma into the array word, then prints every word
! followed by a period.
PROGRAM Example
CHARACTER(23) :: str = "Hello,How,Are,You,Today"
! Room for at most 5 tokens of at most 5 characters each.
CHARACTER(5) :: word(5)
! pos1: start of the current token in str
! pos2: position of the next comma, counted from pos1
! n:    number of tokens stored so far
INTEGER :: pos1 = 1, pos2, n = 0, i
DO
pos2 = INDEX(str(pos1:), ",")
! No comma left: the remainder of the string is the last token.
IF (pos2 == 0) THEN
n = n + 1
word(n) = str(pos1:)
EXIT
END IF
n = n + 1
! pos2 is relative to pos1, so the token ends at pos1+pos2-2 (just before the comma).
word(n) = str(pos1:pos1+pos2-2)
! Continue right after the comma.
pos1 = pos2+pos1
END DO
DO i = 1, n
! ADVANCE="NO" keeps all words on one output line.
WRITE(*,"(2A)", ADVANCE="NO") TRIM(word(i)), "."
END DO
END PROGRAM Example
[edit] Go
package main
import (
	"fmt"
	"strings"
)
// main splits the sample string at commas and prints the words joined by
// periods.
func main() {
	s := "Hello,How,Are,You,Today"
	// strings.Split takes just the text and the separator and returns every
	// piece; strings.Join glues the pieces back together with periods.
	fmt.Println(strings.Join(strings.Split(s, ","), "."))
}
[edit] Haskell
The necessary operations are unfortunately not in the standard library (yet), but simple to write:
-- | Split a list into the chunks lying between elements that satisfy the
-- separator predicate. A run of consecutive separators is treated as one
-- separator; a separator at the very start yields an empty first chunk.
splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy _ [] = []
splitBy isSep xs =
  let (chunk, remainder) = break isSep xs
  in  chunk : splitBy isSep (dropWhile isSep remainder)
-- splitRegex is not defined here; it comes from Text.Regex (imported for the
-- regular-expression example) and has the type
--   splitRegex :: Regex -> String -> [String]
-- Written as a bare signature without a binding it would not compile.

-- | Concatenate the lists in xs, placing the delimiter d between
-- consecutive lists.
joinWith :: [a] -> [[a]] -> [a]
joinWith d xs = concat $ List.intersperse d xs
-- "concat $ intersperse" can be replaced with "intercalate" from the Data.List in GHC 6.8 and later
putStrLn $ joinWith "." $ splitBy (== ',') $ "Hello,How,Are,You,Today"
-- using regular expression to split:
import Text.Regex
putStrLn $ joinWith "." $ splitRegex (mkRegex ",") $ "Hello,How,Are,You,Today"
Tokenizing can also be realized by using unfoldr and break:
*Main> mapM_ putStrLn $ takeWhile (not.null) $ unfoldr (Just . second(drop 1). break (==',')) "Hello,How,Are,You,Today"
Hello
How
Are
You
Today
- You need to import the modules Data.List and Control.Arrow
As special cases, splitting / joining by white space and by newlines are provided by the Prelude functions words / unwords and lines / unlines, respectively.
[edit] Groovy
// Split the text at commas, then print the words joined by periods.
def words = 'Hello,How,Are,You,Today'.split(',')
println words.join('.')
[edit] HicEst
! Copies each comma-separated word of string into its own fixed-width slot of
! the buffer list, then writes every word followed by a period.
CHARACTER string="Hello,How,Are,You,Today", list
! NOTE(review): INDEX with option 256 presumably counts the commas, giving
! nWords = commas + 1 -- confirm against the HicEst manual.
nWords = INDEX(string, ',', 256) + 1
! Slot width used for every word in list.
maxWordLength = LEN(string) - 2*nWords
ALLOCATE(list, nWords*maxWordLength)
DO i = 1, nWords
! Copy the i-th comma-separated item of string into the i-th slot of list.
EDIT(Text=string, SePaRators=',', item=i, WordEnd, CoPyto=CHAR(i, maxWordLength, list))
ENDDO
DO i = 1, nWords
! TRIM removes the padding of the fixed-width slot before the period is added.
WRITE(APPend) TRIM(CHAR(i, maxWordLength, list)), '.'
ENDDO
[edit] Icon and Unicon
[edit] Icon
# Splits the string at commas, collects the words in the list A and writes
# each word followed by a period.
procedure main()
   A := []
   "Hello,How,Are,You,Today" ? {
      # Each pass takes the text up to the next comma and then steps over
      # that comma; the loop ends when no comma is left.
      while put(A, 1(tab(upto(',')), =","))
      # Whatever remains after the last comma is the final word. Requiring a
      # comma after it as well would silently drop it.
      put(A, tab(0))
      }
   every writes(!A, ".")
   write()
end
Output:
->ss Hello.How.Are.You. ->
[edit] Unicon
The Icon solution also works in Unicon.
[edit] Io
"Hello,How,Are,You,Today" split(",") join(".") println
[edit] J
s=: 'Hello,How,Are,You,Today'
] t=: <;._1 ',',s
+-----+---+---+---+-----+
|Hello|How|Are|You|Today|
+-----+---+---+---+-----+
; t,&.>'.'
Hello.How.Are.You.Today.
'.' (I.','=s)}s NB. two steps combined
Hello.How.Are.You.Today
Alternatively using the system library/script strings
require 'strings'
',' splitstring s
+-----+---+---+---+-----+
|Hello|How|Are|You|Today|
+-----+---+---+---+-----+
'.' joinstring ',' splitstring s
Hello.How.Are.You.Today
splitstring and joinstring also work with longer "delimiters":
'"'([ ,~ ,) '","' joinstring ',' splitstring s
"Hello","How","Are","You","Today"
[edit] Java
Works with: Java version 1.0+
There are multiple ways to tokenize a String in Java. The first is to split the String into an array of Strings; the other is to use a StringTokenizer with a delimiter. The second way given here skips empty tokens: if two commas appear in a row, the array returned by split contains an empty string at that position, whereas the StringTokenizer returns no empty token.
// Text to break apart at the commas.
String toTokenize = "Hello,How,Are,You,Today";

// First way: String.split returns all the pieces as an array.
String[] word = toTokenize.split(",");
for (int index = 0; index != word.length; ++index) {
    String piece = word[index];
    System.out.print(piece + ".");
}

// Second way: StringTokenizer hands out one token at a time.
StringTokenizer tokenizer = new StringTokenizer(toTokenize, ",");
while (tokenizer.hasMoreTokens()) {
    String piece = tokenizer.nextToken();
    System.out.print(piece + ".");
}
[edit] JavaScript
Works with: Firefox version 2.0
// Split the text at each comma, rejoin the words with periods and show the result.
var words = "Hello,How,Are,You,Today".split(",");
alert(words.join("."));
[edit] Logo
Works with: UCB Logo
; split :str :sep -- replaces every character of :str equal to :sep with a
; space and lets PARSE turn the resulting text into a list of words.
to split :str :sep
output parse map [ifelse ? = :sep ["| |] [?]] :str
end
This form is more robust, doing the right thing if there are embedded spaces.
; split :str :by -- walks :str one character at a time.
; Optional inputs: :acc is the list of finished words, :w the word being built.
to split :str :by [:acc []] [:w "||]
; End of input: the word in progress becomes the last element.
if empty? :str [output lput :w :acc]
ifelse equal? first :str :by ~
; Separator: store the finished word and continue with :w back at its default.
[output (split butfirst :str :by lput :w :acc)] ~
; Ordinary character: append it to the word in progress.
[output (split butfirst :str :by :acc lput first :str :w)]
end
? show split "Hello,How,Are,You,Today ",
[Hello How Are You Today]
[edit] Lua
-- Parses one CSV record with the "re" pattern module and prints its fields.
require"re"
-- Grammar: a record is one or more comma-separated fields, collected into a
-- table by "-> {}" and followed by a newline or the end of input.
-- A field is either a quoted ("escaped") field, in which a doubled quote
-- stands for one literal quote, or a plain run of characters containing no
-- comma, quote or newline.
record = re.compile[[
record <- ( <field> (',' <field>)* ) -> {} (%nl / !.)
field <- <escaped> / <nonescaped>
nonescaped <- { [^,"%nl]* }
escaped <- '"' {~ ([^"] / '""' -> '"')* ~} '"'
]]
-- unpack turns the table of fields into separate arguments for print.
print(unpack(record:match"hello,how,are,you,today"))
[edit] M4
dnl Stores the comma-separated words of s in an emulated array a, then prints each word followed by a period.
define(`s',`Hello,How,Are,You,Today')
dnl set and get emulate an array with one macro per element, named array[index].
define(`set',`define(`$1[$2]',`$3')')
define(`get',`defn($1[$2])')
dnl n counts the elements stored so far.
define(`n',0)
dnl fill stores its first argument at index n, increments n and recurses on the remaining arguments.
define(`fill',
`set(a,n,$1)`'define(`n',incr(n))`'ifelse(eval($#>1),1,`fill(shift($@))')')
dnl s expands to the comma-separated text, so every word arrives as a separate argument.
fill(s)
dnl j is the index of the next element to print.
define(`j',0)
dnl show prints element j followed by a period, increments j and recurses while j is below n.
define(`show',
`ifelse(eval(j<n),1,`get(a,j).`'define(`j',incr(j))`'show')')
show
Output:
Hello.How.Are.You.Today.
[edit] MAXScript
-- Build one string containing every comma-separated word followed by a period.
output = ""
-- filterString splits the text at the characters given in its second argument.
for word in (filterString "Hello,How,Are,You,Today" ",") do
(
output += (word + ".")
)
-- Print the assembled string followed by a newline.
format "%\n" output
[edit] MMIX
% Tokenizes Text at each sep character and prints the tokens one per line.
% Pass 1 copies Text into the buffer at token, replacing every separator by
% the pair NL,EOS so that each token becomes a string of its own.
% Pass 2 prints the strings one after another until an empty one is found.
sep IS ','
EOS IS 0
NL IS 10
// main registers
p IS $255
tp GREG
c GREG
t GREG
LOC Data_Segment
GREG @
Text BYTE "Hello,How,Are,You,Today",EOS
% NOTE(review): only one byte is reserved at token explicitly; the output
% buffer presumably relies on the memory up to eot being free -- confirm.
token BYTE 0
eot IS @+255
LOC #100 % main () {
Main LDA p,Text %
LDA tp,token % initialize pointers
2H LDBU c,p % DO get char
BZ c,5F % break if char == EOS
CMP t,c,sep % if char != sep then
PBNZ t,3F % store char
SET t,NL % terminate token with NL,EOS
STBU t,tp
SET t,EOS
INCL tp,1
STBU t,tp
JMP 4F % continue
3H STBU c,tp % store char
4H INCL tp,1 % update pointers
INCL p,1
JMP 2B % LOOP
5H SET t,NL % terminate last token and buffer
STBU t,tp
SET t,EOS
INCL tp,1
STBU t,tp
% next part is not really necessary
% program runs only once
% INCL tp,1 % terminate buffer
% STBU t,tp
LDA tp,token % reset token pointer
% REPEAT
2H ADD p,tp,0 % start of token
TRAP 0,Fputs,StdOut % output token
% NOTE(review): p ($255) presumably holds the number of bytes written by
% Fputs at this point, so adding it moves tp to the token's EOS -- confirm.
ADD tp,tp,p
INCL tp,1 % step to next token
LDBU t,tp
PBNZ t,2B % UNTIL EOB(uffer)
TRAP 0,Halt,0
Output:
~/MIX/MMIX/Progs> mmix tokenizing
Hello
How
Are
You
Today
[edit] Modula-3
(* Splits a comma-separated TEXT with TextConv and prints every token
   followed by a period. *)
MODULE Tokenize EXPORTS Main;
IMPORT IO, TextConv;
TYPE Texts = REF ARRAY OF TEXT;
VAR tokens: Texts;
string := "Hello,How,Are,You,Today";
(* The set of characters that act as separators. *)
sep := SET OF CHAR {','};
BEGIN
(* Size the array with ExplodedSize before Explode fills it in. *)
tokens := NEW(Texts, TextConv.ExplodedSize(string, sep));
TextConv.Explode(string, tokens^, sep);
FOR i := FIRST(tokens^) TO LAST(tokens^) DO
IO.Put(tokens[i] & ".");
END;
IO.Put("\n");
END Tokenize.
[edit] MUMPS
TOKENS ; Split a comma-delimited string into INP(1..n) and write the pieces separated by periods
 NEW I,J,INP
 SET INP="Hello,how,are,you,today"
 ; $LENGTH(INP,",") is the number of comma-delimited pieces; $PIECE extracts piece number I
 FOR I=1:1:$LENGTH(INP,",") SET INP(I)=$PIECE(INP,",",I)
 ; I still holds the piece count here; the period is written after every piece except the last
 FOR J=1:1:I WRITE INP(J) WRITE:J'=I "."
 KILL I,J,INP
 QUIT
In use:
USER>D TOKENS^ROSETTA
Hello.how.are.you.today
[edit] Objective-C
Works with: GNUstep
Works with: Cocoa
// Split the text at commas and join the pieces with periods.
NSString *text = @"Hello,How,Are,You,Today";
NSArray *tokens = [text componentsSeparatedByString:@","];
NSString *result = [tokens componentsJoinedByString:@"."];
// Pass the text as an argument, never as the format string itself: a "%" in
// the data would otherwise be interpreted as a format specifier.
NSLog(@"%@", result);
[edit] OCaml
To split on a single-character separator:
(* [split_char sep str] returns the substrings of [str] lying between
   occurrences of the character [sep]. Empty substrings are kept. *)
let rec split_char sep str =
  let cut = try Some (String.index str sep) with Not_found -> None in
  match cut with
  | None -> [str]
  | Some i ->
      let head = String.sub str 0 i in
      let tail = String.sub str (i + 1) (String.length str - i - 1) in
      head :: split_char sep tail
Or the tail-recursive equivalent:
(* A [try .. with] wrapped around a recursive call prevents the call from
   being a tail call, so the exception handling lives in this small
   non-recursive helper: [string_index str c] is [Some i] for the first
   occurrence of [c] in [str], or [None] when there is none. *)
let string_index str c =
  try Some (String.index str c)
  with Not_found -> None

(* [split_char sep str] returns the substrings of [str] between occurrences
   of [sep]. The pieces are accumulated in reverse and flipped at the end. *)
let split_char sep str =
  let rec collect pieces remaining =
    match string_index remaining sep with
    | None -> List.rev (remaining :: pieces)
    | Some i ->
        let len = String.length remaining in
        let piece = String.sub remaining 0 i in
        let rest = String.sub remaining (i + 1) (len - i - 1) in
        collect (piece :: pieces) rest
  in
  collect [] str
;;
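But both of these perform extraneous String.sub calls, each of which allocates a string, because the not-yet-processed remainder of the input is copied at every step. For N tokens there will be (N - 2) unneeded allocs. To avoid this, here is a version which first collects the indices of the separators and then extracts the tokens: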
(* [split_char sep str] splits [str] at every occurrence of [sep]. It first
   collects the start index of every token and then extracts each token with
   exactly one [String.sub]. *)
let split_char sep str =
(* [indices acc i] prepends to [acc] the index just after every separator
   found from position [i] onwards. The list is therefore in reverse order. *)
let rec indices acc i =
try
let i = succ(String.index_from str i sep) in
indices (i::acc) i
with Not_found ->
(* Sentinel: where a token following one more separator at the very end
   would start. It makes the last real token end at the end of [str]. *)
(String.length str + 1) :: acc
in
let is = indices [0] 0 in
(* Each adjacent pair (last, start) delimits one token: it begins at [start]
   and stops one character before [last], which skips the separator. Walking
   the reversed index list while prepending restores the original order. *)
let rec aux acc = function
| last::start::tl ->
let w = String.sub str start (last-start-1) in
aux (w::acc) (start::tl)
| _ -> acc
in
aux [] is
Splitting on a string separator using the regular expressions library:
#load "str.cma";;
(* [split_str sep str] splits [str] at every occurrence of the literal
   string [sep], using the Str library. *)
let split_str sep str =
  let delimiter = Str.regexp_string sep in
  Str.split delimiter str
There is already a library function for joining:
String.concat sep strings
[edit] Oz
% Split the string at commas (&, is the character literal for a comma) and
% print every token followed by a period.
for T in {String.tokens "Hello,How,Are,You,Today" &,} do
{System.printInfo T#"."}
end
[edit] Perl
# Split the text at commas and print the words joined by periods.
my @words = split /,/, 'Hello,How,Are,You,Today';
print join('.', @words), "\n";
[edit] Perl 6
Works with: Rakudo version #22 "Thousand Oaks"
# Split the text at commas, join the pieces with periods and print the result.
'Hello,How,Are,You,Today'.split(',').join('.').say;
[edit] PicoLisp
# chop turns the string into a list of characters, split cuts that list at
# every ",", and pack turns each sub-list back into a string.
# NOTE(review): this only builds the list of words; nothing here prints them
# separated by periods.
(mapcar pack
(split (chop "Hello,How,Are,You,Today") ",") )
[edit] PHP
Works with: PHP version 5.x
<?php
// Split the text at commas, then print the words joined by periods.
$str = 'Hello,How,Are,You,Today';
$words = explode(',', $str);
echo implode('.', $words);
?>
[edit] PL/I
/* Splits s at commas into the array table and prints every element */
/* followed by a period.                                            */
/* s is VARYING so that length(s) is the length of the text itself. */
/* A fixed-length CHARACTER (100) would be padded with blanks, and  */
/* the padding would be appended to the last token.                 */
declare s character (100) varying initial ('Hello,How,Are,You,Today');
declare n fixed binary (31);

/* One more token than there are commas. */
n = tally(s, ',')+1;

begin;
   declare table(n) character (50) varying;
   declare c character (1);
   declare (i, k) fixed binary (31);

   /* k is the index of the token currently being built. */
   table = ''; k = 1;
   do i = 1 to length(s);
      c = substr(s, i, 1);
      if c = ',' then k = k + 1;
      else table(k) = table(k) || c;
   end;

   /* display the table */
   table = table || '.';   /* appends a period to every element */
   put skip list (string(table));
end;
[edit] Pop11
The natural solution in Pop11 uses lists.
There are built in libraries for tokenising strings, illustrated below, along with code that the user could create for the task.
First show the use of sysparse_string to break up a string and make a list of strings.
;;; Make a list of strings from a string using space as separator
lvars list;
sysparse_string('the cat sat on the mat') -> list;
;;; print the list of strings
list =>
** [the cat sat on the mat]
By giving it an extra parameter 'true' we can make it recognize numbers and produce a list of strings and numbers
lvars list;
sysparse_string('one 1 two 2 three 3 four 4', true) -> list;
;;; print the list of strings and numbers
list =>
** [one 1 two 2 three 3 four 4]
;;; check that first item is a string and second an integer
isstring(list(1))=>
** <true>
isinteger(list(2))=>
** <true>
Now show some uses of the built in procedure sys_parse_string, which allows more options:
;;; Make pop-11 print strings with quotes
true -> pop_pr_quotes;
;;;
;;; Create a string of tokens using comma as token separator
lvars str='Hello,How,Are,You,Today';
;;;
;;; Make a list of strings by applying sys_parse_string
;;; to str, using the character `,` as separator (the default
;;; separator, if none is provided, is the space character).
lvars strings;
[% sys_parse_string(str, `,`) %] -> strings;
;;;
;;; print the list of strings
strings =>
** ['Hello' 'How' 'Are' 'You' 'Today']
If {% ... %} were used instead of [% ... %] the result would be a vector (i.e. array) of strings rather than a list of strings.
{% sys_parse_string(str, `,`) %} -> strings;
;;; print the vector
strings =>
** {'Hello' 'How' 'Are' 'You' 'Today'}
It is also possible to give sys_parse_string a 'conversion' procedure, which is applied to each of the tokens. E.g. it could be used to produce a vector of numbers, using the conversion procedure 'strnumber', which converts a string to a number:
lvars numbers;
{% sys_parse_string('100 101 102 103 99.9 99.999', strnumber) %} -> numbers;
;;; the result is a vector containing integers and floats,
;;; which can be printed thus:
numbers =>
** {100 101 102 103 99.9 99.999}
Using lower level pop-11 facilities to tokenise the string:
;;; Declare and initialize variables
lvars str='Hello,How,Are,You,Today';
;;; Iterate over string
lvars ls = [], i, j = 1;
for i from 1 to length(str) do
;;; If comma
if str(i) = `,` then
;;; Prepend word (substring) to list
cons(substring(j, i - j, str), ls) -> ls;
i + 1 -> j;
endif;
endfor;
;;; Prepend final word (if needed)
if j <= length(str) then
cons(substring(j, length(str) - j + 1, str), ls) -> ls;
endif;
;;; Reverse the list
rev(ls) -> ls;
Since the task requires an array, we convert the list to an array (a vector):
;;; Put list elements and length on the stack
destlist(ls);
;;; Build a vector from them
lvars ar = consvector();
;;; Display in a loop, putting trailing period
for i from 1 to length(ar) do
printf(ar(i), '%s.');
endfor;
;;; Finish the output line
printf('\n');
We could use list directly for printing:
;;; Print every element of the list ls followed by a period
for i in ls do
printf(i, '%s.');
endfor;
so the conversion to vector is purely to satisfy task formulation.
[edit] PowerBASIC
PowerBASIC has a few keywords that make parsing strings trivial: PARSE, PARSE$, and PARSECOUNT. (PARSE$, not shown here, is for extracting tokens one at a time, while PARSE extracts all tokens at once into an array. PARSECOUNT returns the number of tokens found.)
' Splits parseMe at commas into the array parsed() and shows the tokens,
' joined by periods, in a message box.
FUNCTION PBMAIN () AS LONG
DIM parseMe AS STRING
parseMe = "Hello,How,Are,You,Today"
' The array is zero-based, so its upper bound is the token count minus one
REDIM parsed(PARSECOUNT(parseMe) - 1) AS STRING
PARSE parseMe, parsed() 'comma is default delimiter
DIM L0 AS LONG, outP AS STRING
' Start with the first token, then add each further token after a period
outP = parsed(0)
FOR L0 = 1 TO UBOUND(parsed) 'could reuse parsecount instead of ubound
outP = outP & "." & parsed(L0)
NEXT
MSGBOX outP
END FUNCTION
[edit] PowerShell
Works with: PowerShell version 1
# Split with the .NET String.Split method, then join with String.Join.
$text = "Hello,How,Are,You,Today"
$words = $text.Split(',')
[string]::Join('.', $words)
Works with: PowerShell version 2
# Split and join with the -split and -join operators.
$text = "Hello,How,Are,You,Today"
$words = $text -split ','
$words -join '.'
[edit] PureBasic
As described
; Splits the text at commas into the list MyStrings() and prints every
; element followed by a period.
text.s = "Hello,How,Are,You,Today"
NewList MyStrings.s()
; There is one more field than there are commas, so the loop bound follows
; the text instead of being hard-coded.
For i=1 To CountString(text, ",")+1
  AddElement(MyStrings())
  MyStrings()=StringField(text,i,",")
Next i
ForEach MyStrings()
  Print(MyStrings()+".")
Next
Still, easier would be
; Shortcut that skips the list entirely: replace every comma with a period.
Print(ReplaceString("Hello,How,Are,You,Today",",","."))
[edit] Python
Works with: Python version 2.5
# Split the comma-separated text into a list of words, then print the words
# joined by periods.
text = "Hello,How,Are,You,Today"
tokens = text.split(',')
# print(...) with a single argument works on both Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
print('.'.join(tokens))
[edit] R
# Split the text at commas and print the words joined by periods.
text <- "Hello,How,Are,You,Today"
# strsplit returns a list with one character vector per input string;
# [[1]] picks the vector belonging to our single string.
words <- strsplit(text, split = ",")[[1]]
dotted <- paste(words, collapse = ".")
print(dotted)
or the one liner
paste(unlist(strsplit(text, split=",")), collapse=".")
[edit] Raven
# Split the string at commas, join the pieces with periods and print the result.
'Hello,How,Are,You,Today' ',' split '.' join print
[edit] REBOL
; Split the text at commas and rebuild it with a period after every word.
print ["Original:" original: "Hello,How,Are,You,Today"]
; parse with a string of delimiter characters returns a block of the pieces.
tokens: parse original ","
; Append each token plus a trailing period to the result string.
dotted: "" repeat i tokens [append dotted rejoin [i "."]]
print ["Dotted: " dotted]
Output:
Original: Hello,How,Are,You,Today
Dotted:  Hello.How.Are.You.Today.
[edit] Ruby
# Split the text at commas, then print the words joined by periods.
words = "Hello,How,Are,You,Today".split(',')
puts words.join('.')
[edit] Scala
// Split the text at commas and print the words joined by periods.
println("Hello,How,Are,You,Today".split(",").mkString("."))
[edit] Seed7
# Declare an empty array of strings, then fill it with the comma-separated pieces.
# NOTE(review): this fragment only builds the array; it does not display the words.
var array string: tokens is 0 times "";
tokens := split("Hello,How,Are,You,Today", ",");
[edit] Slate
"Split the string at commas and join the pieces with a period as separator."
('Hello,How,Are,You,Today' splitWith: $,) join &separator: '.'.
[edit] Smalltalk
"Split at commas, then fold the words together with a period between each pair (no trailing period)."
|array |
array := 'Hello,How,Are,You,Today' subStrings: $,.
array fold: [:concatenation :string | concatenation, '.', string ]
Some implementations also have a join: convenience method that allows the following shorter solution:
"Same result using join:, where the implementation provides it."
('Hello,How,Are,You,Today' subStrings: $,) join: '.'
The solution displaying a trailing period would be:
"Start from the empty string and append every word followed by a period, so the result ends with a period."
|array |
array := 'Hello,How,Are,You,Today' subStrings: $,.
array inject: '' into: [:concatenation :string | concatenation, string, '.' ]
[edit] SNOBOL4
For this task, it's convenient to define Perl-style split( ) and join( ) functions.
* split(chs,str): break str into pieces at the characters in chs and
* return the pieces as an array. Locals: i, j, t, w2.
* The pieces are first collected in the table t (i counts them) and then
* copied into an array of exactly i elements.
define('split(chs,str)i,j,t,w2') :(split_end)
split t = table()
sp1 str pos(0) (break(chs) | rem) $ t<i = i + 1>
+ span(chs) (break(chs) | '') . w2 = w2 :s(sp1)
* t<i> = differ(str,'') str ;* Uncomment for CSnobol
split = array(i)
sp2 split<j = j + 1> = t<j> :s(sp2)f(return)
split_end
* join(ch,a): concatenate the elements of array a, placing ch between
* consecutive elements. Local: i.
define('join(ch,a)i,') :(join_end)
join join = join a<i = i + 1>
join = join ?a<i + 1> ch :s(join)f(return)
join_end
* # Test and display
output = join('.',split(',','Hello,How,Are,You,Today'))
end
Output:
Hello.How.Are.You.Today
[edit] Standard ML
(* splitter cuts a string into its comma-separated tokens; String.tokens
   never returns empty tokens. *)
val splitter = String.tokens (fn ch => #"," = ch);
(* main rejoins the tokens of its argument with periods between them. *)
val main = fn text => String.concatWith "." (splitter text);
Test:
- main "Hello,How,Are,You,Today"
val it = "Hello.How.Are.You.Today" : string
[edit] Tcl
Generating a list from a string by splitting on a comma:
split string ,
Joining the elements of a list by a period:
join list .
Thus the whole thing would look like this:
# Split the string at commas into a list, join the list with periods, print it.
puts [join [split "Hello,How,Are,You,Today" ,] .]
If you'd like to retain the list in a variable with the name "words", it would only be marginally more complex:
# As before, but "set" also stores the list in the variable words (set returns the value it assigns).
puts [join [set words [split "Hello,How,Are,You,Today" ,]] .]
(In general, the regexp command is also used in Tcl for tokenization of strings, but this example does not need that level of complexity.)
[edit] tr
# tr translates every comma into a period; the text is never actually split into a list.
echo 'Hello,How,Are,You,Today' | tr ',' '.'
[edit] UnixPipes
# rtoken: read one line from standard input, print its first comma-separated
# token, and recurse on the remainder until nothing is left.
# The comma IFS is applied to read itself, so read does the splitting:
# A receives the first token and B everything after the first comma.
rtoken() {
    (IFS=, read -r A B ; echo "$A"; test -n "$B" && (echo "$B" | rtoken) )
}

# tokens: print every comma-separated token of the first input line on a
# line of its own.
tokens() {
    rtoken
}

echo "Hello,How,Are,You" | tokens
[edit] Ursala
A list of strings is made by separating at the commas using the library function, sep. A single string is then made by joining the list of strings with periods using the library function, mat. Each of these is a second order function parameterized by the delimiter. Character literals are preceded by a backquote.
#import std
token_list = sep`, 'Hello,How,Are,You,Today'
#cast %s
main = mat`. token_list
output:
'Hello.How.Are.You.Today'
[edit] VBScript
[edit] One liner
' Split the text at commas into an array, then join the array with periods and display it.
wscript.echo Join( Split( "Hello,How,Are,You,Today", "," ), "." )
In fact, the following Visual Basic could have done the same, as Join() is available.
[edit] Visual Basic
Translation of: PowerBASIC
Unlike PowerBASIC, there is no need to know beforehand how many tokens are in the string -- Split automagically builds the array for you.
' Splits the sample text at commas and shows the tokens, joined by periods,
' in a message box.
Sub Main()
    Dim parseMe As String
    Dim parsed As Variant

    parseMe = "Hello,How,Are,You,Today"
    ' Split builds the array of tokens.
    parsed = Split(parseMe, ",")
    ' Join puts a period between consecutive tokens, which is exactly what
    ' concatenating them one by one in a loop produces.
    MsgBox Join(parsed, ".")
End Sub
[edit] Vedit macro language
Vedit does not use the concepts of array or list. Normally, the text is processed as text in an edit buffer.
However, this example shows how to split the text into multiple text registers (10, 11, 12 etc.). The contents of each text register are then displayed to the user, each followed by a period.
// Work in a free edit buffer so that no existing text is touched
Buf_Switch(Buf_Free)
Ins_Text("Hello,How,Are,You,Today")
// Split the text into text registers 10, 11, ...
BOF
// #1 is the number of the text register in use; it is incremented before
// each use, so the first token goes into register 10
#1 = 9
Repeat(ALL) {
#1++
// #2 remembers where the current token starts
#2 = Cur_Pos
// ADVANCE leaves the cursor after the comma; ERRBREAK leaves the loop
// when no further comma is found
Search(",", ADVANCE+ERRBREAK)
// Copy the token, excluding the comma just passed
Reg_Copy_Block(#1, #2, Cur_Pos-1)
}
// The last token has no comma after it: copy from its start to the end of the buffer
Reg_Copy_Block(#1, #2, EOB_Pos)
// Display the list
for (#3 = 10; #3 <= #1; #3++) {
Reg_Type(#3) Message(".")
}
// Discard the temporary buffer
Buf_Quit(OK)
[edit] Zsh
# Split the text at commas into an array, then print the elements joined by periods.
str='Hello,How,Are,You,Today'
# (s:,:) splits the parameter value at every comma.
words=(${(s:,:)str})
# (j:.:) joins the array elements with a period.
print ${(j:.:)words}

