Anonymous user
Multisplit: Difference between revisions
Added C++23 version
imported>Spikeysnack (Added C++23 version) |
|||
(36 intermediate revisions by 20 users not shown) | |||
Line 24:
'''Extra Credit:''' provide information that indicates which separator was matched at each separation point and where in the input string that separator was matched.
=={{header|11l}}==
{{trans|Python}}
<syntaxhighlight lang="11l">F multisplit(text, sep)
// Scans `text` left to right and returns one string in which every matched
// separator is wrapped in braces and the text between separators is copied
// through unchanged. At each position the separators are tried in the
// order given in `sep`; the first one that matches wins.
V lastmatch = 0
V i = 0
V matches = ‘’
L i < text.len
L(s) sep
// NOTE(review): `j` is assigned but never read in this function.
V j = L.index
I text[i..].starts_with(s)
// Emit the pending text (if any) before the separator itself.
I i > lastmatch
matches ‘’= text[lastmatch .< i]
matches ‘’= ‘{’s‘}’
lastmatch = i + s.len
i += s.len
L.break
// Runs only when no separator matched at position i: advance one character.
L.was_no_break
i++
// Emit whatever text follows the last separator.
I i > lastmatch
matches ‘’= text[lastmatch .< i]
R matches
print(multisplit(‘a!===b=!=c’, [‘==’, ‘!=’, ‘=’]))</syntaxhighlight>
{{out}}
<pre>
a{!=}{==}b{=}{!=}c
</pre>
=={{header|Ada}}==
multisplit.adb:
<
with Ada.Text_IO;
Line 107 ⟶ 137:
Pos := String_Lists.Next (Pos);
end loop;
end Multisplit;</
{{out}}
Line 114 ⟶ 144:
=={{header|ALGOL 68}}==
<
# MODE to hold the split results #
Line 179 ⟶ 209:
SPLITINFO token = test tokens[ t ];
print( ( "token: [", text OF token, "] at: ", whole( position OF token, 0 ), " delimiter: (", delimiter OF token, ")", newline ) )
OD</
{{out}}
<pre>
Line 188 ⟶ 218:
token: [c] at: 10 delimiter: ()
</pre>
=={{header|Arturo}}==
<syntaxhighlight lang="rebol">print split.by:["==" "!=" "="] "a!===b=!=c"</syntaxhighlight>
{{out}}
<pre>a b c</pre>
=={{header|AutoHotkey}}==
<
Sep := ["==","!=", "="]
Res := StrSplit(Str, Sep)
Line 198 ⟶ 236:
for k, v in Sep
N .= (N?"|":"") "\Q" v "\E"
MsgBox % RegExReplace(str, "(.*?)(" N ")", "$1 {$2}")</
{{out}}
<pre>a,,b,,c
Line 204 ⟶ 242:
=={{header|AWK}}==
<syntaxhighlight lang="awk">
# syntax: GAWK -f MULTISPLIT.AWK
BEGIN {
Line 228 ⟶ 266:
exit(0)
}
</syntaxhighlight>
{{out}}
<pre>
Line 240 ⟶ 278:
separators: '!=' '==' '=' '!='
</pre>
=={{header|BBC BASIC}}==
<
sep$() = "==", "!=", "="
PRINT "String splits into:"
Line 264 ⟶ 303:
ENDIF
UNTIL m% = LEN(s$)
= o$ + """" + MID$(s$, p%) + """"</
{{out}}
<pre>
Line 275 ⟶ 314:
=={{header|Bracmat}}==
This is a surprisingly difficult task to solve in Bracmat, because in a naive solution using an alternating pattern ("=="|"!="|"=") the shorter pattern <code>"="</code> would have precedence over <code>"=="</code>. In the solution below the function <code>oneOf</code> iterates (by recursion) over the operators, trying to match the start of the current subject string <code>sjt</code> with one operator at a time, until success or reaching the end of the list of operators, whichever comes first. If no operator is found at the start of the current subject string, the variable <code>nonOp</code> is extended with one byte, thereby shifting the start of the current subject string one byte to the right. Then a new attempt is made to find an operator. This is repeated until either an operator is found, in which case the unparsed string is restricted to the part of the input after the found operator, or no operator is found, in which case the <code>whl</code> loop terminates.
<
= operator
. !arg:%?operator ?arg
Line 292 ⟶ 331:
& put$!unparsed
& put$\n
);</
{{out}}
<pre>a {!=} {==} b {=} {!=} c</pre>
Line 298 ⟶ 337:
=={{header|C}}==
What kind of silly parsing is this?
<
#include <string.h>
Line 321 ⟶ 360:
return 0;
}</
{{out}}<syntaxhighlight lang="text">a{!=}{==}b{=}{!=}c</
=={{header|C sharp}}==
Line 352 ⟶ 367:
'''Extra Credit Solution'''
<
using System.Collections.Generic;
using System.Linq;
Line 408 ⟶ 423:
}
}
}</
{{out}}
<pre>a{"!=", (1, 3)}{"==", (3, 5)}b{"=", (6, 7)}{"!=", (7, 9)}c
</pre>
=={{header|C++}}==
using the Boost library tokenizer!
<syntaxhighlight lang="cpp">#include <iostream>
#include <boost/tokenizer.hpp>
#include <string>
// Tokenizes the sample string in two passes with boost::tokenizer and prints
// the surviving fields separated by single spaces.
//
// NOTE(review): boost::char_separator is constructed from *sets of delimiter
// characters* (first argument: characters that are dropped, second argument:
// characters that are kept as one-character tokens). It does not accept
// multi-character separator strings, so "==" and "!=" below act as the
// character sets {'='} and {'!','='}. This reproduces the sample output but is
// not a general ordered multi-string split -- confirm against the
// Boost.Tokenizer documentation before reusing.
int main( ) {
std::string str( "a!===b=!=c" ) , output ;
typedef boost::tokenizer<boost::char_separator<char> > tokenizer ;
// `separator` drives pass 1, `sep` drives pass 2.
boost::char_separator<char> separator ( "==" , "!=" ) , sep ( "!" ) ;
tokenizer mytok( str , separator ) ;
tokenizer::iterator tok_iter = mytok.begin( ) ;
// Pass 1: concatenate every token produced by `separator` into `output`.
for ( ; tok_iter != mytok.end( ) ; ++tok_iter )
output.append( *tok_iter ) ;
// Pass 2: re-tokenize the intermediate string on '!' and print each token
// followed by a space.
tokenizer nexttok ( output , sep ) ;
for ( tok_iter = nexttok.begin( ) ; tok_iter != nexttok.end( ) ;
++tok_iter )
std::cout << *tok_iter << " " ;
std::cout << '\n' ;
return 0 ;
}</syntaxhighlight>
{{out}}
<PRE>a b c</PRE>
===Without external libraries===
<syntaxhighlight lang="c++">
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
// One piece of a split: the text that precedes a separator, the index in the
// input at which that separator starts, and the separator itself. The last
// piece of a split has an empty separator and index == text.length().
struct Split_data {
    std::string segment;
    int32_t index;
    std::string separator;
};

// Splits `text` on any of `separators`, scanning left to right.
//
// At every position the separators are tried in the order given, and the
// first one that matches at that position is consumed, so earlier entries
// take priority over later ones. Empty separators are ignored: they would
// match everywhere without consuming input and the scan would never advance.
//
// Returns one Split_data per matched separator plus a final entry holding
// the text after the last separator (possibly empty). Empty segments between
// adjacent separators are preserved.
std::vector<Split_data> multi_split(const std::string& text, const std::vector<std::string>& separators) {
    std::vector<Split_data> result;
    std::string segment;
    std::string::size_type i = 0;

    while ( i < text.length() ) {
        // Find the first separator (in priority order) that starts at i.
        const std::string* matched = nullptr;
        for ( const std::string& separator : separators ) {
            // compare() checks in place; no temporary substring is built.
            if ( ! separator.empty() && text.compare(i, separator.length(), separator) == 0 ) {
                matched = &separator;
                break;
            }
        }

        if ( matched != nullptr ) {
            result.push_back(Split_data{ segment, static_cast<int32_t>(i), *matched });
            segment.clear();
            i += matched->length();
        } else {
            segment += text[i];
            ++i;
        }
    }

    // Trailing text after the last separator; i == text.length() here.
    result.push_back(Split_data{ segment, static_cast<int32_t>(i), std::string{} });
    return result;
}
int main() {
for ( Split_data splits : multi_split("a!===b=!=c", { "==", "!=", "=" } ) ) {
std::cout << std::left << std::setw(3) << "\"" + splits.segment + "\""
<< std::setw(18) << " ( split with \"" + splits.separator + "\""
<< " at index " << splits.index << " )" << std::endl;
}
}
</syntaxhighlight>
{{ out }}
<pre>
"a" ( split with "!=" at index 1 )
"" ( split with "==" at index 3 )
"b" ( split with "=" at index 6 )
"" ( split with "!=" at index 7 )
"c" ( split with "" at index 10 )
</pre>
===C++23===
<syntaxhighlight lang="c++">
/* multisplit.cpp */
#include <features.h>
#include <iostream>
#include <string>
#include <vector>
#include <format>
/* C++23 example for Multisplit 6 Jan 2024
email:
spikeysnack@gmail.com
compile:
g++-13 -std=c++23 -Wall -o multisplit multisplit.cpp
*/
// extra info
#define _EXTRA
// aliases
using std::string;
using std::vector;
using str_vec = vector<string>;
using std::cout;
// constants
constexpr static const size_t npos = -1;
// function signatures
string replace_all(string& str, const string& remove, const string& insert );
str_vec split_on_delim(string& str, const string& delims);
str_vec Multisplit( string& input, const str_vec& seps);
// functions
// Return a copy of `str` in which every non-overlapping occurrence of
// `remove` (scanning left to right) is replaced by `insert`. `str` itself is
// left untouched.
//   a = "dogs and cats and dogs and cats and birds"
//   replace_all(a, "cats" , "fish");
//   ==> "dogs and fish and dogs and fish and birds"
// An empty `remove` matches nothing, so the copy is returned unchanged.
// When _EXTRA is defined, "match: <remove>\t<arrow> " is written to
// std::cerr before the replacement is carried out.
string replace_all(string& str,
                   const string& remove,
                   const string& insert ){

  string s{str};

#ifdef _EXTRA
  const string rightarrow{"\u2B62"}; //unicode arrow
  auto ex = std::format("match: {}\t{} ", remove, rightarrow);
  std::cerr << ex;
#endif

  // find("") succeeds at every position, so an empty pattern would make the
  // loop below insert text forever.
  if (remove.empty()) return s;

  string::size_type pos = 0;
  while ((pos = s.find(remove, pos)) != string::npos){
    s.replace(pos, remove.size(), insert);
    // Resume right after the inserted text: stepping by one would skip a
    // match when `insert` is empty and would re-match inside `insert` when it
    // contains `remove`.
    pos += insert.size();
  }

  return s;
}
// Collect the non-empty runs of characters of `str` that lie between
// delimiter characters. Every character contained in `delims` counts as a
// delimiter; consecutive delimiters produce no empty entries.
//   x = "ab:cde:fgh:ijk"
//   split_on_delim( x, ":");
//   ==> { "ab", "cde", "fgh", "ijk" }
str_vec split_on_delim(string& str, const string& delims) {
  str_vec tokens;
  string::size_type cursor = 0;

  for (;;) {
    // Start of the next token: first character that is not a delimiter.
    const string::size_type start = str.find_first_not_of(delims, cursor);
    if (start == string::npos) break;

    // End of the token: next delimiter, or npos when the token runs to the
    // end of the string (substr clamps the resulting length).
    cursor = str.find_first_of(delims, start + 1);
    tokens.push_back(str.substr(start, cursor - start));
  }

  return tokens;
}
// Split `input` on every separator in `seps` and return the non-empty fields.
//
// Each separator is replaced, in the order given, by the one-character
// marker "^"; the marked string is then split on that marker once.
// Consequences worth knowing:
//   * the replacements are applied to the whole string one separator at a
//     time, so the order of `seps` decides which separator claims
//     overlapping text (a separator that is a prefix or part of another
//     must come after it);
//   * a '^' already present in `input` is treated as a separator;
//   * empty fields between adjacent separators are dropped;
//   * an empty `seps` yields an empty result.
// When _EXTRA is defined the intermediate string is written to std::cerr
// after each replacement.
str_vec Multisplit( string& input, const str_vec& seps) {

  if (seps.empty()) return {};

  const string marker{"^"};
  string s1{input};

  for( const auto& sep : seps){
    s1 = replace_all(s1, sep, marker); // mark every occurrence of sep

#ifdef _EXTRA
    std::cerr << s1 << "\n";
#endif
  }

  // Only the fully marked string matters, so split once after the loop.
  return split_on_delim(s1, marker);
}
/* main program */
int main(){
string sample{"a!===b=!=c"};
const str_vec seps {"!=", "==", "="};
auto s = std::format("sample: \t{}\n", sample);
cout << s;
auto sv = Multisplit(sample, seps);
for( auto s : sv){
auto out = std::format( "{}\t" , s);
cout << out;
}
cout << "\n";
return 0;
}
// end
</syntaxhighlight>
{{ out }}
<pre>
sample: a!===b=!=c
match: != ⭢ a^==b=^c
match: == ⭢ a^^b=^c
match: = ⭢ a^^b^^c
a b c
</pre>
=={{header|CoffeeScript}}==
<
multi_split = (text, separators) ->
# Split text up, using separators to break up text and discarding
Line 446 ⟶ 683:
console.log multi_split 'a!===b=!=c', ['==', '!=', '='] # [ 'a', '', 'b', '', 'c' ]
console.log multi_split '', ['whatever'] # [ '' ]
</syntaxhighlight>
=={{header|D}}==
<
string[] multiSplit(in string s, in string[] divisors) pure nothrow {
Line 486 ⟶ 723:
.join(" {} ")
.writeln;
}</
{{out}} (separator locations indicated by braces):
<pre>a {} {} b {} {} c</pre>
=={{header|Delphi}}==
{{libheader| System.SysUtils}}
<syntaxhighlight lang="delphi">
// Console demo: splits the sample string on the separators '==', '!=' and '='
// using the string helper's Split and prints every resulting piece in double
// quotes, all enclosed in one pair of square brackets.
program Multisplit;
{$APPTYPE CONSOLE}
uses
System.SysUtils;
begin
write('[');
// Each piece is followed by a single space, including the last one.
for var s in 'a!===b=!=c'.Split(['==', '!=', '=']) do
write(s.QuotedString('"'), ' ');
write(']');
// Wait for Enter so the console window stays open.
readln;
end.</syntaxhighlight>
{{out}}
<pre>["a" "" "b" "" "c" ]</pre>
=={{header|Elixir}}==
{{trans|Erlang}}
<
["a", "", "", "b", "", "c"]</
=={{header|Erlang}}==
Line 500 ⟶ 755:
["a",[],"b",[],"c"]
</pre>
=={{header|F_Sharp|F#}}==
If we ignore the "Extra Credit" requirements and skip the 'ordered separators' condition (i.e. solve an entirely different task), this is exactly what one of the overloads of .NET's <code>String.Split</code> method does. Using F# Interactive:
<syntaxhighlight lang="fsharp">> "a!===b=!=c".Split([|"=="; "!="; "="|], System.StringSplitOptions.None);;
val it : string [] = [|"a"; ""; "b"; ""; "c"|]
> "a!===b=!=c".Split([|"="; "!="; "=="|], System.StringSplitOptions.None);;
val it : string [] = [|"a"; ""; ""; "b"; ""; "c"|]</syntaxhighlight>
<code>System.StringSplitOptions.None</code> specifies that empty strings should be included in the result.
=={{header|Factor}}==
<syntaxhighlight lang="factor">USING: arrays fry kernel make sequences ;
IN: rosetta-code.multisplit
: first-subseq ( seq separators -- n separator )
tuck
[ [ subseq-index ] dip 2array ] withd map-index sift-keys
[ drop f f ] [ [ first ] infimum-by first2 rot nth ] if-empty ;
: multisplit ( string separators -- seq )
'[
[ dup _ first-subseq dup ] [
length -rot cut-slice [ , ] dip swap tail-slice
] while 2drop ,
] { } make ;</syntaxhighlight>
{{out}}
<pre>> "a!===b=!=c" { "==" "!=" "=" } multisplit [ >string ] map .
{ "a" "" "b" "" "c" }</pre>
=={{header|FreeBASIC}}==
FreeBASIC does not have a built in 'split' function so we need to write one:
<
Sub Split(s As String, sepList() As String, result() As String, removeEmpty As Boolean = False, showSepInfo As Boolean = False)
Line 576 ⟶ 865:
Print
Print "Press any key to quit"
Sleep</
{{out}}
Line 595 ⟶ 884:
5 : c
</pre>
=={{header|Go}}==
<
import (
Line 633 ⟶ 911:
func main() {
fmt.Printf("%q\n", ms("a!===b=!=c", []string{"==", "!=", "="}))
}</
{{out}}
<pre>
Line 640 ⟶ 918:
=={{header|Haskell}}==
<
intercalate,
isPrefixOf,
stripPrefix,
)
------------------------ MULTISPLIT ----------------------
multisplit :: [String] -> String -> [(String, String, Int)]
multisplit delims = go [] 0
where
go acc pos [] = [(acc, [], pos)]
go acc pos l@(s : sx) =
case trysplit delims l of
Nothing -> go (s : acc) (pos + 1) sx
Just (d, sxx) ->
(acc, d, pos) :
go [] (pos + genericLength d) sxx
trysplit :: [String] ->
trysplit
case filter (`isPrefixOf` s) delims of
[] -> Nothing
(d : _) -> Just (d, (\(Just x) -> x) $ stripPrefix d s)
--------------------------- TEST -------------------------
main :: IO ()
main = do
let parsed = multisplit
mapM_
putStrLn
[ "split string:",
]</
{{out}}
<pre>split string:
Line 676 ⟶ 963:
Or as a fold:
<
import Data.Bool (bool)
multiSplit :: [String] -> String -> [(String, String, Int)]
multiSplit ds s =
let
(\(tokens, parts, offset) (c, i) ->
let inDelim = offset > i
in
(bool (c : tokens) tokens inDelim,
(\x -> ([], (tokens, x, i)
([], [], 0)
(zip s [0 ..
in reverse $ (ts, [],
main :: IO ()
main = print $ multiSplit ["==", "!=", "="] "a!===b=!=c"</
{{Out}}
<pre>[("a","!=",1),("","==",3),("b","=",6),("","!=",7),("c","",10)]</pre>
=={{header|Icon}} and {{header|Unicon}}==
<
s := "a!===b=!=c"
# just list the tokens
Line 725 ⟶ 1,005:
procedure arb()
suspend .&subject[.&pos:&pos <- &pos to *&subject + 1]
end</
{{out}}
Line 732 ⟶ 1,012:
=={{header|J}}==
<syntaxhighlight lang
'
while.
while. j>k{begin do. k=.k+1 end.
'b s'=. k{bs NB. character index where separator appears, separator index
if. _=b do. r,.(j}.x);'';'' return. end.
txt=. (j + i. b-j){x
j=. b+s{len
r=.r,.txt;(s{::y);b
end.
}}</syntaxhighlight>
Explanation:
First find all potentially relevant separator instances, and sort them in increasing order, by starting location and separator index. <code>sep</code> is separator index, and <code>begin</code> is starting
Then, loop through the possibilities, skipping over those separators which would overlap with previously used separators.
The result consists of
Example use:
<
┌──┬──┬─┬──┬─┐
│a │ │b│ │c│
├──┼──┼─┼──┼─┤
│!=│==│=│!=│ │
├──┼──┼─┼──┼─┤
│1 │3 │6│7 │ │
└──┴──┴─┴──┴─┘
S multisplit '=';'!=';'=='
┌──┬─┬─┬─┬──┬─┐
│a
├──┼─┼─┼─┼──┼─┤
│!=│=│=│=│!=│ │
├──┼─┼─┼─┼──┼─┤
│1 │3│4│6│7 │ │
└──┴─┴─┴─┴──┴─┘
'X123Y' multisplit '1';'12';'123';'23';'3'
┌─┬──┬─┐
├─┼──┼─┤
├─┼──┼─┤
│1│2 │ │
└─┴──┴─┘</syntaxhighlight>
=={{header|Java}}==
<
public class MultiSplit {
Line 808 ⟶ 1,095:
return result;
}
}</
<pre>Regex split:
Line 820 ⟶ 1,107:
Based on Ruby example.
{{libheader|Underscore.js}}
<
return text.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
}
Line 827 ⟶ 1,114:
var sep_regex = RegExp(_.map(seps, function(sep) { return RegExp.escape(sep); }).join('|'));
return string.split(sep_regex);
}</
===ES6===
Line 833 ⟶ 1,120:
{{Trans|Haskell}} (Multisplit by fold example)
<
/// Delimiter list -> String -> list of parts, delimiters, offsets
Line 951 ⟶ 1,238:
multiSplit(delims, strTest)
);
})();</
{{Out}}
<pre>[
Line 989 ⟶ 1,276:
Both helper functions could be made inner functions of the main function, but are kept separate here for clarity.
<
# a single character from the input string.
# The input should be a nonempty string, and delims should be
Line 1,035 ⟶ 1,322:
then .[0:length-1] + [ .[length-1] + $x ]
else . + [$x]
end ) ;</
'''Examples'''
("a!===b=!=c",
Line 1,046 ⟶ 1,333:
=={{header|Julia}}==
From REPL:
<
julia> split(s, r"==|!=|=")
5-element Array{SubString{String},1}:
Line 1,054 ⟶ 1,341:
""
"c"
</syntaxhighlight>
=={{header|Kotlin}}==
<
fun main(args: Array<String>) {
Line 1,088 ⟶ 1,374:
println("\nThe delimiters matched and the indices at which they occur are:")
println(matches)
}</
{{out}}
Line 1,101 ⟶ 1,387:
=={{header|Lua}}==
The function I've written here is really excessive for this task but it has historically been hard to find example code for a good Lua split function on the Internet. This one behaves the same way as Julia's Base.split and I've included a comment describing its precise operation.
<
Returns a table of substrings by splitting the given string on
occurrences of the given character delimiters, which may be specified
Line 1,153 ⟶ 1,439:
for k, v in pairs(multisplit) do
print(k, v)
end</
{{Out}}
<pre>Key Value
Line 1,166 ⟶ 1,452:
Code from BBC BASIC with little changes to fit in M2000.
<syntaxhighlight lang="m2000 interpreter">
Module CheckIt {
DIM sep$()
Line 1,196 ⟶ 1,482:
}
CheckIt
</syntaxhighlight>
=={{header|Mathematica}}/{{header|Wolfram Language}}==
Just use the built-in function "StringSplit":
<
{{Out}}
<pre>{a,,b,,c}</pre>
=={{header|MiniScript}}==
<syntaxhighlight lang="miniscript">// Splits s on the patterns in pats, trying them in the order given at each
// position. Returns a list that alternates text segments with the matched
// separators wrapped in braces, ending with the text after the last separator.
parseSep = function(s, pats)
	result = []
	startPos = 0
	pos = 0
	while pos < s.len
		for pat in pats
			// An empty pattern would match everywhere without advancing.
			if pat.len == 0 then continue
			if s[pos : pos+pat.len] != pat then continue
			result.push s[startPos : pos]
			result.push "{" + pat + "}"
			startPos = pos + pat.len
			pos = startPos - 1
			break
		end for
		pos = pos + 1
	end while
	// Keep the final segment that follows the last separator.
	result.push s[startPos:]
	return result
end function

print parseSep("a!===b=!=c", ["==", "!=", "="])</syntaxhighlight>
{{Out}}
<pre>["a", "{!=}", "", "{==}", "b", "{=}", "", "{!=}", "c"]</pre>
=={{header|Nim}}==
<syntaxhighlight lang="nim">import strutils
iterator tokenize(text: string; sep: openArray[string]): tuple[token: string, isSep: bool] =
var i, lastMatch = 0
while i < text.len:
for j, s in sep:
if text[i..text.high].startsWith s:
if i > lastMatch: yield (text[lastMatch ..
yield (s, true)
lastMatch = i + s.len
Line 1,219 ⟶ 1,527:
break
inc i
if i > lastMatch: yield (text[lastMatch ..
for token, isSep in "a!===b=!=c".tokenize(["==", "!=", "="]):
if isSep: stdout.write '{',token,'}'
else: stdout.write token
echo ""</
{{out}}
<pre>a{!=}{==}b{=}{!=}c</pre>
Line 1,230 ⟶ 1,539:
=={{header|Perl}}==
<
my ($sep, $string, %opt) = @_ ;
$sep = join '|', map quotemeta($_), @$sep;
Line 1,240 ⟶ 1,549:
print "\n";
print "'$_' " for multisplit ['==','!=','='], "a!===b=!=c", keep_separators => 1;
print "\n";</
{{Out}}
Line 1,247 ⟶ 1,556:
'a' '!=' '' '==' 'b' '=' '' '!=' 'c'
</pre>
=={{header|Phix}}==
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span>
<span style="color: #008080;">procedure</span> <span style="color: #000000;">multisplit</span><span style="color: #0000FF;">(</span><span style="color: #004080;">string</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">,</span> <span style="color: #004080;">sequence</span> <span style="color: #000000;">delims</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">1</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">kdx</span>
<span style="color: #008080;">while</span> <span style="color: #004600;">true</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">kmin</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">delims</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">ki</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">match</span><span style="color: #0000FF;">(</span><span style="color: #000000;">delims</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">],</span><span style="color: #000000;">text</span><span style="color: #0000FF;">,</span><span style="color: #000000;">k</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">ki</span><span style="color: #0000FF;">!=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">kmin</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">or</span> <span style="color: #000000;">ki</span><span style="color: #0000FF;"><</span><span style="color: #000000;">kmin</span> <span style="color: #008080;">then</span>
<span style="color: #000000;">kmin</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">ki</span>
<span style="color: #000000;">kdx</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">i</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">token</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">text</span><span style="color: #0000FF;">[</span><span style="color: #000000;">k</span><span style="color: #0000FF;">..</span><span style="color: #000000;">kmin</span><span style="color: #0000FF;">-</span><span style="color: #000000;">1</span><span style="color: #0000FF;">],</span>
<span style="color: #000000;">delim</span> <span style="color: #0000FF;">=</span> <span style="color: #008080;">iff</span><span style="color: #0000FF;">(</span><span style="color: #000000;">kmin</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span><span style="color: #0000FF;">?</span><span style="color: #008000;">""</span><span style="color: #0000FF;">:</span><span style="color: #7060A8;">sprintf</span><span style="color: #0000FF;">(</span><span style="color: #008000;">", delimiter (%s) at %d"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">delims</span><span style="color: #0000FF;">[</span><span style="color: #000000;">kdx</span><span style="color: #0000FF;">],</span><span style="color: #000000;">kmin</span><span style="color: #0000FF;">}))</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Token: [%s] at %d%s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">token</span><span style="color: #0000FF;">,</span><span style="color: #000000;">k</span><span style="color: #0000FF;">,</span><span style="color: #000000;">delim</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">kmin</span><span style="color: #0000FF;">=</span><span style="color: #000000;">0</span> <span style="color: #008080;">then</span> <span style="color: #008080;">exit</span> <span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">k</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">kmin</span><span style="color: #0000FF;">+</span><span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">delims</span><span style="color: #0000FF;">[</span><span style="color: #000000;">kdx</span><span style="color: #0000FF;">])</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">while</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">procedure</span>
<span style="color: #000000;">multisplit</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"a!===b=!=c"</span><span style="color: #0000FF;">,{</span><span style="color: #008000;">"=="</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"!="</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"="</span><span style="color: #0000FF;">})</span>
<!--</syntaxhighlight>-->
{{out}}
<pre>
Line 1,309 ⟶ 1,593:
=={{header|PicoLisp}}==
<
(setq Sep (mapcar chop Sep))
(make
Line 1,327 ⟶ 1,611:
(println (multisplit "a!===b=!=c" '("==" "!=" "=")))
(println (multisplit "a!===b=!=c" '("=" "!=" "==")))</
{{out}}
<pre>("a" (1 "!=") NIL (3 "==") "b" (6 "=") NIL (7 "!=") "c")
Line 1,333 ⟶ 1,617:
=={{header|Pike}}==
<
array sep = ({"==", "!=", "=" });
Line 1,349 ⟶ 1,633:
result;
Result: ({"a", ({"!=", 1}), "", ({"==", 3}), "b", ({"=", 6}), "", ({"!=", 7}), "c"})</
=={{header|PowerShell}}==
<syntaxhighlight lang="powershell">
$string = "a!===b=!=c"
$separators = [regex]"(==|!=|=)"
Line 1,364 ⟶ 1,648:
$matchInfo
</syntaxhighlight>
{{Out}}
<pre>
Line 1,376 ⟶ 1,660:
=={{header|Prolog}}==
Works with SWI-Prolog.
<
{!},
[].
Line 1,416 ⟶ 1,700:
my_sort(<, (N, N1, _), (N, N2, _)) :-
N1 > N2.
</syntaxhighlight>
{{out}}
<pre>?- multisplit(['==', '!=', '='], 'ax!===b=!=c', Lst, []).
Line 1,423 ⟶ 1,707:
=={{header|Python}}==
===Procedural===
<
>>> def ms2(txt="a!===b=!=c", sep=["==", "!=", "="]):
if not txt or not sep:
Line 1,439 ⟶ 1,723:
['a', (1, 1), '', (0, 3), 'b', (2, 6), '', (1, 7), 'c']
>>> ms2(txt="a!===b=!=c", sep=["=", "!=", "=="])
['a', (1, 1), '', (0, 3), '', (0, 4), 'b', (0, 6), '', (1, 7), 'c']</
'''Inspired by C-version'''
<
lastmatch = i = 0
matches = []
Line 1,465 ⟶ 1,749:
>>> multisplit('a!===b=!=c', ['!=', '==', '='])
['a', (0, 1), (1, 3), 'b', (2, 6), (0, 7), 'c']
</syntaxhighlight>
'''Alternative version'''
<
return List.index(min(List))
Line 1,534 ⟶ 1,818:
S = "a!===b=!=c"
multisplit(S, ["==", "!=", "="]) # output: ['a', [1, 1], '', [0, 3], 'b', [2, 6], '', [1, 7], 'c']
multisplit(S, ["=", "!=", "=="]) # output: ['a', [1, 1], '', [0, 3], '', [0, 4], 'b', [0, 6], '', [1, 7], 'c']</
===Functional===
In terms of a fold (reduce), without use of regular expressions:
{{Works with|Python|3.7}}
<syntaxhighlight lang="python">'''Multisplit'''
from functools import reduce
# multiSplit :: [String] -> String -> [(String, String, Int)]
def multiSplit(separators):
    '''Curried splitter: multiSplit(separators)(s) returns a list of triples
       (token, separator, start index of separator).

       At each position the separators are tried in the order given and the
       first one that matches is consumed. The final triple carries the text
       after the last separator, an empty separator, and len(s).
    '''
    def go(s):
        def f(tokensPartsOffset, ic):
            tokens, parts, offset = tokensPartsOffset
            i, c = ic
            # Still inside a separator matched earlier: skip this character.
            if offset > i:
                return tokens, parts, offset
            sep = next(
                (x for x in separators if s.startswith(x, i)),
                None
            )
            if sep is None:
                # Append (not prepend) so multi-character tokens keep
                # their original character order.
                return tokens + c, parts, offset
            return '', [(tokens, sep, i)] + parts, i + len(sep)

        ts, ps, _ = reduce(f, enumerate(s), ('', [], 0))
        return list(reversed(ps)) + [(ts, '', len(s))]
    return go
# ------------------------- TEST -------------------------
# main :: IO ()
def main():
    '''Prints the split of the sample string on '==', '!=' and '='.'''
    split = multiSplit(['==', '!=', '='])
    print(split('a!===b=!=c'))
# ------------------ GENERIC FUNCTIONS -------------------
# find :: (a -> Bool) -> [a] -> (a | None)
def find(p):
    '''Curried search: find(p)(xs) is the first element of xs
       for which p holds, or None when there is no such element.
    '''
    def go(xs):
        for x in xs:
            if p(x):
                return x
        return None
    return go
# maybe :: b -> (a -> b) -> (a | None) -> b
def maybe(v):
    '''Curried default: maybe(v)(f)(m) is v when m is None,
       otherwise the result of applying f to m.
    '''
    def withFunction(f):
        def withValue(m):
            if m is None:
                return v
            return f(m)
        return withValue
    return withFunction
# MAIN ---
if __name__ == '__main__':
main()</syntaxhighlight>
{{Out}}
<pre>[('a', '!=', 1), ('', '==', 3), ('b', '=', 6), ('', '!=', 7), ('c', '', 10)]</pre>
=={{header|Racket}}==
<
#lang racket
(regexp-match* #rx"==|!=|=" "a!===b=!=c" #:gap-select? #t #:match-select values)
;; => '("a" ("!=") "" ("==") "b" ("=") "" ("!=") "c")
</syntaxhighlight>
=={{header|Raku}}==
(formerly Perl 6)
<syntaxhighlight lang="raku" line>sub multisplit($str, @seps) { $str.split: / ||@seps /, :v }
my @chunks = multisplit 'a!===b=!=c==d', < == != = >;
# Print the strings.
say @chunks».Str.raku;
# Print the positions of the separators.
for grep Match, @chunks -> $s {
say "{$s.fmt: '%2s'} from {$s.from.fmt: '%2d'} to {$s.to.fmt: '%2d'}";
}</syntaxhighlight>
{{out}}
<pre>("a", "!=", "", "==", "b", "=", "", "!=", "c", "==", "d")
!= from 1 to 3
== from 3 to 5
= from 6 to 7
!= from 7 to 9
== from 10 to 12</pre>
Using the array <tt>@seps</tt> in a pattern automatically does alternation.
By default this would do longest-term matching (that is, <tt>|</tt> semantics), but we can force it to do left-to-right matching by embedding the array in a short-circuit alternation (that is, <tt>||</tt> semantics).
As it happens, with the task's specified list of separators, it doesn't make any difference.
<p>
Raku automatically returns Match objects that will stringify to the matched pattern, but can also be interrogated for their match positions, as illustrated above by post-processing the results two different ways.
=={{header|REXX}}==
<
parse arg $ /*obtain optional string from the C.L. */
if $='' then $= "a!===b=!=c" /*None specified? Then use the default*/
Line 1,570 ⟶ 1,962:
$=changestr(null, $, showNull) /* ··· showing of "null" chars. */
say 'new string:' $ /*now, display the new string to term. */
/*stick a fork in it, we're all done. */</
Some older REXXes don't have a '''changestr''' BIF, so one is included here ──► [[CHANGESTR.REX]].
<br><br>'''output''' when using the default input:
Line 1,579 ⟶ 1,971:
=={{header|Ring}}==
<
# Project : Multisplit
Line 1,589 ⟶ 1,981:
see "" + n + ": " + substr(str, 1, pos-1) + " Sep By: " + sep[n] + nl
next
</syntaxhighlight>
Output:
<pre>
Line 1,602 ⟶ 1,994:
The simple method, using a regular expression to split the text.
<
separators = ['==', '!=', '=']
Line 1,610 ⟶ 2,002:
p multisplit_simple(text, separators) # => ["a", "", "b", "", "c"]
</syntaxhighlight>
The version that also returns the information about the separations.
<
sep_regex = Regexp.union(separators)
separator_info = []
Line 1,631 ⟶ 2,023:
p multisplit(text, separators)
# => [["a", "", "b", "", "c"], [["!=", 1], ["==", 3], ["=", 6], ["!=", 7]]]</
Also demonstrating a method to rejoin the string given the separator information.
<
str = info[0].zip(info[1])[0..-2].inject("") {|str, (piece, (sep, idx))| str << piece << sep}
str << info[0].last
Line 1,641 ⟶ 2,033:
p multisplit_rejoin(multisplit(text, separators)) == text
# => true</
=={{header|Run BASIC}}==
<
sep$ = "=== != =! b =!="
Line 1,652 ⟶ 2,044:
split$ = word$(str$,1,theSep$)
print i;" ";split$;" Sep By: ";theSep$
wend</
{{out}}
<pre>1 a! Sep By: ===
Line 1,661 ⟶ 2,053:
=={{header|Scala}}==
<
def multiSplit(str:String, sep:Seq[String])={
def findSep(index:Int)=sep find (str startsWith (_, index))
Line 1,676 ⟶ 2,068:
}
println(multiSplit("a!===b=!=c", Seq("!=", "==", "=")))</
{{out}}
<pre>List(a, , b, , c)</pre>
Line 1,682 ⟶ 2,074:
=={{header|Scheme}}==
{{works with|Gauche Scheme}}
<
(use srfi-42)
Line 1,696 ⟶ 2,088:
(define (glean shards)
(list-ec (: x (index i) shards)
(if (even? i)) x))</
<b>Testing:</b>
<pre>
Line 1,705 ⟶ 2,097:
("a" "!=" "" "==" "b" "=" "" "!=" "c")
</pre>
=={{header|SenseTalk}}==
First approach, using line delimiters. Lines are delimited by an array of separator strings, normally [CRLF, LF, CR, lineSeparator(0x2028), paragraphSeparator(0x2029)]. Supplying an alternate set of delimiters lets us split a string by a different (ordered) set of strings:
<syntaxhighlight lang="sensetalk">set source to "a!===b=!=c"
set separators to ["==", "!=", "="]
put each line delimited by separators of source</syntaxhighlight>
Output:
<syntaxhighlight lang="sensetalk">(a,,b,,c)</syntaxhighlight>
Second approach, using a pattern. SenseTalk's pattern language lets us define a pattern (a regex) which can then be used to split the string and also to display the actual separators that were found.
<syntaxhighlight lang="sensetalk">set source to "a!===b=!=c"
set separatorPattern to <"==" or "!=" or "=">
put source split by separatorPattern
put each occurrence of separatorPattern in source
</syntaxhighlight>
Output:
<syntaxhighlight lang="sensetalk">(a,,b,,c)
(!=,==,=,!=)</syntaxhighlight>
=={{header|Sidef}}==
<
sep = sep.map{.escape}.join('|');
var re = Regex.new(keep_sep ? "(#{sep})" : sep);
Line 1,715 ⟶ 2,129:
[false, true].each { |bool|
say multisplit(%w(== != =), 'a!===b=!=c', keep_sep: bool);
}</
{{out}}
<pre>
Line 1,721 ⟶ 2,135:
["a", "!=", "", "==", "b", "=", "", "!=", "c"]
</pre>
=={{header|Swift}}==
Swift strings are purposefully not indexed by integers, to avoid confusion and performance traps when dealing with Unicode. As such, the indexes returned by this method are not very helpful to a human reader, but can be used to manipulate the original string.
{{trans|Python}}
<syntaxhighlight lang="swift">extension String {
/// Splits the string on any of `seps`, scanning left to right.
///
/// At each position the separators are tried in the order given; the first
/// one that is a prefix of the remaining text is consumed.
///
/// - Parameter seps: separator strings, highest priority first.
/// - Returns: a pair of (the text pieces between separators, the matched
///   separators each paired with the index range it occupied).
func multiSplit(on seps: [String]) -> ([Substring], [(String, (start: String.Index, end: String.Index))]) {
var matches = [Substring]()
var matched = [(String, (String.Index, String.Index))]()
var i = startIndex
var lastMatch = startIndex
main: while i != endIndex {
for sep in seps where self[i...].hasPrefix(sep) {
// Record the text since the previous separator; an empty piece is
// recorded when this separator starts exactly where the last one ended.
if i > lastMatch {
matches.append(self[lastMatch..<i])
} else {
matches.append("")
}
lastMatch = index(i, offsetBy: sep.count)
matched.append((sep, (i, lastMatch)))
// Jump past the separator and restart the outer scan from there.
i = lastMatch
continue main
}
// No separator matched here: advance by one character.
i = index(i, offsetBy: 1)
}
// Trailing text after the last separator is appended only when non-empty.
if i > lastMatch {
matches.append(self[lastMatch..<i])
}
return (matches, matched)
}
}
// Demo: split the task's sample string, then print the pieces followed by
// just the separator text taken from each match record.
let (matches, matchedSeps) = "a!===b=!=c".multiSplit(on: ["==", "!=", "="])
print(matches, matchedSeps.map({ $0.0 }))</syntaxhighlight>
{{out}}
<pre>["a", "", "b", "", "c"] ["!=", "==", "=", "!="]</pre>
=={{header|Tcl}}==
This simple version does not retain information about what the separators were:
<
set map {}; foreach s $sep {lappend map $s "\uffff"}
return [split [string map $map $text] "\uffff"]
}
puts [simplemultisplit "a!===b=!=c" {"==" "!=" "="}]</
{{out}}
<pre>a {} b {} c</pre>
Line 1,735 ⟶ 2,197:
to the match information (because the two collections of information
are of different lengths).
<
foreach s $sep {lappend sr [regsub -all {\W} $s {\\&}]}
set sepRE [join $sr "|"]
Line 1,748 ⟶ 2,210:
}
return [list [lappend pieces [string range $text $start end]] $match]
}</
Demonstration code:
<
set matchers {"==" "!=" "="}
lassign [multisplit $input $matchers] substrings matchinfo
puts $substrings
puts $matchinfo</
{{out}}
<pre>
Line 1,771 ⟶ 2,233:
The <code>:gap 0</code> makes the horizontal collect repetitions strictly adjacent. This means that <code>coll</code> will quit when faced with a nonmatching suffix portion of the data rather than scan forward (no gap allowed!). This creates an opportunity for the <code>tail</code> variable to grab the suffix which remains, which may be an empty string.
<
@(coll :gap 0)@(choose :shortest tok)@\
@tok@{sep /==/}@\
Line 1,781 ⟶ 2,243:
@(output)
@(rep)"@tok" {@sep} @(end)"@tail"
@(end)</
Runs:
Line 1,806 ⟶ 2,268:
{{trans|Racket}}
<
("a" "!=" "" "==" "b" "=" "" "!=" "c")</
Here the third boolean argument means "keep the material between the tokens", which in the Racket version seems to be requested by the argument <code>#:gap-select? #t</code>.
Line 1,813 ⟶ 2,275:
=={{header|UNIX Shell}}==
{{works with|bash}}
<
local str=$1
shift
Line 1,841 ⟶ 2,303:
if [[ $original == $recreated ]]; then
echo "successfully able to recreate original string"
fi</
{{out}}
Line 1,852 ⟶ 2,314:
=={{header|VBScript}}==
<syntaxhighlight lang="vb">
Function multisplit(s,sep)
arr_sep = Split(sep,"|")
Line 1,879 ⟶ 2,341:
WScript.StdOut.WriteLine
WScript.StdOut.Write "Extra Credit: " & multisplit_extra("a!===b=!=c","!=|==|=")
WScript.StdOut.WriteLine</
{{out}}
<pre>
Line 1,885 ⟶ 2,347:
Extra Credit: a(!=)(==)b(=)(!=)c
</pre>
=={{header|V (Vlang)}}==
Without using additional libraries or regular expressions:
<syntaxhighlight lang="v (vlang)">fn main() {
// Demo driver: run ms on the task's sample input and print the values it returns.
str := "a!===b=!=c"
// Separators are handed to ms in this order, which is the order ms tests them in.
sep := ["==","!=","="]
println(ms(str, sep))
}
// ms scans txt for the separators in sep and reports what it found.
// It prints, and then returns, three values:
//   place - map from a character index to the separator recorded at that index
//   ans   - the kept characters, with an empty string inserted at every recorded index
//   extra - like ans, but with '(separator)' inserted instead of the empty string
// NOTE(review): matching is done with a growing buffer rather than by testing
// prefixes at each position, so results for inputs other than the demo string
// should be checked by hand.
fn ms(txt string, sep []string) (map[int]string, []string, []string) {
mut ans, mut extra := []string{}, []string{}
mut place := map[int]string{}
// temp buffers the characters read since the last recorded separator.
mut temp :=''
// vlen ends up as the length of the longest separator.
mut vlen := 0
for slen in sep {if slen.len > vlen {vlen = slen.len}}
// Pass 1: record separators.
for cidx, cval in txt {
temp += cval.ascii_str()
for value in sep {
// A separator is only accepted once the buffer holds at least vlen
// characters; it is recorded under the index of the character just read,
// and the buffer is then cleared.
if temp.contains(value) && temp.len >= vlen {
place[cidx] = value
temp =''
}
}
}
// Pass 2: build the two output lists character by character.
for tidx, tval in txt {
// Emit a marker when this index was recorded in pass 1.
for pkey, pval in place {
if tidx == pkey {
ans << ''
extra << '(' + pval + ')'
}
}
// Keep the character only if it occurs in none of the separators, so each
// kept character becomes its own list element.
if sep.any(it.contains(tval.ascii_str())) == false {
ans << tval.ascii_str()
extra << tval.ascii_str()
}
}
println('Ending indices: $place')
println('Answer: $ans')
println('Extra: $extra')
return place, ans, extra
}</syntaxhighlight>
{{out}}
<pre>
Ending indices: {2: '!=', 4: '==', 6: '=', 8: '!='}
Answer: ['a', '', '', 'b', '', '', 'c']
Extra: ['a', '(!=)', '(==)', 'b', '(=)', '(!=)', 'c']
({2: '!=', 4: '==', 6: '=', 8: '!='}, ['a', '', '', 'b', '', '', 'c'], ['a', '(!=)', '(==)', 'b', '(=)', '(!=)', 'c'])
</pre>
=={{header|Wren}}==
{{libheader|Wren-pattern}}
{{libheader|Wren-fmt}}
<syntaxhighlight lang="wren">import "./pattern" for Pattern
import "./fmt" for Fmt
// Sample input from the task description.
var input = "a!===b=!=c"
// A single pattern covering the separators "==", "!=" and "=".
// NOTE(review): this presumes that Wren-pattern uses "/" to escape the next
// character and "[x|y]" for alternation - confirm against the module docs.
var p = Pattern.new("[/=/=|!/=|/=]")
// All matches of the pattern in the input; each match exposes .text and .span.
var separators = p.findAll(input)
System.print("The separators matched and their starting/ending indices are:")
for (sep in separators) {
// Fmt.q and Fmt.s format the matched text for display (quoted and padded in the sample output).
System.print(" %(Fmt.s(-4, Fmt.q(sep.text))) between %(sep.span)")
}
// The pieces of the input that lie between the matches.
var parts = p.splitAll(input)
System.print("\nThe substrings between the separators are:")
// Empty pieces are shown as the words "empty string" instead of a quoted value.
System.print(parts.map { |p| (p != "") ? Fmt.q(p) : "empty string" }.toList)</syntaxhighlight>
{{out}}
<pre>
The separators matched and their starting/ending indices are:
"!=" between [1, 2]
"==" between [3, 4]
"=" between [6, 6]
"!=" between [7, 8]
The substrings between the separators are:
["a", empty string, "b", empty string, "c"]
</pre>
=={{header|XPL0}}==
<syntaxhighlight lang "XPL0">include xpllib; \for StrLen, StrNCmp, and Print
\Scan Str from left to right. Each separator found is printed as " (sep) ";
\every other character is passed to ChOut(0, ...) unchanged.
\ Str  = text to split; scanning stops at the first 0 byte
\ Seps = array of separator strings, tested in index order 0..N-1
\ N    = number of entries in Seps
proc MultiSplit(Str, Seps, N);
char Str; int Seps, N;
int S, Ch, SepLen;
[while Str(0) # 0 do
[for S:= 0 to N-1 do
[SepLen:= StrLen(Seps(S));
\Does the text at the current position begin with separator S?
if StrNCmp(Str, Seps(S), SepLen) = 0 then
[Print(" (%s) ", Seps(S));
\Step over the separator that was just reported
Str:= Str + SepLen;
\Sentinel: stops the search and marks that a match occurred (relies on N being below 100)
S:= 100;
];
];
\S stays below 100 only when no separator matched at this position
if S < 100 then
[Ch:= Str(0); Str:= Str+1;
\Ch is already non-zero here because of the while test; the check is a safeguard
if Ch # 0 then ChOut(0, Ch);
];
];
];
\Demo: split the sample string using the three separators from the task
MultiSplit("a!===b=!=c", ["==", "!=", "="], 3)</syntaxhighlight>
{{out}}
<pre>
a (!=) (==) b (=) (!=) c</pre>
=={{header|Yabasic}}==
<
s$ = "==,!=,="
Line 1,905 ⟶ 2,474:
print left$(t$, l - 1), " with separator ", n$(j)
t$ = right$(t$, len(t$) - (l + len(n$(j))) + 1)
loop</
=={{header|zkl}}==
{{trans|Python}}
<
lastmatch := i := 0; matches := List();
while(i < text.len()){
Line 1,925 ⟶ 2,494:
if(i > lastmatch) matches.append(text[lastmatch,i-lastmatch]);
return(matches);
}</
<
multisplit("a!===b=!=c", T("!=", "==", "=")).println();</
{{out}}
<pre>
|