Commatizing numbers: Difference between revisions

Line 236:

=={{header|D}}==

The function commatize solves the basic task. The function commatizeSpec adds extra features, so that all of the formats in the examples can be detected in a single function call.

<lang d>import std.stdio, std.regex, std.range, std.array, std.algorithm, std.typecons;

<lang d>import std.stdio, std.regex, std.range, std.typecons;

auto commatize(bool smart=false)(in char[] txt, in uint start=0, uint step=3,

// A decimal integer field begins with zero then no digit, or a non-zero digit.

string ins=",", string[string] specials=null)

auto decIntField = ctRegex!("0(?![0-9])|[1-9][0-9]*");

// A decimal fractional field is joined by only a point, and is only digits.

auto decFracField = ctRegex!("(?<=^\\.)[0-9]+");

auto commatize(in char[] txt, in uint start=0, in uint step=3,

in string ins=",")

in {

assert(step > 0);

Line 245:

Line 251:

if (start > txt.length || step > txt.length)

return txt;

auto matchInt = matchFirst(txt[start .. $], decIntField);

if (!matchInt)

auto preSpan = txt[0 .. start];

auto scanSpan = txt[start .. $];

// The first number field begins with zero then no digit, or non-zero.

auto numField = ctRegex!("0(?![0-9])|[1-9][0-9]*");

auto matchNum = matchFirst(scanSpan, numField);

if (!matchNum)

return txt;

return txt[0 .. start] ~ matchInt.pre ~

matchInt.hit

.replace!(m => m.hit.retro.chunks(step).join(ins).retro)(decIntField) ~

matchInt.post

.replace!(m => m.hit.chunks(step).join(ins))(decFracField);

}

// The following function can be called with the same arguments as commatize,

// Pass only a point and capture a decimal fractional field, if any.

// or with more optional arguments as well, for an overcomplicated file task,

auto decField = ctRegex!("(?<=^\\.)[0-9]+");

// such as writing only one call to meet this task's minimal requirements.

auto commatizeSpec(bool extraSpecial=false, // extra special treatments:

static if (smart) {

// A fractional part over 33 digits needs a word for less than a

// 36 digits allows decillions (US size). Integer parts long enough

// decillionth (to read aloud in US number names).

// for undecillions will use the alternate separator at least.

// The task's Wikipedia reference uses spaces in Eddington number.

// Group by 5 with spaces to read like scientific notation instead.

uint sepAltDigitLen=36, string sepAlt=" ",

auto matchDec = matchFirst(matchNum.post, decField);

// 33 digits allows decillionths. Decimal fractions long enough

if (matchDec && matchDec.hit.length > 33) {

// for undecillionths will use the alternate separator and step.

step = 5;

~~ins~~ = " ";

uint stepAltDigitLen=33, uint stepAlt=5,)

(in char[] txt, in uint start=0, in uint step=3,

}

in string sep=",", string[string] sepByPrefix=null) {

// A whole part over 36 digits needs a word for more than a decillion

uint stepAdj = step;

// (to read aloud in US number names).

string sepAdj = sep;

// Group anyway, with spaces as in the task's Wikipedia reference.

~~else~~ if (~~matchNum.hit.length~~ > 36) {

if (sepByPrefix !is null) {

~~ins~~ = " ";

auto preAnyDigit = matchFirst(txt[start .. $], ctRegex!"[0-9]").pre;

// A longer prefix match will override a shorter match length.

ulong matchLength = 0;

Tuple!(string, string)[] pairs; // These lines quote a std

foreach (pair; sepByPrefix.byPair) { // library example, with

pairs ~= pair; // the substitution of

} // `sepByPrefix`.

foreach (pair; pairs) {

auto prefix = pair[0];

if (preAnyDigit.length >= prefix.length &&

prefix.length > matchLength &&

prefix == preAnyDigit[$ - prefix.length .. $])

sepAdj = pair[1];

matchLength = pair[0].length;

}

if (extraSpecial) {

auto wholeDig = matchFirst(txt[start .. $], decIntField);

if (specials != null) {

auto fracDig = matchFirst(wholeDig.post, decFracField);

// There may be special prefixed formats that use different separators.

if (wholeDig && fracDig && fracDig.hit.length > stepAltDigitLen) {

// Any format with a longer prefix should override a shorter one.

~~Tuple!(string,~~ ~~string)[]~~ ~~pairs;~~

sepAdj = sepAlt;

stepAdj = stepAlt;

foreach (pair; specials.byPair) {

} else if (wholeDig && wholeDig.hit.length > sepAltDigitLen) {

pairs ~= pair;

}

sepAdj = sepAlt;

std.algorithm.sort!("a[0].length < b[0].length")(pairs);

auto preAnyDigit = matchNum.pre.stripRight('0');

foreach (symbol; pairs) {

if (preAnyDigit.length >= symbol[0].length &&

symbol[0] == preAnyDigit[$ - symbol[0].length .. $])

ins = symbol[1];

}

return sep == "" ? txt : commatize(txt, start, stepAdj, sepAdj);

return preSpan ~ matchNum.pre ~ matchNum.hit

.replace!(m => m.hit.retro.chunks(step).join(ins).retro)(numField)

~ matchNum.post

.replace!(m => m.hit.chunks(step).join(ins))(decField);

}

void main() {

// Current New Zealand dollar format overrides old Zimbabwe dollar format.

foreach (const line; "commatizing_numbers_data.txt".File.byLine)

line.~~commatize~~!true(0, 3, ",", ["Z$":".~~", "NZ$":",~~"]).writeln;

line.commatizeSpec!true(0, 3, ",", ["Z$":"."]).writeln;

}

unittest {

assert("0.0123456".commatize == "0.012,345,6");

assert("1. NZ$300000".commatize(1, 3, " ", ["Z$":".", "NZ$":","]) ==

"1. NZ$300,000");

assert("1000 2.3000".commatize == "1,000 2.3000");

assert("0001123.456789".commatize == "0001,123.456,789");

// tests of the special prefix switch:

assert("Z$01000".commatize(0, 3, ",", ["Z$":" "]) == "Z$01 000");

assert("Z$01000".commatizeSpec(0, 3, ",", ["Z$":" "]) == "Z$01 000");

assert("1. NZ$300000".commatizeSpec(1, 3, " ", ["Z$":".", "NZ$":","]) ==

"1. NZ$300,000");

// tests of the extra special switch on exceeding some number of digits:

assert("100000".commatizeSpec!(true, 6, " ", 10)() == "100,000");

assert("1000000".commatizeSpec!(true, 6, " ", 10)() == "1 000 000");

assert("0.000001".commatizeSpec!(true, 10, " ", 6)() == "0.000,001");

assert("0.0000001".commatizeSpec!(true, 10, " ", 6)() == "0.00000 01");

}</lang>