Commatizing numbers: Difference between revisions

→‎{{header|D}}: replaced extra features version with more solidly tested version
(→‎{{header|D}}: inserted missing braces)
(→‎{{header|D}}: replaced extra features version with more solidly tested version)
Line 236:
 
=={{header|D}}==
Better to have more tests than more features.
The function commatize is a solution. The function commatizeSpec adds extra features, to detect all formats in the examples in one function call.
<lang d>import std.stdio, std.regex, std.range;
 
// A decimal integer field begins with zero then no digit, or a non-zero digit.
auto decIntField = ctRegex!("0(?![0-9])|[1-9][0-9]*");
// A decimal fractional field is joined by only a point, and is only digits.
auto decFracField = ctRegex!("(?<=^\\.)[0-9]+");
 
auto commatize(in char[] txt, in uint start=0, in uint step=3,
in string ins=",") @safe
in {
assert(step > 0);
Line 251 ⟶ 246:
if (start > txt.length || step > txt.length)
return txt;
auto matchInt = matchFirst(txt[start .. $], decIntField);
if (!matchInt)
return txt;
return txt[0 .. start] ~ matchInt.pre ~
matchInt.hit
.replace!(m => m.hit.retro.chunks(step).join(ins).retro)(decIntField) ~
matchInt.post
.replace!(m => m.hit.chunks(step).join(ins))(decFracField);
 
// First number may begin with digit or decimal point. Exponents ignored.
// The following function can be called with the same arguments as commatize,
enum decFloField = ctRegex!("[0-9]*\\.[0-9]+|[0-9]+");
// or with more optional arguments as well, for an overcomplicated file task,
// such as writing only one call to meet this task's minimal requirements.
 
auto matchIntmatchDec = matchFirst(txt[start .. $], decIntFielddecFloField);
auto commatizeSpec(bool extraSpecial=false, // extra special treatments:
if (!matchIntmatchDec)
// 36 digits allows decillions (US size). Integer parts long enough
// for undecillions will use the alternate separator at least.
// The task's Wikipedia reference uses spaces in Eddington number.
uint sepAltDigitLen=36, string sepAlt=" ",
// 33 digits allows decillionths. Decimal fractions long enough
// for undecillionths will use the alternate separator and step.
uint stepAltDigitLen=33, uint stepAlt=5,)
(in char[] txt, in uint start=0, in uint step=3,
in string sep=",", string[string] sepByPrefix=null) {
if (start > txt.length)
return txt;
uint stepAdj = step;
string sepAdj = sep;
if (sepByPrefix !is null) {
auto preAnyDigit = matchFirst(txt[start .. $], ctRegex!"[0-9]").pre;
// A longer prefix match will override a shorter match length.
ulong matchLength = 0;
foreach (pair; sepByPrefix.byPair) {
auto prefix = pair[0];
if (preAnyDigit.length >= prefix.length &&
prefix.length > matchLength &&
prefix == preAnyDigit[$ - prefix.length .. $]) {
sepAdj = pair[1];
matchLength = prefix.length;
}
}
}
if (extraSpecial) {
auto wholeDig = matchFirst(txt[start .. $], decIntField);
auto fracDig = matchFirst(wholeDig.post, decFracField);
if (wholeDig && fracDig && fracDig.hit.length > stepAltDigitLen) {
sepAdj = sepAlt;
stepAdj = stepAlt;
} else if (wholeDig && wholeDig.hit.length > sepAltDigitLen) {
sepAdj = sepAlt;
}
}
return sep == "" ? txt : commatize(txt, start, stepAdj, sepAdj);
}
 
// Within a decimal float field:
void main() {
// A decimal integer field to commatize is positive and not after a point.
foreach (const line; "commatizing_numbers_data.txt".File.byLine)
auto enum decIntField = ctRegex!("0(?![0-9]<=\\.)|[1-9][0-9]*");
line.commatizeSpec!true(0, 3, ",", ["Z$":"."]).writeln;
// A decimal fractional field is preceded by only a point, and is only digits.
auto enum decFracField = ctRegex!("(?<=^\\.)[0-9]+");
 
return txt[0 .. start] ~ matchIntmatchDec.pre ~ matchDec.hit
.replace!(m => m.hit.retro.chunks(step).join(ins).retro)(decIntField) ~
.replace!(m => m.hit.chunks(step).join(ins))(decFracField);
matchInt~ matchDec.post;
}
 
unittest {
// An attempted solution may have one or more of the following errors:
// ignoring a number that has only zero before its decimal point
assert("0.0123456".commatize == "0.012,345,6");
// commatizing numbers other than the first
assert("1000 2.3000".commatize == "1,000 2.3000");
// only commatizing in one direction from the decimal point
assert("0001123.456789".commatize == "0001,123.456,789");
// detecting prefixes such as "Z$" requires detecting other prefixes
// tests of the special prefix switch:
assert("Z NZ$01000300000".commatizeSpec(0, 3, ",", ["Z$":" "])commatize == "Z NZ$01 300,000");
// detecting a decimal field that isn't attached to the first number
assert("1. NZ$300000".commatizeSpec(1, 3, " ", ["Z$":".", "NZ$":","]) ==
assert(" 2600 and .0125".commatize == "1. NZ$3002,000600 and .0125");
// ignoring the start value, or confusing base 0 (used here) with base 1
assert("1000001 77000".commatizeSpec!(true, 6, " ", 10)commatize(1) == "1001 77,000");
// ignoring a number that begins with a point, or treating it as integer
assert("1000000".commatizeSpec!(true, 6, " ", 10)() == "1 000 000");
assert("0 .0000010104004".commatizeSpec!(true, 10, " ", 6)()commatize == "0 .000010,001400,4");
assert("0.0000001".commatizeSpec!(true, 10, " ", 6)() == "0.00000 01");
 
assert("x".commatizeSpec!true(2) == "x");
void main() {
"pi=3.14159265358979323846264338327950288419716939937510582097494459231"
.commatize(0, 5, " ").writeln;
"The author has two Z$100000000000000 Zimbabwe notes (100 trillion)."
line.commatizeSpec!truecommatize(0, 3, ",", ["Z$":"."]).writeln;
foreach (const line; "commatizing_numbers_datacommatizing_numbers_using_defaults.txt".File.byLine)
line.commatize.writeln;
}</lang>
{{out}}
Line 335 ⟶ 301:
Ain't no numbers in this here words, nohow, no way, Jose.
James was never known as 0000000007
Arthur Eddington wrote: I believe there are 15 ,747 ,724 ,136 ,275 ,002 ,577 ,605 ,653 ,961 ,181 ,555 ,468 ,044 ,717 ,914 ,527 ,116 ,709 ,366 ,231 ,425 ,076 ,185 ,631 ,031 ,296 protons in the universe.
$-140,000±100 millions.
6/9/1946 was a good year for some.</pre>
Anonymous user