Text processing/2: Difference between revisions
m
→{{header|Wren}}: Minor tidy
No edit summary |
m (→{{header|Wren}}: Minor tidy) |
||
(10 intermediate revisions by 9 users not shown) | |||
Line 24:
# Report the number of records that have good readings for all instruments.
<br><br>
=={{header|11l}}==
{{trans|Python}}
<syntaxhighlight lang="11l">V debug = 0B
// When debug is true, every offending input line is echoed as it is found.
// Patterns for the three kinds of field: date stamp, decimal reading, integer flag.
V datePat = re:‘\d{4}-\d{2}-\d{2}’
V valuPat = re:‘[-+]?\d+\.\d+’
V statPat = re:‘-?\d+’
// Tallies and date sets that feed the final report.
V totalLines = 0
Set[String] dupdate
Set[String] badform
Set[String] badlen
V badreading = 0
Set[String] datestamps
// One record per line of readings.txt; fields are tab-separated.
L(line) File(‘readings.txt’).read().rtrim("\n").split("\n")
totalLines++
V fields = line.split("\t")
V date = fields[0]
// Fields after the date are taken two at a time as (reading, flag) pairs.
V pairs = (1 .< fields.len).step(2).map(i -> (@fields[i], @fields[i + 1]))
// The line is well formed when the date, every reading and every flag match their patterns.
V lineFormatOk = datePat.match(date)
& all(pairs.map(p -> :valuPat.match(p[0])))
& all(pairs.map(p -> :statPat.match(p[1])))
I !lineFormatOk
I debug
print(‘Bad formatting ’line)
badform.add(date)
// Debug-only notice: wrong number of pairs, or at least one flag below 1.
I pairs.len != 24 | any(pairs.map(p -> Int(p[1]) < 1))
I debug
print(‘Missing values ’line)
// Remember dates whose record does not hold exactly 24 pairs.
I pairs.len != 24
badlen.add(date)
// Count records in which any flag is below 1.
I any(pairs.map(p -> Int(p[1]) < 1))
badreading++
// A date already present in datestamps is a duplicate.
I date C datestamps
I debug
print(‘Duplicate datestamp ’line)
dupdate.add(date)
datestamps.add(date)
// Final report: sorted date lists, then the good-record count and percentage.
print("Duplicate dates:\n "sorted(Array(dupdate)).join("\n "))
print("Bad format:\n "sorted(Array(badform)).join("\n "))
print("Bad number of fields:\n "sorted(Array(badlen)).join("\n "))
print("Records with good readings: #. = #2.2%\n".format(
totalLines - badreading, (totalLines - badreading) / Float(totalLines) * 100))
print(‘Total records: ’totalLines)</syntaxhighlight>
{{out}}
<pre>
Duplicate dates:
1990-03-25
1991-03-31
1992-03-29
1993-03-28
1995-03-26
Bad format:
Bad number of fields:
Records with good readings: 5017 = 91.70%
Total records: 5471
</pre>
=={{header|Ada}}==
{{libheader|Simple components for Ada}}
<
with Ada.Text_IO; use Ada.Text_IO;
with Strings_Edit; use Strings_Edit;
Line 89 ⟶ 156:
Close (File);
Put_Line ("Valid records " & Image (Count) & " of " & Image (Line_No) & " total");
end Data_Munging_2;</
Sample output
<pre>
Line 101 ⟶ 168:
=={{header|Aime}}==
<
{
integer i;
Line 153 ⟶ 220:
0;
}</
{{out}} (the "readings.txt" file needs to be converted to UNIX end-of-line first)
<pre>duplicate 19900325 line
Line 161 ⟶ 228:
duplicate 19950326 line
5017 good lines</pre>
=={{header|Amazing Hopper}}==
{{Trans|AWK}}
<syntaxhighlight lang="c">
/* Validates basica/readings.txt by delegating each check to an awk one-liner
   and then printing the three captured results as a single report. */
#include <basico.h>
algoritmo
/* Captures every line whose field count is not 49 (empty when all lines are fine). */
número de campos correcto = `awk 'NF != 49' basica/readings.txt`
/* Captures each date, with its running count, from its second occurrence onwards. */
fechas repetidas = `awk '++count[$1] >= 2{print $1, "(",count[$1],")"}' basica/readings.txt`
/* Counts records whose flag fields 3, 5, ..., 49 are all at least 1 and formats the summary line. */
resultados buenos = `awk '{rec++;ok=1; for(i=0;i<24;i++){if($(2*i+3)<1){ok=0}}; recordok += ok} END {print "Total records",rec,"OK records", recordok, "or", recordok/rec*100,"%"}' basica/readings.txt`
"Check field number by line: ", #( !(number(número de campos correcto)) ? "Ok\n" : "Nok\n";),\
"\nCheck duplicated dates:\n", fechas repetidas,NL, \
"Number of records have good readings for all instruments:\n",resultados buenos,\
"(including "
fijar separador( NL )
contar tokens en 'fechas repetidas'
" duplicated records)\n", luego imprime todo
terminar
</syntaxhighlight>
{{out}}
<pre>
Check field number by line: Ok
Check duplicated dates:
1990-03-25 ( 2 )
1991-03-31 ( 2 )
1992-03-29 ( 2 )
1993-03-28 ( 2 )
1995-03-26 ( 2 )
Number of records have good readings for all instruments:
Total records 5471 OK records 5017 or 91,7017 %
(including 5 duplicated records)
</pre>
=={{header|AutoHotkey}}==
<
data = %A_scriptdir%\readings.txt
Line 216 ⟶ 325:
msgbox, Duplicate Dates:`n%wrongDates%`nRead Lines: %lines%`nValid Lines: %valid%`nwrong lines: %totwrong%`nDuplicates: %TotWrongDates%`nWrong Formatted: %unvalidformat%`n
</syntaxhighlight>
Sample Output:
Line 243 ⟶ 352:
If there are any scientific notation fields then there will be an e in the file:
<
bash$</
Quick check on the number of fields:
<
bash$</
Full check on the file format using a regular expression:
<
bash$</
Full check on the file format as above but using regular expressions allowing intervals (gnu awk):
<
bash$</
Line 259 ⟶ 368:
Accomplished by counting how many times the first field occurs and noting any second occurrences.
<
1990-03-25
1991-03-31
Line 265 ⟶ 374:
1993-03-28
1995-03-26
bash$</
Line 271 ⟶ 380:
<div style="width:100%;overflow:scroll">
<
Total records 5471 OK records 5017 or 91.7017 %
bash$</
</div>
=={{header|C}}==
<
#include <string.h>
#include <stdlib.h>
Line 360 ⟶ 469:
read_file("readings.txt");
return 0;
}</
{{out}}
Line 374 ⟶ 483:
=={{header|C sharp|C#}}==
<
using System.Collections.Generic;
using System.Text.RegularExpressions;
Line 445 ⟶ 554:
}
}
}</
<pre>
Line 458 ⟶ 567:
=={{header|C++}}==
{{libheader|Boost}}
<
#include <fstream>
#include <iostream>
Line 509 ⟶ 618:
cout << all_ok << " records were complete and ok!\n" ;
return 0 ;
}</
{{out}}
Line 525 ⟶ 634:
=={{header|Clojure}}==
<syntaxhighlight lang="clojure">
(defn parse-line [s]
(let [[date & data-toks] (str/split s #"\s+")
Line 563 ⟶ 672:
(clojure.string/join " " (sort (:dupl-dates m)))))
(println (format "%d lines with no missing data" (:n-full-recs m)))))
</syntaxhighlight>
{{out}}
Line 574 ⟶ 683:
=={{header|COBOL}}==
{{works with|OpenCOBOL}}
<
PROGRAM-ID. text-processing-2.
Line 736 ⟶ 845:
INSPECT input-data (offset:) TALLYING data-len
FOR CHARACTERS BEFORE delim
.</
{{out}}
Line 751 ⟶ 860:
=={{header|D}}==
<
import std.stdio, std.array, std.string, std.regex, std.conv,
std.algorithm;
Line 787 ⟶ 896:
repeatedDates.byKey.filter!(k => repeatedDates[k] > 1));
writeln("Good reading records: ", goodReadings);
}</
{{out}}
<pre>Duplicated timestamps: 1990-03-25, 1991-03-31, 1992-03-29, 1993-03-28, 1995-03-26
Line 793 ⟶ 902:
=={{header|Eiffel}}==
<syntaxhighlight lang="eiffel">
class
APPLICATION
Line 909 ⟶ 1,018:
end
</syntaxhighlight>
{{out}}
<pre>
Line 929 ⟶ 1,038:
=={{header|Erlang}}==
Uses function from [[Text_processing/1]]. It does some correctness checks for us.
<syntaxhighlight lang="erlang">
-module( text_processing2 ).
Line 961 ⟶ 1,070:
value_flag_records() -> 24.
</syntaxhighlight>
{{out}}
<pre>
Line 970 ⟶ 1,079:
=={{header|F Sharp|F#}}==
<
let file = @"readings.txt"
Line 990 ⟶ 1,099:
ok <- ok + 1
printf "%d records were ok\n" ok
</syntaxhighlight>
Prints:
<
Date 1990-03-25 is duplicated
Date 1991-03-31 is duplicated
Line 999 ⟶ 1,108:
Date 1995-03-26 is duplicated
5017 records were ok
</syntaxhighlight>
=={{header|Factor}}==
{{works with|Factor|0.99 2020-03-02}}
<
prettyprint sequences sequences.extras sets splitting ;
Line 1,013 ⟶ 1,122:
[ "Duplicates:" print [ "\t" split1 drop ] map duplicates . ]
[ [ " \t" split rest <odds> [ string>number 0 <= ] none? ] count ]
bi pprint " records were good." print</
{{out}}
<pre>
Line 1,033 ⟶ 1,142:
Rather than copy today's data to a PDATA holder so that on the next read the new data may be compared to the old, a two-row array is used, with IT flip-flopping 1,2,1,2,1,2,... Comparison of the data as numerical values rather than text strings means that different texts that evoke the same value will not be regarded as different. If the data format were invalid, there would be horrible messages. There aren't, so ... the values should be read and plotted...
<syntaxhighlight lang="fortran">
Crunches a set of hourly data. Starts with a date, then 24 pairs of value,indicator for that day, on one line.
INTEGER Y,M,D !Year, month, and day.
Line 1,096 ⟶ 1,205:
900 CLOSE(IN) !Done.
END !Spaghetti rules.
</syntaxhighlight>
Output:
Line 1,108 ⟶ 1,217:
=={{header|Go}}==
<
import (
Line 1,182 ⟶ 1,291:
fmt.Println(uniqueGood,
"unique dates with good readings for all instruments.")
}</
{{out}}
<pre>
Line 1,197 ⟶ 1,306:
=={{header|Haskell}}==
<
import Data.List (nub, (\\))
Line 1,216 ⟶ 1,325:
putStr (unlines ("duplicated dates:": duplicatedDates (map date inputs)))
putStrLn ("number of good records: " ++ show (length $ goodRecords inputs))
</syntaxhighlight>
this script outputs:
Line 1,232 ⟶ 1,341:
duplicated timestamps that are on well-formed records.
<
dups := set()
goodRecords := 0
Line 1,264 ⟶ 1,373:
}
end</
Sample run:
Line 1,277 ⟶ 1,386:
=={{header|J}}==
<
dat=: TAB readdsv jpath '~temp/readings.txt'
Dates=: getdate"1 >{."1 dat
Line 1,296 ⟶ 1,405:
1992 3 29
1993 3 28
1995 3 26</
=={{header|Java}}==
{{trans|C++}}
{{works with|Java|1.5+}}
<
import java.util.regex.*;
import java.io.*;
Line 1,344 ⟶ 1,453:
}
}
}</
The program produces the following output:
<pre>
Line 1,358 ⟶ 1,467:
=={{header|JavaScript}}==
{{works with|JScript}}
<
function analyze_func(filename) {
var dates_seen = {};
Line 1,407 ⟶ 1,516:
var analyze = analyze_func('readings.txt');
analyze();</
=={{header|jq}}==
Line 1,413 ⟶ 1,522:
For this problem, it is convenient to use jq in a pipeline: the first invocation of jq will convert the text file into a stream of JSON arrays (one array per line):
<
The second part of the pipeline performs the task requirements. The following program is used in the second invocation of jq.
'''Generic Utilities'''
<
def runs:
reduce .[] as $item
Line 1,433 ⟶ 1,542:
def is_integral: test("^[-+]?[0-9]+$");
def is_date: test("[12][0-9]{3}-[0-9][0-9]-[0-9][0-9]");</
'''Validation''':
<
def validate_line(nr):
def validate_date:
Line 1,454 ⟶ 1,563:
def validate_lines:
. as $in
| range(0; length) as $i | ($in[$i] | validate_line($i + 1));</
'''Check for duplicate timestamps'''
<
[.[][0]] | sort | runs | map( select(.[1]>1) );</
'''Number of valid readings for all instruments''':
<
# but does check the validity of the record, including the date format:
def number_of_valid_readings:
Line 1,471 ⟶ 1,580:
and all(range(0; 24) | $in[2*. + 2] | (is_integral and tonumber >= 1) );
map(select(check)) | length ;</
'''Generate Report'''
<
"\nChecking for duplicate timestamps:",
duplicate_timestamps,
"\nThere are \(number_of_valid_readings) valid rows altogether."</
{{out}}
'''Part 1: Simple demonstration'''
To illustrate that the program does report invalid lines, we first use the six lines at the top but mangle the last line.
<
field 1 in line 6 has an invalid date: 991-04-03
line 6 has 47 fields
Line 1,497 ⟶ 1,606:
]
There are 5 valid rows altogether.</
'''Part 2: readings.txt'''
<
Checking for duplicate timestamps:
[
Line 1,525 ⟶ 1,634:
]
There are 5017 valid rows altogether.</
=={{header|Julia}}==
Refer to the code at https://rosettacode.org/wiki/Text_processing/1#Julia. Add at the end of that code the following:
<syntaxhighlight lang="julia">
dupdate = df[nonunique(df[:,[:Date]]),:][:Date]
println("The following rows have duplicate DATESTAMP:")
Line 1,535 ⟶ 1,644:
println("All values good in these rows:")
println(df[df[:ValidValues] .== 24,:])
</syntaxhighlight>
{{output}}
<pre>
Line 1,579 ⟶ 1,688:
=={{header|Kotlin}}==
<
import java.io.File
Line 1,620 ⟶ 1,729:
percent = allGood.toDouble() / count * 100.0
println("Number which are all good : $allGood (${"%5.2f".format(percent)}%)")
}</
{{out}}
Line 1,639 ⟶ 1,748:
=={{header|Lua}}==
<
io.input( filename )
Line 1,682 ⟶ 1,791:
for i = 1, #bad_format do
print( " ", bad_format[i] )
end</
Output:
<pre>Lines read: 5471
Line 1,696 ⟶ 1,805:
</pre>
=={{header|M2000 Interpreter}}==
The file must be in the user directory. Use Win Dir$ to open an Explorer window on that directory and copy readings.txt into it.
<syntaxhighlight lang="m2000 interpreter">Module TestThis {
Document a$, exp$
\\ automatic find the enconding and the line break
Line 1,712 ⟶ 1,822:
If exist(k,Left$(b$, 10)) then
m++ : where=eval(k)
exp$=format$("Duplicate for {0} at {1}",
Else
Append k, Left$(b$, 10):=i
Line 1,730 ⟶ 1,840:
}
TestThis
</syntaxhighlight>
{{out}}
<pre>
Records: 5471
Duplicate for
Duplicate for
Duplicate for
Duplicate for
Duplicate for
Duplicates 5
Valid Records 5017
</pre>
=={{header|Mathematica}}/{{header|Wolfram Language}}==
<syntaxhighlight lang="mathematica">(* Read the tab-separated file: each row is a date stamp followed by value/flag pairs. *)
data = Import["Readings.txt","TSV"];
(* Dates that occur more than once in the first column. *)
Print["duplicated dates: "];
Select[Tally@data[[;;,1]], #[[2]]>1&][[;;,1]]//Column
(* A record is good when every flag (columns 3, 5, ...) is at least 1.
   Testing the smallest flag, instead of whether the flags multiply to 1,
   accepts good flags greater than 1 and rejects pairs of -1 flags. *)
Print["number of good records: ", Count[(Min[#[[3;;All;;2]]] >= 1)& /@ data, True],
" (out of a total of ", Length[data], ")"]</
{{out}}
<pre>duplicated dates:
1990-03-25
Line 1,756 ⟶ 1,866:
1993-03-28
1995-03-26
number of good records: 5017 (out of a total of 5471)</pre>
=={{header|MATLAB}} / {{header|Octave}}==
<
% READDAT reads readings.txt file
%
Line 1,789 ⟶ 1,898:
dix = find(diff(d)==0) % check for to consequtive timestamps with zero difference
printf('number of valid records: %i\n ', sum( all( val(:,5:2:end) >= 1, 2) ) );</
<pre>>> [val,count]=readdat;
Line 1,804 ⟶ 1,913:
=={{header|Nim}}==
<syntaxhighlight lang="nim">import strutils, tables
const NumFields = 49
Line 1,811 ⟶ 1,919:
const FlagGoodValue = 1
var badRecords: int
var totalRecords: int
var badInstruments: int #
var seenDates:
proc checkFloats(floats: seq[string]): bool =
## Ensure we can parse all records as floats (except the date stamp).
for index in 1..<NumFields
try:
#
discard parseFloat(floats[index])
except ValueError:
Line 1,826 ⟶ 1,934:
true
proc areAllFlagsOk(instruments: seq[string]): bool =
## Ensure that all sensor flags are ok.
# Flags start at index 2, and occur every 2 fields.
for index in countup(2, NumFields, 2):
# We're assuming all instrument flags are floats not integers
var flag = parseFloat(instruments[index])
if flag < FlagGoodValue: return false
Line 1,836 ⟶ 1,945:
# Note: we're not checking the format of the date stamp.
#
var currentLine = 0
for line in "readings.txt".lines:
currentLine.inc
if line.len == 0: continue #
var tokens = line.split({' ', '\t'})
totalRecords.inc
if tokens.len != NumFields:
badRecords.inc
continue
Line 1,865 ⟶ 1,971:
echo tokens[DateField], " duplicated on line ", currentLine
echo "Total Records: ", totalRecords
echo "
echo "Records where all
{{out}}
<pre>1990-03-25 duplicated on line 85
1991-03-31 duplicated on line 456
1992-03-29 duplicated on line 820
1993-03-28 duplicated on line 1184
1995-03-26 duplicated on line 1911
Total Records: 5471
Records with wrong format: 0
Records where all instruments were OK: 5017</pre>
=={{header|OCaml}}==
<
open Str
Line 1,941 ⟶ 2,056:
Printf.printf "number of good records: %d\n" (num_good_records inputs);
;;</
this script outputs:
Line 1,955 ⟶ 2,070:
=={{header|Perl}}==
<
use constant FIELDS => 49;
Line 1,982 ⟶ 2,097:
map {" $_\n"}
grep {$dates{$_} > 1}
sort keys %dates;</
Output:
Line 1,994 ⟶ 2,109:
=={{header|Phix}}==
<!--<syntaxhighlight lang="phix">(phixonline)-->
<span style="color: #000080;font-style:italic;">-- demo\rosetta\TextProcessing2.exw</span>
<span style="color: #008080;">with</span> <span style="color: #008080;">javascript_semantics</span> <span style="color: #000080;font-style:italic;">-- (include version/first of next three lines only)</span>
<span style="color: #008080;">include</span> <span style="color: #000000;">readings</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span> <span style="color: #000080;font-style:italic;">-- global constant lines, or:
--assert(write_lines("readings.txt",lines)!=-1) -- first run, then:
--constant lines = read_lines("readings.txt")</span>
<span style="color: #008080;">include</span> <span style="color: #000000;">builtins</span><span style="color: #0000FF;">\</span><span style="color: #004080;">timedate</span><span style="color: #0000FF;">.</span><span style="color: #000000;">e</span>
<span style="color: #000080;font-style:italic;">-- all_good counts the records in which none of the 24 selected fields is &lt;=0</span>
<span style="color: #004080;">integer</span> <span style="color: #000000;">all_good</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">0</span>
<span style="color: #000080;font-style:italic;">-- fmt describes one record: a y-m-d date, a tab, then 48 tab-separated numbers</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">fmt</span> <span style="color: #0000FF;">=</span> <span style="color: #008000;">"%d-%d-%d\t"</span><span style="color: #0000FF;">&</span><span style="color: #7060A8;">join</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">repeat</span><span style="color: #0000FF;">(</span><span style="color: #008000;">"%f"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">48</span><span style="color: #0000FF;">),</span><span style="color: #008000;">'\t'</span><span style="color: #0000FF;">)</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">extset</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">sq_mul</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">tagset</span><span style="color: #0000FF;">(</span><span style="color: #000000;">24</span><span style="color: #0000FF;">),</span><span style="color: #000000;">2</span><span style="color: #0000FF;">),</span> <span style="color: #000080;font-style:italic;">-- {2,4,6,..48}</span>
<span style="color: #000000;">curr</span><span style="color: #0000FF;">,</span> <span style="color: #000000;">last</span>
<span style="color: #008080;">for</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">=</span><span style="color: #000000;">1</span> <span style="color: #008080;">to</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">lines</span><span style="color: #0000FF;">)</span> <span style="color: #008080;">do</span>
<span style="color: #004080;">string</span> <span style="color: #000000;">li</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">lines</span><span style="color: #0000FF;">[</span><span style="color: #000000;">i</span><span style="color: #0000FF;">]</span>
<span style="color: #000080;font-style:italic;">-- anything other than exactly one scanf result is reported as a bad line</span>
<span style="color: #004080;">sequence</span> <span style="color: #000000;">r</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">scanf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">li</span><span style="color: #0000FF;">,</span><span style="color: #000000;">fmt</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">if</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">r</span><span style="color: #0000FF;">)!=</span><span style="color: #000000;">1</span> <span style="color: #008080;">then</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"bad line [%d]:%s\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">i</span><span style="color: #0000FF;">,</span><span style="color: #000000;">li</span><span style="color: #0000FF;">})</span>
<span style="color: #008080;">else</span>
<span style="color: #000080;font-style:italic;">-- the first three parsed fields are the date; it is only compared with the previous record's date</span>
<span style="color: #000000;">curr</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">r</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">1</span><span style="color: #0000FF;">..</span><span style="color: #000000;">3</span><span style="color: #0000FF;">]</span>
<span style="color: #008080;">if</span> <span style="color: #000000;">i</span><span style="color: #0000FF;">></span><span style="color: #000000;">1</span> <span style="color: #008080;">and</span> <span style="color: #000000;">curr</span><span style="color: #0000FF;">=</span><span style="color: #000000;">last</span> <span style="color: #008080;">then</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"duplicate line for %04d/%02d/%02d\n"</span><span style="color: #0000FF;">,</span><span style="color: #000000;">last</span><span style="color: #0000FF;">)</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #000000;">last</span> <span style="color: #0000FF;">=</span> <span style="color: #000000;">curr</span>
<span style="color: #000080;font-style:italic;">-- take elements 2,4,..48 of the 48 numbers, count those &lt;=0, and add 1 to all_good when there are none</span>
<span style="color: #000000;">all_good</span> <span style="color: #0000FF;">+=</span> <span style="color: #7060A8;">sum</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">sq_le</span><span style="color: #0000FF;">(</span><span style="color: #7060A8;">extract</span><span style="color: #0000FF;">(</span><span style="color: #000000;">r</span><span style="color: #0000FF;">[</span><span style="color: #000000;">1</span><span style="color: #0000FF;">][</span><span style="color: #000000;">4</span><span style="color: #0000FF;">..$],</span><span style="color: #000000;">extset</span><span style="color: #0000FF;">),</span><span style="color: #000000;">0</span><span style="color: #0000FF;">))=</span><span style="color: #000000;">0</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">if</span>
<span style="color: #008080;">end</span> <span style="color: #008080;">for</span>
<span style="color: #7060A8;">printf</span><span style="color: #0000FF;">(</span><span style="color: #000000;">1</span><span style="color: #0000FF;">,</span><span style="color: #008000;">"Valid records %d of %d total\n"</span><span style="color: #0000FF;">,{</span><span style="color: #000000;">all_good</span><span style="color: #0000FF;">,</span> <span style="color: #7060A8;">length</span><span style="color: #0000FF;">(</span><span style="color: #000000;">lines</span><span style="color: #0000FF;">)})</span>
<span style="color: #0000FF;">?</span><span style="color: #008000;">"done"</span>
<span style="color: #0000FF;">{}</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">wait_key</span><span style="color: #0000FF;">()</span>
<!--</syntaxhighlight>-->
{{out}}
<pre>
Line 2,038 ⟶ 2,155:
=={{header|PHP}}==
<
$missformcount = 0;
$totalcount = 0;
Line 2,072 ⟶ 2,189:
foreach ($duplicates as $key => $val){
echo $val . ' at Line : ' . $key . '<br>';
}</
<pre>Valid records 5017 of 5471 total
Duplicates :
Line 2,080 ⟶ 2,197:
1993-03-28 at Line : 1184
1995-03-26 at Line : 1911</pre>
=={{header|Picat}}==
<syntaxhighlight lang="picat">import util.

% Validate readings.txt: report records with the wrong number of fields,
% report repeated date stamps, and count the records whose flags are all good.
% A record that repeats an earlier date is reported but never counted as good.
go =>
  Readings = [split(Record) : Record in read_file_lines("readings.txt")],
  DateStamps = new_map(),   % date stamp -> number of the latest entry carrying it
  GoodReadings = 0,
  foreach({Rec,Id} in zip(Readings,1..Readings.length))
    if Rec.length != 49 then
      % A malformed record is only reported: indexing Rec[1] or Rec[3..49]
      % on a short (or empty) line would raise an out-of-bound error.
      printf("Entry %d has bad_length %d\n", Id, Rec.length)
    else
      Date = Rec[1],
      if DateStamps.has_key(Date) then
        printf("Entry %d (date %w) is a duplicate of entry %w\n", Id, Date, DateStamps.get(Date))
      else
        % Flags sit at the odd positions 3,5,...,49; the record is good when none is bad.
        if sum([1: I in 3..2..49, check_field(Rec[I])]) == 0 then
          GoodReadings := GoodReadings + 1
        end
      end,
      DateStamps.put(Date, Id)
    end
  end,
  nl,
  printf("Total readings: %d\n",Readings.len),
  printf("Good readings: %d\n",GoodReadings),
  nl.

% Succeeds when Field is one of the flag strings treated as a bad reading.
check_field(Field) =>
  Field == "-2" ; Field == "-1" ; Field == "0".</syntaxhighlight>
{{out}}
<pre>Entry 85 (date 1990-03-25) is a duplicate of entry 84
Entry 456 (date 1991-03-31) is a duplicate of entry 455
Entry 820 (date 1992-03-29) is a duplicate of entry 819
Entry 1184 (date 1993-03-28) is a duplicate of entry 1183
Entry 1911 (date 1995-03-26) is a duplicate of entry 1910
Total readings: 5471
Good readings: 5013</pre>
=={{header|PicoLisp}}==
Put the following into an executable file "checkReadings":
<
(load "@lib/misc.l")
Line 2,104 ⟶ 2,259:
(bye 1) ) ) ) )
(bye)</
Then it can be called as
<pre>$ ./checkReadings readings.txt</pre>
=={{header|PL/I}}==
<
/* To process readings produced by automatic reading stations. */
Line 2,162 ⟶ 2,317:
put skip list ('There were ' || k-faulty || ' good readings' );
end check;
</syntaxhighlight>
=={{header|PowerShell}}==
<
$goodLineCount = 0
get-content c:\temp\readings.txt |
Line 2,188 ⟶ 2,343:
}
[string]$goodLineCount + " good lines"
</syntaxhighlight>
Output:
Line 2,199 ⟶ 2,354:
An alternative using regular expression syntax:
<
$dateHash = @{}
$goodLineCount = 0
Line 2,220 ⟶ 2,375:
}
[string]$goodLineCount + " good lines"
</syntaxhighlight>
Output:
Line 2,234 ⟶ 2,389:
=={{header|PureBasic}}==
Using regular expressions.
<
#instrumentCount = 24
Line 2,305 ⟶ 2,460:
CloseConsole()
EndIf
EndIf</
Sample output:
<pre>Duplicate date: 1990-03-25 occurs on lines 85 and 84.
Line 2,316 ⟶ 2,471:
=={{header|Python}}==
<
import zipfile
import StringIO
Line 2,356 ⟶ 2,511:
#readings = StringIO.StringIO(zfs.read('readings.txt'))
readings = open('readings.txt','r')
munge2(readings)</
The results indicate 5013 good records, which differs from the Awk implementation. The final few lines of the output are as follows
<pre style="height:10ex;overflow:scroll">
Line 2,375 ⟶ 2,530:
* Generate mostly summary information that is easier to compare to other solutions.
<
import zipfile
import StringIO
Line 2,419 ⟶ 2,574:
readings = open('readings.txt','r')
munge2(readings)</
<pre>bash$ /cygdrive/c/Python26/python munge2.py
Duplicate dates:
Line 2,437 ⟶ 2,592:
=={{header|R}}==
<
dfr <- read.delim("d:/readings.txt", colClasses=c("character", rep(c("numeric", "integer"), 24)))
dates <- strptime(dfr[,1], "%Y-%m-%d")
Line 2,449 ⟶ 2,604:
# Number of rows with no bad values
flags <- as.matrix(dfr[,seq(3,49,2)])>0
sum(apply(flags, 1, all))</
=={{header|Racket}}==
<
(read-decimal-as-inexact #f)
;; files to read is a sequence, so it could be either a list or vector of files
Line 2,501 ⟶ 2,656:
(printf "~a records have good readings for all instruments~%"
(text-processing/2 (current-command-line-arguments)))</
Example session:
<pre>$ racket 2.rkt readings/readings.txt
Line 2,528 ⟶ 2,683:
Note that we sort the pairs after we've grepped them, not before; this works fine in Raku, sorting on the key and value as primary and secondary keys. Finally, pairs and arrays provide a default print format that is sufficient without additional formatting in this case.
<syntaxhighlight lang="raku"
my $line;
my %dates;
Line 2,546 ⟶ 2,701:
say 'Repeated timestamps (with line numbers):';
.say for sort %dates.pairs.grep: *.value.elems > 1;</
Output:
<pre>5017 good records out of 5471 total
Line 2,575 ⟶ 2,730:
<br><br>
The program has (negated) code to write the report to a file in addition to the console.
<
numeric digits 20 /*allow for bigger numbers. */
ifid='READINGS.TXT' /*name of the input file. */
Line 2,714 ⟶ 2,869:
return y//100\==0 | y//400==0 /*apply the 100 and the 400 year rule.*/
/*────────────────────────────────────────────────────────────────────────────*/
sy: say arg(1); call lineout ofid,arg(1); return</
'''output''' when using the default input file:
<pre style="height:35ex">
Line 2,744 ⟶ 2,899:
=={{header|Ruby}}==
<
def munge2(readings, debug=false)
Line 2,796 ⟶ 2,951:
open('readings.txt','r') do |readings|
munge2(readings)
end</
=={{header|Scala}}==
{{works with|Scala|2.8}}
<
import scala.io.Source
import scala.collection.immutable.{TreeMap => Map}
Line 2,838 ⟶ 2,993:
dateMap.valuesIterable.sum))
}
}</
Sample output:
Line 2,858 ⟶ 3,013:
=={{header|Sidef}}==
{{trans|Raku}}
<
var dates = Hash();
Line 2,871 ⟶ 3,026:
say "#{good_records} good records out of #{$.} total";
say 'Repeated timestamps:';
say dates.to_a.grep{ .value > 1 }.map { .key }.sort.join("\n");</
{{out}}
<pre>
Line 2,888 ⟶ 3,043:
Developed using the Snobol4 dialect Spitbol for Linux, version 4.0
<
v = array(24)
Line 2,939 ⟶ 3,094:
end
</syntaxhighlight>
{{out}}
<pre>1990-03-25: datestamp at row 85 duplicates datestamp at 84
Line 2,955 ⟶ 3,110:
=={{header|Tcl}}==
<
set total [llength $data]
set correct $total
Line 2,979 ⟶ 3,134:
puts "$correct records with good readings = [expr $correct * 100.0 / $total]%"
puts "Total records: $total"</
<pre>$ tclsh munge2.tcl
Duplicate datestamp: 1990-03-25
Line 2,994 ⟶ 3,149:
To demonstrate a different method to iterate over the file, and different ways to verify data types:
<
array set seen {}
set fh [open readings.txt]
Line 3,032 ⟶ 3,187:
puts "total: $total"
puts [format "good: %d = %5.2f%%" $good [expr {100.0 * $good / $total}]]</
Results:
<pre>duplicate date on line 85: 1990-03-25
Line 3,045 ⟶ 3,200:
compiled and run in a single step, with the input file accessed as a list of strings
pre-declared in readings_dot_txt
<
#import nat
Line 3,058 ⟶ 3,213:
#show+
main = valid_format?(^C/good_readings duplicate_dates,-[invalid format]-!) readings</
output:
<pre>5017 good readings
Line 3,069 ⟶ 3,224:
=={{header|VBScript}}==
<
Set objFile = objFSO.OpenTextFile(objFSO.GetParentFolderName(WScript.ScriptFullName) &_
"\readings.txt",1)
Line 3,122 ⟶ 3,277:
objFile.Close
Set objFSO = Nothing</
{{Out}}
Line 3,143 ⟶ 3,298:
* Reads flag value and checks if it is positive
* Requires 24 value/flag pairs on each line
<
File_Open("|(PATH_ONLY)\output.txt")
#51 = Buf_Num // Edit buffer for output file
Line 3,190 ⟶ 3,345:
IT("Date format errors: ") Num_Ins(#14)
IT("Invalid data records:") Num_Ins(#15)
IT("Total records: ") Num_Ins(#12)</
Sample output:
<
1991-03-31: duplicate record at 456
1992-03-29: duplicate record at 820
Line 3,202 ⟶ 3,357:
Date format errors: 0
Invalid data records: 454
Total records: 5471</
=={{header|Wren}}==
{{trans|Kotlin}}
{{libheader|Wren-pattern}}
{{libheader|Wren-fmt}}
{{libheader|Wren-sort}}
<syntaxhighlight lang="wren">import "io" for File
import "./pattern" for Pattern
import "./fmt" for Fmt
import "./sort" for Sort

// Validates readings.txt: counts records with the wrong number of fields,
// lists date stamps that occur more than once, and counts the records in
// which all 24 flags are at least 1.

var p = Pattern.new("+1/s")   // fields are separated by runs of whitespace
var fileName = "readings.txt"
// Normalise Windows line endings first so the file is split into records
// correctly whether it uses "\r\n" or plain "\n".
var lines = File.read(fileName).trimEnd().replace("\r\n", "\n").split("\n")
var count = 0      // records read
var invalid = 0    // records without exactly 49 fields
var allGood = 0    // records whose 24 flags are all >= 1
var map = {}       // date stamp -> number of occurrences
for (line in lines) {
    count = count + 1
    var fields = p.splitAll(line)
    if (fields.count == 49) {
        // Only index into the record once it is known to have the right shape.
        var date = fields[0]
        map[date] = map.containsKey(date) ? map[date] + 1 : 1
        var good = 0
        // Flags are at the even indices 2, 4, ..., 48 (index 0 is the date).
        var i = 2
        while (i < fields.count) {
            if (Num.fromString(fields[i]) >= 1) good = good + 1
            i = i + 2
        }
        if (good == 24) allGood = allGood + 1
    } else {
        invalid = invalid + 1
    }
}

Fmt.print("File = $s", fileName)
System.print("\nDuplicated dates:")
var keys = map.keys.toList
Sort.quick(keys)
for (k in keys) {
    var v = map[k]
    if (v > 1) Fmt.print("  $s ($d times)", k, v)
}
Fmt.print("\nTotal number of records   : $d", count)
var percent = invalid/count * 100
Fmt.print("Number of invalid records : $d ($5.2f)\%", invalid, percent)
percent = allGood/count * 100
Fmt.print("Number which are all good : $d ($5.2f)\%", allGood, percent)</syntaxhighlight>
{{out}}
<pre>
File = readings.txt
Duplicated dates:
1990-03-25 (2 times)
1991-03-31 (2 times)
1992-03-29 (2 times)
1993-03-28 (2 times)
1995-03-26 (2 times)
Total number of records : 5471
Number of invalid records : 0 ( 0.00)%
Number which are all good : 5017 (91.70)%
</pre>
=={{header|zkl}}==
<
// I can't use it to select all fields, only verify them
re:=RegExp(0'|^(\d+-\d+-\d+)| + 0'|\s+\d+\.\d+\s+-*\d+| * 24 + ".+$");
Line 3,223 ⟶ 3,443:
good+=1;
}
println("%d records read, %d duplicate dates, %d valid".fmt(N,dd,good));</
{{out}}
<pre>
|