Text processing/1: Difference between revisions

m
no edit summary
No edit summary
mNo edit summary
Line 2,107:
There is one maximal run of lines with flag<=0.
The maximal run has length 93 and starts at line 5378 and has start date 2004-09-30.</lang>
 
=={{header|Julia}}==
<lang julia>
using DataFrames
 
"""
    mungdata(filename)

Read the instrument log `filename` and summarise it line by line.

Each line is split on whitespace.  The first field is a date parsed with the
format `"y-m-d"`; the remaining fields are read as (value, flag) pairs.  A
reading is valid only when its integer flag is strictly positive; readings
with a zero or negative flag are ignored and left as `NaN` in the hourly
columns.

Returns a tuple `(table, overallmean)`:

- `table` is a `DataFrame` with one row per input line and the columns `Date`,
  `Mean` (mean of the valid readings on that line, `NaN` if there are none),
  `ValidValues` (number of valid readings), `MaximumGap` (length of the first
  longest run of consecutive invalid readings on the line), `GapPosition`
  (1-based slot where that run starts, `0` if the line has no invalid reading),
  followed by 24 value columns named `"0:00"` to `"23:00"`.
- `overallmean` is the mean of every valid reading in the file (`NaN` when the
  file has no valid reading).
"""
function mungdata(filename)
    lines = readlines(filename)
    numlines = length(lines)
    dates = DateTime[]
    means = zeros(Float64, numlines)
    numvalid = zeros(Int, numlines)
    invalidlength = zeros(Int, numlines)
    invalidpos = zeros(Int, numlines)
    # NaN marks slots for which no valid reading was stored.
    datamatrix = fill(NaN, numlines, 24)
    totalsum = 0.0
    totalgood = 0
    for (linenum, line) in enumerate(lines)
        data = split(line)
        validcount = badlength = 0
        validsum = 0.0
        # data[1] is the date; values sit at even indices, each followed by its flag.
        for i in 2:2:length(data)-1
            slot = div(i, 2)
            # Only flags >= 1 mark a good reading; a flag of 0 is a bad reading too.
            if parse(Int, data[i+1]) > 0
                value = parse(Float64, data[i])
                datamatrix[linenum, slot] = value
                validsum += value
                validcount += 1
                badlength = 0
            else
                badlength += 1
                # Strict comparison keeps the first of several equally long runs.
                if badlength > invalidlength[linenum]
                    invalidlength[linenum] = badlength
                    invalidpos[linenum] = slot - badlength + 1
                end
            end
        end
        push!(dates, DateTime(data[1], "y-m-d"))
        means[linenum] = validsum / validcount
        numvalid[linenum] = validcount
        totalsum += validsum
        totalgood += validcount
    end
    dt = DataFrame(Date = dates, Mean = means, ValidValues = numvalid,
                   MaximumGap = invalidlength, GapPosition = invalidpos)
    # Append one column per slot, labelled with its zero-based hour.
    for i in 1:size(datamatrix, 2)
        dt[Symbol("$(i-1):00")] = datamatrix[:, i]
    end
    return dt, totalsum / totalgood
end
 
# Input file: this is taken from the example listed on the task, since the actual text file is not available
datafilename = "data.txt"
df, mean = mungdata(datafilename)
println(df)
println("The overall mean is $mean")

# findmax yields the largest per-line gap together with the first row holding it.
maxbadval, maxbadline = findmax(df[:MaximumGap])
# GapPosition is a 1-based slot, while the hour labels start at 0.
maxbadtime = df[:GapPosition][maxbadline] - 1
# Drop the time-of-day part of the printed DateTime, keeping only the date.
maxbaddate = replace("$(df[:Date][maxbadline])", r"T.+$", "")
println("The largest run of bad values is $(maxbadval), on $(maxbaddate) beginning at $(maxbadtime):00 hours.")
</lang>
{{output}}
<pre>
6×29 DataFrames.DataFrame
│ Row │ Date │ Mean │ ValidValues │ MaximumGap │ GapPosition │ 0:00 │ 1:00 │ 2:00 │ 3:00 │ 4:00 │
├─────┼─────────────────────┼─────────┼─────────────┼────────────┼─────────────┼──────┼──────┼──────┼──────┼──────┤
│ 1 │ 1991-03-30T00:00:00 │ 10.0 │ 24 │ 0 │ 0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │
│ 2 │ 1991-03-31T00:00:00 │ 23.5417 │ 24 │ 0 │ 0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │
│ 3 │ 1991-03-31T00:00:00 │ 40.0 │ 1 │ 23 │ 2 │ 40.0 │ NaN │ NaN │ NaN │ NaN │
│ 4 │ 1991-04-01T00:00:00 │ 23.2174 │ 23 │ 1 │ 1 │ NaN │ 13.0 │ 16.0 │ 21.0 │ 24.0 │
│ 5 │ 1991-04-02T00:00:00 │ 19.7917 │ 24 │ 0 │ 0 │ 8.0 │ 9.0 │ 11.0 │ 12.0 │ 12.0 │
│ 6 │ 1991-04-03T00:00:00 │ 13.9583 │ 24 │ 0 │ 0 │ 10.0 │ 9.0 │ 10.0 │ 10.0 │ 9.0 │
 
│ Row │ 5:00 │ 6:00 │ 7:00 │ 8:00 │ 9:00 │ 10:00 │ 11:00 │ 12:00 │ 13:00 │ 14:00 │ 15:00 │ 16:00 │ 17:00 │ 18:00 │
├─────┼──────┼──────┼──────┼──────┼──────┼───────┼───────┼───────┼───────┼───────┼───────┼───────┼───────┼───────┤
│ 1 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │
│ 2 │ 10.0 │ 10.0 │ 20.0 │ 20.0 │ 20.0 │ 35.0 │ 50.0 │ 60.0 │ 40.0 │ 30.0 │ 30.0 │ 30.0 │ 25.0 │ 20.0 │
│ 3 │ NaN │ NaN │ NaN │ NaN │ NaN │ NaN │ NaN │ NaN │ NaN │ NaN │ NaN │ NaN │ NaN │ NaN │
│ 4 │ 22.0 │ 20.0 │ 18.0 │ 29.0 │ 44.0 │ 50.0 │ 43.0 │ 38.0 │ 27.0 │ 27.0 │ 24.0 │ 23.0 │ 18.0 │ 12.0 │
│ 5 │ 12.0 │ 27.0 │ 26.0 │ 27.0 │ 33.0 │ 32.0 │ 31.0 │ 29.0 │ 31.0 │ 25.0 │ 25.0 │ 24.0 │ 21.0 │ 17.0 │
│ 6 │ 10.0 │ 15.0 │ 24.0 │ 28.0 │ 24.0 │ 18.0 │ 14.0 │ 12.0 │ 13.0 │ 14.0 │ 15.0 │ 14.0 │ 15.0 │ 13.0 │
 
│ Row │ 19:00 │ 20:00 │ 21:00 │ 22:00 │ 23:00 │
├─────┼───────┼───────┼───────┼───────┼───────┤
│ 1 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │ 10.0 │
│ 2 │ 20.0 │ 20.0 │ 20.0 │ 20.0 │ 35.0 │
│ 3 │ NaN │ NaN │ NaN │ NaN │ NaN │
│ 4 │ 13.0 │ 14.0 │ 15.0 │ 13.0 │ 10.0 │
│ 5 │ 14.0 │ 15.0 │ 12.0 │ 12.0 │ 10.0 │
│ 6 │ 13.0 │ 13.0 │ 12.0 │ 10.0 │ 10.0 │
The overall mean is 18.241666666666667
The largest run of bad values is 23, on 1991-03-31 beginning at 1:00 hours.
</pre>
 
=={{header|Lua}}==
4,105

edits