Text processing/1: Difference between revisions

Line 1,192:

=={{header|Scala}}==

=={{works with|Scala|2.8}}==

A fully functional solution:

<lang scala>object DataMunging {

import scala.io.Source

⚫

val pattern = """^(\d+-\d+-\d+)""" + """\s+(\d+\.\d+)\s+(-?\d+)""" * 24 + "$" r;

def spans[A](list: List[A]) = list.tail.foldLeft(List((list.head, 1))) {

case ((a, n) :: tail, b) if a == b => (a, n + 1) :: tail

Line 1,204:

Line 1,205:

type LineIterator = Iterator[Option[(Double, Int, Flags)]]

⚫

val pattern = """^(\d+-\d+-\d+)""" + """\s+(\d+\.\d+)\s+(-?\d+)""" * 24 + "$" r;

def linesIterator(file: java.io.File) = Source.fromFile(file).getLines().map(

pattern findFirstMatchIn _ map (

Line 1,234:

Line 1,237:

def main(args: Array[String]) {

val ~~filenames~~ = args map (new java.io.File(_)) filter (file => file.isFile && file.canRead)

val files = args map (new java.io.File(_)) filter (file => file.isFile && file.canRead)

val lines = ~~filenames~~.iterator flatMap linesIterator

val lines = files.iterator flatMap linesIterator

val (totalSum, totalSize, flags) = totalizeLines(lines)

val ((_, ~~run~~), startDate) = flags.filter(!_._1._1).max

val ((_, invalidCount), startDate) = flags.filter(!_._1._1).max

val report = """|

|File(s) = %s

Line 1,244:

Line 1,247:

|Average = %10.3f

|

|Maximum run(s) of %d consecutive false readings ~~ends~~ at ~~line starting~~ %s""".stripMargin

|Maximum run(s) of %d consecutive false readings began at %s""".stripMargin

println(report format (~~filenames~~ mkString " ", totalSum, totalSize, totalSum / totalSize, ~~run~~, startDate))

println(report format (files mkString " ", totalSum, totalSize, totalSum / totalSize, invalidCount, startDate))

}

}</lang>

A quick&dirty solution:

⚫

Last few lines of the sample output:

<lang scala>object AltDataMunging {

def main(args: Array[String]) {

var totalSum = 0.0

var totalSize = 0

var maxInvalidDate = ""

var maxInvalidCount = 0

var invalidDate = ""

var invalidCount = 0

val files = args map (new java.io.File(_)) filter (file => file.isFile && file.canRead)

files.iterator flatMap (file => Source fromFile file getLines ()) map (_.trim split "\\s+") foreach {

case Array(date, rawData @ _*) =>

val dataset = (rawData map (_ toDouble) iterator) grouped 2 toList;

val valid = dataset filter (_.last > 0) map (_.head)

val flags = spans(dataset map (_.last > 0)) map ((_, date))

println("Line: %11s Reject: %2d Accept: %2d Line_tot: %10.3f Line_avg: %10.3f" format

(date, 24 - valid.size, valid.size, valid.sum, valid.sum / valid.size))

totalSum += valid.sum

totalSize += valid.size

dataset foreach {

case _ :: flag :: Nil if flag > 0 =>

if (invalidCount > maxInvalidCount) {

maxInvalidDate = invalidDate

maxInvalidCount = invalidCount

}

invalidCount = 0

case _ =>

if (invalidCount == 0) invalidDate = date

invalidCount += 1

}

val report = """|

|File(s) = %s

|Total = %10.3f

|Readings = %6d

|Average = %10.3f

|

|Maximum run(s) of %d consecutive false readings began at %s""".stripMargin

println(report format (files mkString " ", totalSum, totalSize, totalSum / totalSize, maxInvalidCount, maxInvalidDate))

}

}</lang>

⚫

Last few lines of the sample output (either version):

<pre>

Line 1,260:

Line 1,307:

Readings = 129403

Average = 10.497

Maximum run(s) of 589 consecutive false readings began at 1993-02-09

</pre>

Though it is easier to show when a run of consecutive false readings ends, if the longest run

is the last thing in the file, it hasn't really "ended".

=={{header|Tcl}}==