Text processing/1: Difference between revisions
Content added Content deleted
(Added Scala) |
(Added Scala) |
||
Line 1,192: | Line 1,192: | ||
=={{header|Scala}}== |
=={{header|Scala}}== |
||
=={{works with|Scala|2.8}}== |
=={{works with|Scala|2.8}}== |
||
A fully functional solution: |
|||
<lang scala>object DataMunging { |
<lang scala>object DataMunging { |
||
import scala.io.Source |
|||
⚫ | |||
def spans[A](list: List[A]) = list.tail.foldLeft(List((list.head, 1))) { |
def spans[A](list: List[A]) = list.tail.foldLeft(List((list.head, 1))) { |
||
case ((a, n) :: tail, b) if a == b => (a, n + 1) :: tail |
case ((a, n) :: tail, b) if a == b => (a, n + 1) :: tail |
||
Line 1,204: | Line 1,205: | ||
type LineIterator = Iterator[Option[(Double, Int, Flags)]] |
type LineIterator = Iterator[Option[(Double, Int, Flags)]] |
||
⚫ | |||
def linesIterator(file: java.io.File) = Source.fromFile(file).getLines().map( |
def linesIterator(file: java.io.File) = Source.fromFile(file).getLines().map( |
||
pattern findFirstMatchIn _ map ( |
pattern findFirstMatchIn _ map ( |
||
Line 1,234: | Line 1,237: | ||
def main(args: Array[String]) { |
def main(args: Array[String]) { |
||
val |
val files = args map (new java.io.File(_)) filter (file => file.isFile && file.canRead) |
||
val lines = |
val lines = files.iterator flatMap linesIterator |
||
val (totalSum, totalSize, flags) = totalizeLines(lines) |
val (totalSum, totalSize, flags) = totalizeLines(lines) |
||
val ((_, |
val ((_, invalidCount), startDate) = flags.filter(!_._1._1).max |
||
val report = """| |
val report = """| |
||
|File(s) = %s |
|File(s) = %s |
||
Line 1,244: | Line 1,247: | ||
|Average = %10.3f |
|Average = %10.3f |
||
| |
| |
||
|Maximum run(s) of %d consecutive false readings |
|Maximum run(s) of %d consecutive false readings began at %s""".stripMargin |
||
println(report format ( |
println(report format (files mkString " ", totalSum, totalSize, totalSum / totalSize, invalidCount, startDate)) |
||
} |
} |
||
}</lang> |
}</lang> |
||
A quick-and-dirty solution: |
|||
⚫ | |||
<lang scala>object AltDataMunging { |
|||
/** Reads the readings files named in `args`, prints one statistics line per
  * record, then prints a summary with the overall total, the number of
  * accepted readings, their average, and the longest run of consecutive
  * rejected readings together with the date on which that run began.
  *
  * Each non-blank input line is split on whitespace: the first field is the
  * date, the remaining fields are consumed as (value, flag) pairs. A reading
  * is accepted when its flag is greater than zero.
  *
  * @param args paths of the files to process; entries that are not readable
  *             regular files are ignored
  */
def main(args: Array[String]): Unit = {
  // Imported locally so this object does not depend on an import that lives
  // inside another object.
  import scala.io.Source

  var totalSum = 0.0
  var totalSize = 0
  var maxInvalidDate = ""
  var maxInvalidCount = 0
  var invalidDate = ""
  var invalidCount = 0

  // Promotes the current run of rejected readings to "longest so far" when it
  // beats the previous record.
  def recordRun(): Unit =
    if (invalidCount > maxInvalidCount) {
      maxInvalidDate = invalidDate
      maxInvalidCount = invalidCount
    }

  // Reads a whole file eagerly so that its handle can be closed immediately.
  def readLines(file: java.io.File): List[String] = {
    val source = Source.fromFile(file)
    try source.getLines().toList finally source.close()
  }

  val files = args.map(new java.io.File(_)).filter(file => file.isFile && file.canRead)

  for {
    file <- files.iterator
    line <- readLines(file).iterator.map(_.trim)
    if line.nonEmpty // blank lines carry no record
  } {
    val fields = line.split("\\s+")
    val date = fields.head

    // Pair up (value, flag); a dangling unpaired trailing field is dropped.
    val readings = fields.tail.map(_.toDouble).grouped(2).collect {
      case Array(value, flag) => (value, flag > 0)
    }.toList

    val valid = readings.collect { case (value, true) => value }
    // Avoid printing NaN for a line without a single accepted reading.
    val lineAverage = if (valid.isEmpty) 0.0 else valid.sum / valid.size

    println("Line: %11s Reject: %2d Accept: %2d Line_tot: %10.3f Line_avg: %10.3f" format
      (date, readings.size - valid.size, valid.size, valid.sum, lineAverage))

    totalSum += valid.sum
    totalSize += valid.size

    readings.foreach { case (_, accepted) =>
      if (accepted) {
        // An accepted reading terminates the current run of rejects.
        recordRun()
        invalidCount = 0
      } else {
        // The run's date is the date of its first rejected reading.
        if (invalidCount == 0) invalidDate = date
        invalidCount += 1
      }
    }
  }

  // A run that is still open when the input ends has not been compared yet.
  recordRun()

  val totalAverage = if (totalSize == 0) 0.0 else totalSum / totalSize

  val report = """|
                  |File(s) = %s
                  |Total = %10.3f
                  |Readings = %6d
                  |Average = %10.3f
                  |
                  |Maximum run(s) of %d consecutive false readings began at %s""".stripMargin

  println(report format (files mkString " ", totalSum, totalSize, totalAverage, maxInvalidCount, maxInvalidDate))
}
|||
}</lang> |
|||
⚫ | |||
<pre> |
<pre> |
||
Line 1,260: | Line 1,307: | ||
Readings = 129403 |
Readings = 129403 |
||
Average = 10.497 |
Average = 10.497 |
||
Maximum run(s) of 589 consecutive false readings began at 1993-02-09 |
|||
</pre> |
</pre> |
||
Though it is easier to show when a run of consecutive false readings ends, if the longest run |
|||
is the last thing in the file, it hasn't really "ended". |
|||
=={{header|Tcl}}== |
=={{header|Tcl}}== |