Text processing/2: Difference between revisions
Content added Content deleted
(Add Factor) |
|||
Line 521: | Line 521: | ||
1995-03-26 |
1995-03-26 |
||
2004-12-31 |
2004-12-31 |
||
</pre> |
|||
=={{header|Clojure}}== |
|||
<lang Clojure> |
|||
(defn parse-line [s] |
|||
(let [[date & data-toks] (str/split s #"\s+") |
|||
data-fields (map read-string data-toks) |
|||
valid-date? (fn [s] (re-find #"\d{4}-\d{2}-\d{2}" s)) |
|||
valid-line? (and (valid-date? date) |
|||
(= 48 (count data-toks)) |
|||
(every? number? data-fields)) |
|||
readings (for [[v flag] (partition 2 data-fields)] |
|||
{:val v :flag flag})] |
|||
(when (not valid-line?) |
|||
(println "Malformed Line: " s)) |
|||
{:date date |
|||
:no-missing-readings? (and (= 48 (count data-toks)) |
|||
(every? pos? (map :flag readings)))})) |
|||
(defn analyze-file [path] |
|||
(reduce (fn [m line] |
|||
(let [{:keys [all-dates dupl-dates n-full-recs invalid-lines]} m |
|||
this-date (:date line) |
|||
dupl? (contains? all-dates this-date) |
|||
full? (:no-missing-readings? line)] |
|||
(cond-> m |
|||
dupl? (update-in [:dupl-dates] conj this-date) |
|||
full? (update-in [:n-full-recs] inc) |
|||
true (update-in [:all-dates] conj this-date)))) |
|||
{:dupl-dates #{} :all-dates #{} :n-full-recs 0} |
|||
(->> (slurp path) |
|||
clojure.string/split-lines |
|||
(map parse-line)))) |
|||
(defn report-summary [path] |
|||
(let [m (analyze-file path)] |
|||
(println (format "%d unique dates" (count (:all-dates m)))) |
|||
(println (format "%d duplicated dates [%s]" |
|||
(count (:dupl-dates m)) |
|||
(clojure.string/join " " (sort (:dupl-dates m))))) |
|||
(println (format "%d lines with no missing data" (:n-full-recs m))))) |
|||
</lang> |
|||
{{out}} |
|||
<pre> |
|||
5466 unique dates |
|||
5 duplicated dates [1990-03-25 1991-03-31 1992-03-29 1993-03-28 1995-03-26] |
|||
5017 lines with no missing data |
|||
</pre> |
</pre> |
||