Merge and aggregate datasets: Difference between revisions

← Older edit

Merge and aggregate datasets (view source)

Revision as of 01:52, 31 March 2024

18,420 bytes added , 1 month ago

Added FreeBASIC

Jjuanhdez

2,122

edits

Revision as of 02:06, 24 November 2021 (view source) rosettacode>Samsergey m (→‎Pretty tabulation) ← Older edit		Latest revision as of 01:52, 31 March 2024 (view source) Jjuanhdez (talk \| contribs) (Added FreeBASIC)
(29 intermediate revisions by 6 users not shown)
Line 71: {{trans\|Python: Stdlib csv only}} <~~lang~~syntaxhighlight lang="11l">V patients_csv = ‘PATIENT_ID,LASTNAME 1001,Hopper Line 125: L(record) result print(‘\| ’record.map(r -> r.center(10)).join(‘ \| ’)‘ \|’)</~~lang~~syntaxhighlight> {{out}} Line 138: =={{header\|AutoHotkey}}== <~~lang~~syntaxhighlight ~~AutoHotkey~~lang="autohotkey">Merge_and_aggregate(patients, visits){ ID := [], LAST_VISIT := [], SCORE_SUM := [], VISIT := [] for i, line in StrSplit(patients, "`n", "`r"){ Line 161: output .= ID "`t" name "`t" LAST_VISIT[id] "`t" SCORE_SUM[id] "`t" SCORE_SUM[id]/VISIT[id] "`n" return output }</~~lang~~syntaxhighlight> Examples:<~~lang~~syntaxhighlight ~~AutoHotkey~~lang="autohotkey">patients = ( PATIENT_ID,LASTNAME Line 186: MsgBox % Merge_and_aggregate(patients, visits) return</~~lang~~syntaxhighlight> {{out}} <pre>PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG Line 196: =={{header\|AWK}}== <syntaxhighlight lang="awk"> ~~<lang AWK>~~ # syntax: GAWK -f MERGE_AND_AGGREGATE_DATASETS.AWK RC-PATIENTS.CSV RC-VISITS.CSV # files may appear in any order Line 235: exit(0) } </syntaxhighlight> ~~</lang>~~ {{out}} <pre> Line 247: =={{header\|C++}}== Uses C++20 <~~lang~~syntaxhighlight lang="cpp">#include <iostream> #include <optional> #include <ranges> Line 323: cout << " \|\n"; } }</~~lang~~syntaxhighlight> {{out}} Line 335: \| 5005 \| Kurtz \| \| \| \| </pre> =={{header\|C sharp}}== <syntaxhighlight lang="csharp">using System; using System.Collections.Generic; using System.Globalization; using System.Linq; using System.Runtime.Serialization; public static class MergeAndAggregateDatasets { public static void Main() { string patientsCsv = @" PATIENT_ID,LASTNAME 1001,Hopper 4004,Wirth 3003,Kemeny 2002,Gosling 5005,Kurtz"; string visitsCsv = @" PATIENT_ID,VISIT_DATE,SCORE 2002,2020-09-10,6.8 1001,2020-09-17,5.5 4004,2020-09-24,8.4 2002,2020-10-08, 1001,,6.6 3003,2020-11-12, 4004,2020-11-05,7.0 1001,2020-11-19,5.3"; string format = "yyyy-MM-dd"; var formatProvider = new DateTimeFormat(format).FormatProvider; var patients = ParseCsv( patientsCsv.Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries), line => (PatientId: int.Parse(line[0]), LastName: line[1])); var visits = ParseCsv( visitsCsv.Split(Environment.NewLine, StringSplitOptions.RemoveEmptyEntries), line => ( PatientId: int.Parse(line[0]), VisitDate: DateTime.TryParse(line[1], formatProvider, DateTimeStyles.None, out var date) ? date : default(DateTime?), Score: double.TryParse(line[2], out double score) ? score : default(double?) ) ); var results = patients.GroupJoin(visits, p => p.PatientId, v => v.PatientId, (p, vs) => ( p.PatientId, p.LastName, LastVisit: vs.Max(v => v.VisitDate), ScoreSum: vs.Sum(v => v.Score), ScoreAvg: vs.Average(v => v.Score) ) ).OrderBy(r => r.PatientId); Console.WriteLine("\| PATIENT_ID \| LASTNAME \| LAST_VISIT \| SCORE_SUM \| SCORE_AVG \|"); foreach (var r in results) { Console.WriteLine($"\| {r.PatientId,-10} \| {r.LastName,-8} \| {r.LastVisit?.ToString(format) ?? "",-10} \| {r.ScoreSum,9} \| {r.ScoreAvg,9} \|"); } } private static IEnumerable<T> ParseCsv<T>(string[] contents, Func<string[], T> constructor) { for (int i = 1; i < contents.Length; i++) { var line = contents[i].Split(','); yield return constructor(line); } } }</syntaxhighlight> {{out}} <pre> \| PATIENT_ID \| LASTNAME \| LAST_VISIT \| SCORE_SUM \| SCORE_AVG \| \| 1001 \| Hopper \| 2020-11-19 \| 17.4 \| 5.8 \| \| 2002 \| Gosling \| 2020-10-08 \| 6.8 \| 6.8 \| \| 3003 \| Kemeny \| 2020-11-12 \| 0 \| \| \| 4004 \| Wirth \| 2020-11-05 \| 15.4 \| 7.7 \| \| 5005 \| Kurtz \| \| 0 \| \|</pre> =={{header\|Datalog}}== The following implementation is for Souffle. Souffle does not currently include dates and times in the base distribution, but it is straightforward to use C timestamps: <~~lang~~syntaxhighlight Clang="c">// datetime.cpp #include <ctime> #include <cstdint> Line 346 ⟶ 429: return mktime(&tmInfo); // localtime } }</~~lang~~syntaxhighlight> Rather than combine the summaries, the date and score summaries have been presented separately. <~~lang~~syntaxhighlight lang="prolog">/* rosetta.dl / #define NaN to_float("NaN") .functor from_date(date:symbol) : number Line 384 ⟶ 467: score_mean = mean score: {Visit(id, _, score), score != NaN}. .output SummaryDates .output SummaryScores</~~lang~~syntaxhighlight> Then this is called using: <~~lang~~syntaxhighlight lang="bash">g++ -shared -fPIC datetime.cpp -o libfunctors.so souffle -D- rosetta.dl</~~lang~~syntaxhighlight> {{output}} <pre>--------------- Line 407 ⟶ 490: 4004 Wirth 2020-11-05 ===============</pre> =={{header\|FreeBASIC}}== {{trans\|C++}} <syntaxhighlight lang="vbnet">Type Patient ID As String LastName As String End Type Type Visit PatientID As String Fecha As String Score As Single HasScore As Integer End Type Dim As Patient patients(5) Dim As Visit visits(8) patients(1).ID = "1001": patients(1).LastName = "Hopper" patients(2).ID = "4004": patients(2).LastName = "Wirth" patients(3).ID = "3003": patients(3).LastName = "Kemeny" patients(4).ID = "2002": patients(4).LastName = "Gosling" patients(5).ID = "5005": patients(5).LastName = "Kurtz" visits(1).PatientID = "2002": visits(1).Fecha = "2020-09-10": visits(1).Score = 6.8: visits(1).HasScore = -1 visits(2).PatientID = "1001": visits(2).Fecha = "2020-09-17": visits(2).Score = 5.5: visits(2).HasScore = -1 visits(3).PatientID = "4004": visits(3).Fecha = "2020-09-24": visits(3).Score = 8.4: visits(3).HasScore = -1 visits(4).PatientID = "2002": visits(4).Fecha = "2020-10-08": visits(4).HasScore = 0 visits(5).PatientID = "1001": visits(5).Fecha = "" : visits(5).Score = 6.6: visits(5).HasScore = -1 visits(6).PatientID = "3003": visits(6).Fecha = "2020-11-12": visits(6).HasScore = 0 visits(7).PatientID = "4004": visits(7).Fecha = "2020-11-05": visits(7).Score = 7.0: visits(7).HasScore = -1 visits(8).PatientID = "1001": visits(8).Fecha = "2020-11-19": visits(8).Score = 5.3: visits(8).HasScore = -1 Print "\| PATIENT_ID \| LASTNAME \| LAST_VISIT \| SCORE_SUM \| SCORE_AVG \|" For i As Integer = 1 To 5 Dim As String lastVisit = "" Dim As Single sum = 0 Dim As Integer numScores = 0 For j As Integer = 1 To 8 If patients(i).ID = visits(j).PatientID Then If visits(j).HasScore Then sum += visits(j).Score numScores += 1 End If If visits(j).Fecha > lastVisit Then lastVisit = visits(j).Fecha End If End If Next j Print "\| "; patients(i).ID; " \| "; Print Using "\ \ \| \ \ \| "; patients(i).LastName; lastVisit; If numScores > 0 Then Print Using "#######.# \| #######.#"; sum; (sum / Csng(numScores)); Else Print " \| "; End If Print " \|" Next i Sleep</syntaxhighlight> {{out}} <pre>\| PATIENT_ID \| LASTNAME \| LAST_VISIT \| SCORE_SUM \| SCORE_AVG \| \| 1001 \| Hopper \| 2020-11-19 \| 17.4 \| 5.8 \| \| 4004 \| Wirth \| 2020-11-05 \| 15.4 \| 7.7 \| \| 3003 \| Kemeny \| 2020-11-12 \| \| \| \| 2002 \| Gosling \| 2020-10-08 \| 6.8 \| 6.8 \| \| 5005 \| Kurtz \| \| \| \|</pre> =={{header\|F_Sharp\|F#}}== Note that the scores are right justified to copy the task description. It would be more natural to leave them right justified. <~~lang~~syntaxhighlight lang="fsharp"> // Merge and aggregate datasets. Nigel Galloway: January 6th., 2021 let rFile(fName)=seq{use n=System.IO.File.OpenText(fName) Line 420 ⟶ 570: let fG n g=let z=G.[n]\|>Seq.sumBy(fun n->try float n.[2] with :? System.FormatException->0.0) fN n g (G.[n]\|>Seq.sort\|>Seq.last).[1] (if z=0.0 then "" else string z) (if z=0.0 then "" else string(z/(float(Seq.length G.[n])))) </syntaxhighlight> ~~</lang>~~ {{out}} <pre> Line 435 ⟶ 585: =={{header\|Go}}== {{trans\|Wren}} <~~lang~~syntaxhighlight lang="go">package main import ( Line 572 ⟶ 722: } mergePrint(merges) }</~~lang~~syntaxhighlight> {{out}} Line 590 ⟶ 740: Merging of fields and databases is defined as a monoid operation for corresponding types. <~~lang~~syntaxhighlight lang="haskell">import Data.List import Data.Maybe import System.IO (readFile) Line 655 ⟶ 805: where go [] = Nothing go s = Just $ drop 1 <$> span (/= ch) s</~~lang~~syntaxhighlight> <pre>let patients = readDB <$> readFile "patients.csv" Line 682 ⟶ 832: ===Pretty tabulation=== <~~lang~~syntaxhighlight lang="haskell"> tabulateDB (DB ps) header cols = intercalate "\|" <$> body where Line 704 ⟶ 854: , \p -> case scores p of {[] -> []; s -> show (mean s)} ] mean lst = sum lst / genericLength lst</~~lang~~syntaxhighlight> <pre>Main> main Line 713 ⟶ 863: 4004 \| Wirth \| 2020-11-05 \| 15.4 \| 7.7 5005 \| Kurtz \| \| \| </pre> =={{header\|Harbour}}== Harbour does not have special values for NA or NaN, and missing numerical values are represented as zeros. In the following, we have used -999 for missing scores. <syntaxhighlight lang="xbase"> #xcommand INSERT INTO <table> ( <uField1>[, <uFieldN> ] ) VALUE ( <uVal1>[, <uValN> ] ) => ; <table>->(dbAppend()); <table>-><uField1> := <uVal1> [; <table>-><uFieldN> := <uValN>] #xcommand INSERT INTO <table> ( <uFieldList,...> ) VALUES ( <uValList1,...> ) [, ( <uValListN,...> )] => ; INSERT INTO <table> ( <uFieldList> ) VALUE ( <uValList1> ) ; [; INSERT INTO <table> ( <uFieldList> ) VALUE ( <uValListN> )] && Singular cases (so we can use VALUES for all instances) #xcommand INSERT INTO <table> (<uField>) VALUE (<uVal>) => ; <table>->(dbAppend()); <table>-><uField> := <uVal> #xcommand INSERT INTO <table> ( <uFieldList,...> ) VALUES ( <uValList,...> ) => ; INSERT INTO <table> ( <uFieldList> ) VALUE ( <uValList> ) PROCEDURE Main() LOCAL pStruct, vStruct, rStruct, xCurId, aAgg SET DATE FORMAT "yyyy-mm-dd" && create and populate the patient table pStruct := {{"patient_id", "n", 8, 0}, {"lastname", "c", 10, 0 }} dbCreate( "patient", pStruct,, .T., "patient" ) INSERT INTO patient (patient_id, lastname) VALUES (1001, "Hopper"), (4004, "Wirth"), ; (3003, "Kemeny"), (2002, "Gosling"), (5005, "Kurtz") INDEX ON patient_id TO pat_id && create and populate the visit table vStruct := {{"patient_id", "n", 8, 0}, {"visit_date", "d", 10, 0}, {"score", "n", 8, 1}} dbCreate( "visit", vStruct,, .T., "visit" ) INSERT INTO visit (patient_id, visit_date, score) VALUES (2002, ctod("2020-09-10"), 6.8), ; (1001, ctod("2020-09-17"), 5.5), (4004, ctod("2020-09-24"), 8.4), ; (2002, ctod("2020-10-08"), -999), (1001, ctod("1900-01-01"), 6.6), ; (3003, ctod("2020-11-12"), -999), (4004, ctod("2020-11-05"), 7.0), ; (1001, ctod("2020-11-19"), 5.3) INDEX ON patient_id TO visit_id && create the result table rStruct := { {"patient_id", "n", 8, 0}, ; {"n", "i", 8, 0}, {"sum_score", "n", 8, 1}, ; {"avg_score", "n", 8, 1}, {"max_date", "d", 10, 0}} dbCreate("report", rStruct,, .T., "report") SELECT visit DO WHILE ! Eof() xCurId := patient_id && grouping variable aAgg := {0, 0, 0.0, ctod("1900-01-01")} && initial values DO WHILE ! Eof() .AND. xCurId == patient_id aAgg := {1+aAgg[1], iif(score==-999,aAgg[2],1+aAgg[2]), ; iif(score==-999, aAgg[3], score+aAgg[3]), max(visit_date, aAgg[4])} && update SKIP ENDDO INSERT INTO report (patient_id, n, sum_score, avg_score, max_date) ; VALUES (xCurId, aAgg[1], aAgg[3], aAgg[3]/aAgg[2], aAgg[4]) ENDDO SELECT report INDEX ON patient_id TO report_id SELECT patient SET RELATION TO patient_id INTO report ? "NUM", "PATIENT_ID", "LASTNAME", "N", "SUM_SCORE", "AVG_SCORE", "MAX_DATE" LIST patient_id, lastname, report->n, report->sum_score, report->avg_score, report->max_date RETURN</syntaxhighlight> With output: <pre>NUM PATIENT_ID LASTNAME N SUM_SCORE AVG_SCORE MAX_DATE 1 1001 Hopper 3 17.4 5.8 2020-11-19 4 2002 Gosling 2 6.8 6.8 2020-10-08 3 3003 Kemeny 1 0.0 0.0 2020-11-12 2 4004 Wirth 2 15.4 7.7 2020-11-05 5 5005 Kurtz 0 0.0 0.0 - - </pre> =={{header\|J}}== One approach here would be to use [[j:Jd/Index\|Jd]] In other words, we can set things up like this: <syntaxhighlight lang="j">NB. setup: require'jd pacman' load JDP,'tools/csv_load.ijs' F=: jpath '~temp/rosettacode/example/CSV' jdcreatefolder_jd_ CSVFOLDER=: F assert 0<{{)n PATIENTID,LASTNAME 1001,Hopper 4004,Wirth 3003,Kemeny 2002,Gosling 5005,Kurtz }} fwrite F,'patients.csv' assert 0<{{)n PATIENTID,VISIT_DATE,SCORE 2002,2020-09-10,6.8 1001,2020-09-17,5.5 4004,2020-09-24,8.4 2002,2020-10-08, 1001,,6.6 3003,2020-11-12, 4004,2020-11-05,7.0 1001,2020-11-19,5.3 }} fwrite F,'visits.csv' csvprepare 'patients';F,'patients.csv' csvprepare 'visits';F,'visits.csv' csvload 'patients';1 csvload 'visits';1 jd'ref patients PATIENTID visits PATIENTID'</syntaxhighlight> And, then we can run our query: <syntaxhighlight lang="j">require'jd' echo jd {{)n reads PATIENT_ID: first p.PATIENTID, LASTNAME: first p.LASTNAME, LAST_VISIT: max v.VISIT_DATE, SCORE_SUM: sum v.SCORE, SCORE_AVG: avg v.SCORE by p.PATIENTID from p:patients, v:p.visits }} -.LF</syntaxhighlight> Which displays this result: <pre>┌───────────┬──────────┬────────┬──────────┬─────────┬─────────┐ │p.PATIENTID│PATIENT_ID│LASTNAME│LAST_VISIT│SCORE_SUM│SCORE_AVG│ ├───────────┼──────────┼────────┼──────────┼─────────┼─────────┤ │1001 │1001 │Hopper │2020-09-17│5.5 │5.5 │ │4004 │4004 │Wirth │2020-09-24│8.4 │8.4 │ │3003 │3003 │Kemeny │2020-11-12│ __ │ __ │ │2002 │2002 │Gosling │2020-09-10│6.8 │6.8 │ │5005 │5005 │Kurtz │? │ 0 │ 0 │ └───────────┴──────────┴────────┴──────────┴─────────┴─────────┘</pre> Another approach would be to use J's csv library: <syntaxhighlight lang="j">require'csv' patients=: fixcsv {{)n PATIENTID,LASTNAME 1001,Hopper 4004,Wirth 3003,Kemeny 2002,Gosling 5005,Kurtz }} visits=: fixcsv {{)n PATIENTID,VISIT_DATE,SCORE 2002,2020-09-10,6.8 1001,2020-09-17,5.5 4004,2020-09-24,8.4 2002,2020-10-08, 1001,,6.6 3003,2020-11-12, 4004,2020-11-05,7.0 1001,2020-11-19,5.3 }} task=: {{ P=. <@:>"1\|:/:~}.patients V=. <@:>"1\|:/:~}.visits id=. 0 {:: P nm=. 1 {:: P sel1=. (0 {:: P) e. 0 {:: V sel2=. (~.0 {:: V) e. 0 {:: P NB. unnecessary for this example exp=. sel1 #inv sel2 # ] agg=. /.(&.:".) vdt=. exp (0 {:: V) {:/. 1 {:: V sum=. exp ":,.(0 {:: V) +//. 0". 2 {:: V avg=. exp ":,.(0 {:: V) (+/%#)/. 0". 2 {:: V labels=. ;:'PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG' labels,:id;nm;vdt;sum;avg }}</syntaxhighlight> Here: <syntaxhighlight lang="j"> task'' ┌──────────┬────────┬──────────┬─────────┬─────────┐ │PATIENT_ID│LASTNAME│LAST_VISIT│SCORE_SUM│SCORE_AVG│ ├──────────┼────────┼──────────┼─────────┼─────────┤ │1001 │Hopper │2020-11-19│17.4 │5.8 │ │2002 │Gosling │2020-10-08│ 6.8 │3.4 │ │3003 │Kemeny │2020-11-12│ 0 │ 0 │ │4004 │Wirth │2020-11-05│15.4 │7.7 │ │5005 │Kurtz │ │ │ │ └──────────┴────────┴──────────┴─────────┴─────────┘</syntaxhighlight> If the empty score in visits was a display concern, we might instead do it this way: <syntaxhighlight lang="j">task=: {{ P=. <@:>"1\|:/:~}.patients V=. <@:>"1\|:/:~}.visits id=. 0 {:: P nm=. 1 {:: P sel1=. (0 {:: P) e. 0 {:: V sel2=. (~.0 {:: V) e. 0 {:: P NB. unnecessary for this example exp=. sel1 #inv sel2 # ] agg=. /.(&.:".) vdt=. exp (0 {:: V) {:/. 1 {:: V sel3=. (0 {:: V) +.//. 2 @#@{::"1 }.visits exp2=: [:exp sel3 #inv sel3 #] sum=. exp2 ":,.(0 {:: V) +//. 0". 2 {:: V avg=. exp2 ":,.(0 {:: V) (+/%#)/. 0". 2 {:: V labels=. ;:'PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG' labels,:id;nm;vdt;sum;avg }}</syntaxhighlight> Which gives us: <syntaxhighlight lang="j"> task'' ┌──────────┬────────┬──────────┬─────────┬─────────┐ │PATIENT_ID│LASTNAME│LAST_VISIT│SCORE_SUM│SCORE_AVG│ ├──────────┼────────┼──────────┼─────────┼─────────┤ │1001 │Hopper │2020-11-19│17.4 │5.8 │ │2002 │Gosling │2020-10-08│ 6.8 │3.4 │ │3003 │Kemeny │2020-11-12│ │ │ │4004 │Wirth │2020-11-05│15.4 │7.7 │ │5005 │Kurtz │ │ │ │ └──────────┴────────┴──────────┴─────────┴─────────┘</syntaxhighlight> =={{header\|Java}}== <syntaxhighlight lang="java"> import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.DoubleSummaryStatistics; import java.util.List; public final class MergeAndAggregateDatasets { public static void main(String[] args) { List<Patient> patients = Arrays.asList( new Patient("1001", "Hopper"), new Patient("4004", "Wirth"), new Patient("3003", "Kemeny"), new Patient("2002", "Gosling"), new Patient("5005", "Kurtz") ); List<Visit> visits = Arrays.asList( new Visit("2002", "2020-09-10", 6.8), new Visit("1001", "2020-09-17", 5.5), new Visit("4004", "2020-09-24", 8.4), new Visit("2002", "2020-10-08", null), new Visit("1001", "" , 6.6), new Visit("3003", "2020-11-12", null), new Visit("4004", "2020-11-05", 7.0), new Visit("1001", "2020-11-19", 5.3) ); Collections.sort(patients, Comparator.comparing(Patient::patientID)); System.out.println("\| PATIENT_ID \| LASTNAME \| LAST_VISIT \| SCORE_SUM \| SCORE_AVG \|"); for ( Patient patient : patients ) { List<Visit> patientVisits = visits.stream().filter( v -> v.visitID == patient.patientID() ).toList(); String lastVisit = patientVisits.stream() .map( v -> v.visitDate ).max(Comparator.naturalOrder()).orElseGet( () -> " None " ); DoubleSummaryStatistics statistics = patientVisits.stream() .filter( v -> v.score != null ).mapToDouble(Visit::score).summaryStatistics(); double scoreSum = statistics.getSum(); double scoreAverage = statistics.getAverage(); String patientDetails = String.format("%12s%11s%13s%12.2f%12.2f", patient.patientID, patient.lastName, lastVisit, scoreSum, scoreAverage); System.out.println(patientDetails); } private static record Patient(String patientID, String lastName) {}; private static record Visit(String visitID, String visitDate, Double score) {}; } } </syntaxhighlight> {{ out }} <pre> \| PATIENT_ID \| LASTNAME \| LAST_VISIT \| SCORE_SUM \| SCORE_AVG \| 1001 Hopper 2020-11-19 17.40 5.80 2002 Gosling 2020-10-08 6.80 6.80 3003 Kemeny 2020-11-12 0.00 0.00 4004 Wirth 2020-11-05 15.40 7.70 5005 Kurtz None 0.00 0.00 </pre> =={{header\|jq}}== Line 723 ⟶ 1,163: '''Ingesting CSV data''' <syntaxhighlight lang="jq"> ~~<lang jq>~~ # objectify/1 takes an array of atomic values as inputs, and packages # these into an object with keys specified by the "headers" array and Line 741 ⟶ 1,181: ([]; . + [ $row\|objectify($headers) ]); </syntaxhighlight> ~~</lang>~~ '''Aggregation functions''' <syntaxhighlight lang="jq"> ~~<lang jq>~~ # output {LAST_VISIT} def LAST_VISIT($patient_id): Line 753 ⟶ 1,193: \| {SCORE_SUM: add, count: length} \| {SCORE_SUM, SCORE_AVG: (if .SCORE_SUM and .count > 0 then .SCORE_SUM/.count else null end)}; </~~lang~~syntaxhighlight>'''The task'''<syntaxhighlight lang ="jq"> # Read the two tables: INDEX($patients \| [splits("\n")] \| map(split(",")) \| csv2jsonHelper[]; .PATIENT_ID) as $patients Line 764 ⟶ 1,204: # ... but display it as a sequence of JSON objects \| .[] </syntaxhighlight> ~~</lang>~~ '''Invocation''' Line 782 ⟶ 1,222: =={{header\|Julia}}== {{trans\|Python}} <~~lang~~syntaxhighlight lang="julia">using CSV, DataFrames, Statistics # load data from csv files Line 821 ⟶ 1,261: end println(df_result) </~~lang~~syntaxhighlight>{{out}} <pre> 5×5 DataFrame Line 835 ⟶ 1,275: =={{header\|Mathematica}}/{{header\|Wolfram Language}}== <~~lang~~syntaxhighlight ~~Mathematica~~lang="mathematica">a = ImportString["PATIENT_ID,LASTNAME 1001,Hopper 4004,Wirth Line 859 ⟶ 1,299: "SCORE_SUM" -> If[DeleteMissing@#[[All, "SCORE"]] =!= {}, Total@DeleteMissing@#[[All, "SCORE"]], Missing[]], "SCORE_AVG" -> If[DeleteMissing@#[[All, "SCORE"]] =!= {}, Mean@DeleteMissing@#[[All, "SCORE"]], Missing[]]\|> & /@ gr // Dataset</~~lang~~syntaxhighlight> {{out}} <pre> PATIENT_ID LASTNAME VISIT_DATE SCORE_SUM SCORE_AVG Line 868 ⟶ 1,308: 5005 5005 Kurtz \[LongDash] \[LongDash] \[LongDash]</pre> =={{header\|Mercury}}== <~~lang~~syntaxhighlight ~~Prolog~~lang="prolog">:- module rosetta. :- interface. Line 900 ⟶ 1,340: %% Utilities :- pred bag_aggr(pred(T)::(pred(out) is nondet), pred(T,U,U)::pred(in,in,out) is det, U::in, U::out) is det. :- pred bag_count(pred(T)::(pred(out) is nondet), int::out) is det. :- pred bag_sum(pred(float)::(pred(out) is nondet), float::out) is det. :- pred bag_avg(pred(float)::(pred(out) is nondet), float::out) is det. :- pred bag_max(pred(T)::(pred(out) is nondet), T::in, T::out) is det. :- pred bag_max_date(pred(maybe_date)::(pred(out) is nondet), maybe_date::out) is det. bag_aggr(Predicate, Aggregator, Initial, Result) :- promise_equivalent_solutions[Result] ( unsorted_aggregate(Predicate, Aggregator, Initial, Result)). bag_count(Predicate, Count) :- bag_aggr(Predicate, (pred(_X::in,Y::in,Z::out) is det :- Z = Y+1), 0, Count). ~~promise_equivalent_solutions[Count] (~~ ~~unsorted_aggregate(Predicate,~~ ~~(pred(_X::in,Y::in,Z::out) is det :- Z is Y+1),~~ ~~0, Count)).~~ bag_sum(Predicate, Sum) :- bag_aggr(Predicate, (pred(X::in,Y::in,Z::out) is det :- Z = X+Y), 0.0, Sum). ~~promise_equivalent_solutions[Sum] (~~ ~~unsorted_aggregate(Predicate,~~ ~~(pred(X::in,Y::in,Z::out) is det :- Z is X+Y),~~ ~~0.0, Sum)).~~ bag_avg(Predicate, Avg) :- bag_count(Predicate, N), bag_sum(Predicate, Sum), (if N = 0 then Avg = nan else Avg is= Sum/float(N)). bag_max(Predicate, Initial, Max) :- bag_aggr(Predicate, ~~promise_equivalent_solutions [Max] (~~ (pred(X::in,Y::in,Z::out) is det :- ~~unsorted_aggregate(Predicate,~~ ~~(pred~~compare(R,X~~::in~~,Y~~::in~~),~~Z::out) is det :-~~ (if R = (>) then Z = X else Z = Y)), ~~compare(R,X,Y),~~ Initial, Max). ~~(if R = (>) then Z = X else Z = Y)),~~ bag_max_date(Predicate, MaxDate) :- ~~Initial, Max)).~~ bag_max(Predicate, date(0,0,0), MaxDate1), (if MaxDate1 = date(0,0,0) then MaxDate = no else MaxDate = MaxDate1). main(!IO) :- ~~io.write_string~~print_line("{Id, Lastname, SumScores, AvgScores, MaxDate}:\n", !IO), ~~solutions~~aggregate((pred({Id,Lastname,Sum,Avg,MaxDate}::out) is nondet :- patient(Id,Lastname), Scores = (pred(Score::out) is nondet :- visit(Id,_,Score), \+is_nan(Score)), bag_avg(Scores, Avg), bag_sum(Scores, Sum), Dates = (pred(Date::out) is nondet :- visit(Id,Date,_), Date\=~~date(_,_,_)~~no), ~~bag_max~~bag_max_date(Dates, ~~date(0,0,0~~MaxDate)~~, MaxDate1~~), print_line, ~~(if MaxDate1 = date(0,0,0) then MaxDate = no else MaxDate = MaxDate1)),~~ ~~Solutions~~!IO),.</syntaxhighlight> ~~foldl(io.write_line, Solutions, !IO).</lang>~~ {{out}} <pre>{Id, Lastname, SumScores, AvgScores, MaxDate}: Line 948 ⟶ 1,389: =={{header\|Nim}}== ===CSV files and tables=== <~~lang~~syntaxhighlight ~~Nim~~lang="nim">import algorithm, parsecsv, strformat, strutils, tables const NoValue = -1.0 Line 1,000 ⟶ 1,441: let scoreSum = if count == 0: "" else: &"{sum:>4.1f}" let scoreAvg = if count == 0: "" else: &"{sum / count.toFloat: >4.2f}" echo &"\| {id:^10} \| {name:^10} \| {lastVisit:^10} \| {scoreSum:>7} \| {scoreAvg:>6} \|"</~~lang~~syntaxhighlight> {{out}} Line 1,013 ⟶ 1,454: {{trans\|Python}} We use the high level standard library "db_sqlite" rather than the low level one "sqlite3". <~~lang~~syntaxhighlight ~~Nim~~lang="nim">import parseCsv, db_sqlite, sequtils, strutils const FNames = ["patients1.csv", "patients2.csv"] Line 1,065 ⟶ 1,506: echo "\| PATIENT_ID \| LASTNAME \| LAST_VISIT \| SCORE_SUM \| SCORE_AVG \|" for row in result: echo "\| " & row.mapit(it.center(10)).join(" \| ") & '\|'</~~lang~~syntaxhighlight> {{out}} Line 1,076 ⟶ 1,517: =={{header\|Phix}}== <!--<~~lang~~syntaxhighlight ~~Phix~~lang="phix">(phixonline)--> <span style="color: #008080;">constant</span> <span style="color: #000000;">patients_txt</span> <span style="color: #0000FF;">=</span> <span style="color: #7060A8;">split</span><span style="color: #0000FF;">(</span><span style="color: #008000;">""" PATIENT_ID,LASTNAME Line 1,134 ⟶ 1,575: <span style="color: #0000FF;">{</span><span style="color: #7060A8;">id</span><span style="color: #0000FF;">,</span><span style="color: #000000;">name</span><span style="color: #0000FF;">,</span><span style="color: #000000;">dt</span><span style="color: #0000FF;">,</span><span style="color: #000000;">scstr</span><span style="color: #0000FF;">,</span><span style="color: #000000;">avstr</span><span style="color: #0000FF;">})</span> <span style="color: #008080;">end</span> <span style="color: #008080;">for</span> <!--</~~lang~~syntaxhighlight>--> {{out}} <pre> Line 1,148 ⟶ 1,589: Weird requirement: "Use the most common libraries only when built-in functionality is not sufficient." Not even a "use strict;" :) <~~lang~~syntaxhighlight lang="perl">#!/usr/bin/perl my $fmt = '\| %-11s' x 5 . "\|\n"; printf $fmt, qw( PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG); Line 1,184 ⟶ 1,625: 3003,2020-11-12, 4004,2020-11-05,7.0 1001,2020-11-19,5.3</~~lang~~syntaxhighlight> {{out}} <pre> Line 1,196 ⟶ 1,637: =={{header\|Prolog}}== Implemented using SWI Prolog: <~~lang~~syntaxhighlight lang="prolog">patient(1001,'Hopper'). patient(4004,'Wirth'). patient(3003,'Kemeny'). Line 1,231 ⟶ 1,672: summaryScores(Id, Lastname, ScoreSum, ScoreMean), writeln(summaryScores(Id, Lastname, ScoreSum, ScoreMean)), fail.</~~lang~~syntaxhighlight> {{output}} <pre>summaryDates(1001,Hopper,2020-11-19) Line 1,242 ⟶ 1,683: false.</pre> Implemented using XSB Prolog (which allows for user-defined aggregates): <~~lang~~syntaxhighlight lang="prolog">:- import bagMax/2, bagCount/2, bagSum/2, bagReduce/4 from aggregs. :- import julian_date/7, date_string/3 from iso8601. :- import load_csv/2, add_cvt_type_hook/2 from proc_files. Line 1,290 ⟶ 1,731: sum(X,Y,Z) :- Z is X+Y. :- hilog successor. successor(X,_Y,Z) :- Z is X+1.</~~lang~~syntaxhighlight> =={{header\|PureBasic}}== <~~lang~~syntaxhighlight ~~PureBasic~~lang="purebasic">Structure Person Name$ EndStructure Line 1,353 ⟶ 1,794: PrintN("") : scs=0 : c=0 Next Input()</~~lang~~syntaxhighlight> {{out}} <pre>\| PATIENT_ID \| LASTNAME \| LAST_VISIT \| SCORE_SUM \| SCORE_AVG \| Line 1,365 ⟶ 1,806: ===Python: Using pandas library=== <~~lang~~syntaxhighlight ~~Python~~lang="python"># to install pandas library go to cmd prompt and type: # cd %USERPROFILE%\AppData\Local\Programs\Python\Python38-32\Scripts\ # pip install pandas Line 1,409 ⟶ 1,850: df_result = df_group.agg({'VISIT_DATE': 'max', 'SCORE': [lambda x: x.sum(min_count=1),'mean']}) print(df_result)</~~lang~~syntaxhighlight> <pre> PATIENT_ID LASTNAME LAST_VISIT SCORE Line 1,423 ⟶ 1,864: Using only standard libraries and input from csv files. <~~lang~~syntaxhighlight lang="python">import csv fnames = 'patients.csv patients_visits.csv'.split() Line 1,458 ⟶ 1,899: #%% for record in result: print(f"\| {' \| '.join(f'{r:^10}' for r in record)} \|")</~~lang~~syntaxhighlight> {{out}} Line 1,473 ⟶ 1,914: (The SQL SELECT statement is modelled on that of the SQL entry elsewhere on this page). <~~lang~~syntaxhighlight lang="python">import sqlite3 import csv Line 1,537 ⟶ 1,978: result = join_tables_and_group(conn) for record in result: print(f"\| {' \| '.join(f'{str(r):^10}' for r in record)} \|")</~~lang~~syntaxhighlight> {{out}} Line 1,548 ⟶ 1,989: =={{header\|R}}== <~~lang~~syntaxhighlight Rlang="r"># load data from csv files # setwd("C:\Temp\csv\") # df_patient <- read.csv(file="patients.csv", header = TRUE, sep = ",") Line 1,589 ⟶ 2,030: df_result <- merge(df_patient, df_agg, by = 'PATIENT_ID', all.x = TRUE) print(df_result)</~~lang~~syntaxhighlight> {{out}} <pre> Line 1,601 ⟶ 2,042: =={{header\|Raku}}== <syntaxhighlight lang="raku" ~~perl6~~line>my @names = map { ( <PATIENT_ID LASTNAME> Z=> .list ).hash }, ( 1001, 'Hopper' ), ( 4004, 'Wirth' ), Line 1,637 ⟶ 2,078: my @out_field_names = <PATIENT_ID LASTNAME LAST_VISIT SCORE_SUM SCORE_AVG>; my @rows = @result.sort(.<PATIENT_ID>).map(.{@out_field_names}); say .map({$_ // ''}).fmt('%-10s', ' \| ') for @out_field_names, \|@rows;</~~lang~~syntaxhighlight> {{out}} <pre> Line 1,649 ⟶ 2,090: =={{header\|REXX}}== <~~lang~~syntaxhighlight lang="rexx">/ REXX / patients='patients.csv' l=linein(patients) Line 1,729 ⟶ 2,170: swl=swl wa.i End Return strip(swl)</~~lang~~syntaxhighlight> {{out}} <pre>\| PATIENT_ID \| LASTNAME \| LAST_VISIT \| SCORE_SUM \| SCORE_AVG \| Line 1,738 ⟶ 2,179: \| 5005 \| Kurtz \| \| \| \|</pre> =={{header\|SAS}}== <~~lang~~syntaxhighlight lang="sas"> %let datefmt=E8601DA10.; data patient; infile "patient.csv" dsd dlm=','; Line 1,757 ⟶ 2,198: avg(score) as avg_score from visit group by id) natural right join patient order by id;</~~lang~~syntaxhighlight> {{out}} <pre>: id lastname max_date sum_score avg_score Line 1,771 ⟶ 2,212: Note: the PATIENT_ID 5005 is missing from the result, because MATCH FILES cannot add rows for missing keys, i.e. AFAIK a "left join" is not possible is SPSS. <~~lang~~syntaxhighlight ~~SPSS~~lang="spss"> set working directory to location of .csv files CD 'C:\Temp\csv\'. Line 1,836 ⟶ 2,277: /last_visit = MAX(VISIT_DATE) /score_avg = MEAN(SCORE) /score_sum = SUM(SCORE).</~~lang~~syntaxhighlight> {{out}} Line 1,849 ⟶ 2,290: =={{header\|SQL}}== <~~lang~~syntaxhighlight ~~SQL~~lang="sql">-- drop tables DROP TABLE IF EXISTS tmp_patients; DROP TABLE IF EXISTS tmp_visits; Line 1,907 ⟶ 2,348: p.LASTNAME ORDER BY p.PATIENT_ID;</~~lang~~syntaxhighlight> {{out}} <pre> Line 1,919 ⟶ 2,360: =={{header\|Transd}}== <~~lang~~syntaxhighlight lang="scheme">#lang transd MainModule: { Line 1,977 ⟶ 2,418: )) )) }</~~lang~~syntaxhighlight>{{out}} <pre> PATIENT_ID\| LASTNAME\|VISIT_DATE\| SCORE\| SCORE_AVG\| Line 1,988 ⟶ 2,429: =={{header\|TutorialD}}== <syntaxhighlight lang="text">BEGIN; TYPE Date UNION; TYPE DateValid IS {Date POSSREP {year INTEGER, month INTEGER, day INTEGER}}; Line 2,023 ⟶ 2,464: (SUMMARIZE visit BY {id}: {maxdate := MAX(date)} UNION (EXTEND (patient {id} MINUS (visit {id})): {maxdate := DateUnknown()})) JOIN patient</~~lang~~syntaxhighlight> {{out}} <pre> Line 2,036 ⟶ 2,477: {{libheader\|Wren-sort}} {{libheader\|Wren-fmt}} <~~lang~~syntaxhighlight ~~ecmascript~~lang="wren">import "./fmt" for Fmt import "./sort" for Sort class Patient { Line 2,131 ⟶ 2,572: var merges = Patient.ids.map { \|id\| Merge.new(id) }.toList Merge.print(merges)</~~lang~~syntaxhighlight> {{out}}