Top rank per group: Difference between revisions

Content added Content deleted

Inline

Revision as of 19:32, 28 July 2009

Find the top N salaries in each group, where N is provided as a parameter.

Use this data as a formatted internal data structure (adapt it to your language-native idioms, rather than parse at runtime), or identify your external data source: <lang csv>Employee Name,Employee ID,Salary,Department Tyler Bennett,E10297,32000,D101 John Rappl,E21437,47000,D050 George Woltman,E00127,53500,D101 Adam Smith,E63535,18000,D202 Claire Buckman,E39876,27800,D202 David McClellan,E04242,41500,D101 Rich Holcomb,E01234,49500,D202 Nathan Adams,E41298,21900,D050 Richard Potter,E43128,15900,D101 David Motsinger,E27002,19250,D202 Tim Sampair,E03033,27000,D101 Kim Arlich,E10001,57000,D190 Timothy Grove,E16398,29900,D190</lang>

C++

<lang cpp>#include <string>

#include <set>
#include <list>
#include <map>
#include <iostream>

// One record of the employee data set.
struct Employee
{
    std::string Name;
    std::string ID;
    unsigned long Salary;
    std::string Department;

    // Every argument is defaulted, so an Employee can also be default-constructed
    // (empty strings, zero salary).
    Employee(std::string name = "", std::string id = "", unsigned long salary = 0, std::string department = "")
        : Name(name), ID(id), Salary(salary), Department(department)
    {
    }

    // Writes the record to `out` as a single tab-separated line.
    void display(std::ostream& out) const
    {
        out << Name << "\t" << ID << "\t" << Salary << "\t" << Department << std::endl;
    }
};

// We'll tell std::set to use this to sort our employees: highest salary first.
// Ties on salary are broken by ID. Ordering on salary alone would make std::set
// treat two equally paid employees as the same element and silently drop one.
struct CompareEarners
{
    // const-qualified because ordered containers may invoke their comparator
    // through a const object.
    bool operator()(const Employee& e1, const Employee& e2) const
    {
        if (e1.Salary != e2.Salary)
        {
            return e1.Salary > e2.Salary;
        }
        return e1.ID < e2.ID;
    }
};

// A few typedefs to make the code easier to type, read and maintain.
typedef std::list<Employee> EMPLOYEELIST;

// Notice the CompareEarners; we're telling std::set to use our specified
// comparison mechanism to sort its contents.
typedef std::set<Employee, CompareEarners> DEPARTMENTPAYROLL;

// Maps a department code to the ranked payroll of that department.
typedef std::map<std::string, DEPARTMENTPAYROLL> DEPARTMENTLIST;

// Appends the thirteen records of the task's data set to `Employees`.
// Each record is (name, employee ID, salary, department); the IDs match the
// task's CSV data, one distinct ID per employee.
void initialize(EMPLOYEELIST& Employees)
{
    // Initialize our employee list data source.
    Employees.push_back(Employee("Tyler Bennett", "E10297", 32000, "D101"));
    Employees.push_back(Employee("John Rappl", "E21437", 47000, "D050"));
    Employees.push_back(Employee("George Woltman", "E00127", 53500, "D101"));
    Employees.push_back(Employee("Adam Smith", "E63535", 18000, "D202"));
    Employees.push_back(Employee("Claire Buckman", "E39876", 27800, "D202"));
    Employees.push_back(Employee("David McClellan", "E04242", 41500, "D101"));
    Employees.push_back(Employee("Rich Holcomb", "E01234", 49500, "D202"));
    Employees.push_back(Employee("Nathan Adams", "E41298", 21900, "D050"));
    Employees.push_back(Employee("Richard Potter", "E43128", 15900, "D101"));
    Employees.push_back(Employee("David Motsinger", "E27002", 19250, "D202"));
    Employees.push_back(Employee("Tim Sampair", "E03033", 27000, "D101"));
    Employees.push_back(Employee("Kim Arlich", "E10001", 57000, "D190"));
    Employees.push_back(Employee("Timothy Grove", "E16398", 29900, "D190"));
}

// Copies every employee from `Employees` into the payroll container that
// `Departments` holds for that employee's department.
void group(EMPLOYEELIST& Employees, DEPARTMENTLIST& Departments)
{
    EMPLOYEELIST::iterator current = Employees.begin();
    while (current != Employees.end())
    {
        // operator[] creates an empty payroll the first time a department is seen.
        Departments[current->Department].insert(*current);
        ++current;
    }
}

// Prints one report section per department to std::cout: a heading, a column
// header, then at most N payroll entries in the payroll's iteration order,
// followed by a blank line.
void present(DEPARTMENTLIST& Departments, unsigned int N)
{
    DEPARTMENTLIST::iterator dept = Departments.begin();
    for (; dept != Departments.end(); ++dept)
    {
        std::cout << "In department " << dept->first << std::endl;
        std::cout << "Name\t\tID\tSalary\tDepartment" << std::endl;

        // Walk the payroll from its first entry, stopping after N rows or at
        // the end of the department, whichever comes first.
        DEPARTMENTPAYROLL& payroll = dept->second;
        unsigned int shown = 0;
        for (DEPARTMENTPAYROLL::iterator row = payroll.begin();
             shown < N && row != payroll.end();
             ++row, ++shown)
        {
            row->display(std::cout);
        }
        std::cout << std::endl;
    }
}

int main(int argc, char* argv[]) { // Our container for our list of employees. EMPLOYEELIST Employees;

// Fill our list of employees initialize(Employees);

// Our departments. DEPARTMENTLIST Departments;

// Sort our employees into their departments. // This will also rank them. group(Employees, Departments);

// Display the top 3 earners in each department. present(Departments, 3);

return 0; }</lang>

Output:

In department D050
Name            ID      Salary  Department
John Rappl      E21437  47000   D050
Nathan Adams    E41298  21900   D050

In department D101
Name            ID      Salary  Department
George Woltman  E21437  53500   D101
David McClellan E04242  41500   D101
Tyler Bennett   E10297  32000   D101

In department D190
Name            ID      Salary  Department
Kim Arlich      E10001  57000   D190
Timothy Grove   E16398  29900   D190

In department D202
Name            ID      Salary  Department
Rich Holcomb    E01234  49500   D202
Claire Buckman  E39876  27800   D202
David Motsinger E27002  19250   D202

E

<lang e>/** Turn a list of arrays into a list of maps with the given keys. */ def addKeys(keys, rows) {

 def res := [].diverge()
 for row in rows { res.push(__makeMap.fromColumns(keys, row)) }
 return res.snapshot()

}

def data := addKeys(

 ["name",            "id",  "salary", "dept"],
[["Tyler Bennett",   "E10297", 32000, "D101"],
 ["John Rappl",      "E21437", 47000, "D050"],
 ["George Woltman",  "E00127", 53500, "D101"],
 ["Adam Smith",      "E63535", 18000, "D202"],
 ["Claire Buckman",  "E39876", 27800, "D202"],
 ["David McClellan", "E04242", 41500, "D101"],
 ["Rich Holcomb",    "E01234", 49500, "D202"],
 ["Nathan Adams",    "E41298", 21900, "D050"],
 ["Richard Potter",  "E43128", 15900, "D101"],
 ["David Motsinger", "E27002", 19250, "D202"],
 ["Tim Sampair",     "E03033", 27000, "D101"],
 ["Kim Arlich",      "E10001", 57000, "D190"],
 ["Timothy Grove",   "E16398", 29900, "D190"]])

/** Print to `out`, department by department (in sorted key order), up to `n` rows of `data` ranked by descending salary. */
def topSalaries(n, out) {

   var groups := [].asMap()
   for row in data {
       def [=> salary, => dept] | _ := row
       # Each entry is paired with its negated salary, so sort() puts the highest salary first.
       def top := groups.fetch(dept, fn {[]}).with([-salary, row]).sort()
       # Keep only the first min(n, size) entries for this department.
       groups with= (dept, top.run(0, top.size().min(n)))
   }
   for dept => group in groups.sortKeys() {
       out.println(`Department $dept`)
       out.println(`---------------`)
       # The negated salary was only a sort key; print from the row itself.
       for [_, row] in group {
         out.println(`${row["id"]}  $$${row["salary"]}  ${row["name"]}`)
       }
       out.println()
   }

}</lang>

(Note: This uses an append-and-then-sort to maintain the list of top N; a sorted insert or a proper selection algorithm would be more efficient. As long as N is small, this does not matter much; the algorithm is O(n) with respect to the data set.)

<lang e>? topSalaries(3, stdout) Department D050

E21437 $47000 John Rappl E41298 $21900 Nathan Adams

Department D101

E00127 $53500 George Woltman E04242 $41500 David McClellan E10297 $32000 Tyler Bennett

Department D190

E10001 $57000 Kim Arlich E16398 $29900 Timothy Grove

Department D202

E01234 $49500 Rich Holcomb E39876 $27800 Claire Buckman E27002 $19250 David Motsinger </lang>

Haskell

<lang haskell> import Data.List import Control.Monad import Control.Arrow import Text.Printf

groupingOn f a b = f a == f b comparing f a b = compare (f a) (f b) comparingDwn f a b = compare (f b) (f a)

type ID = Int type DEP = [Char] type NAME = [Char] type SALARY = Double type Employee = (ID, DEP, NAME, SALARY)

employees :: [Employee] employees = [(1001,"AB","Janssen A.H.",41000), (101,"KA","'t Woud B.",45000),

            (1013,"AB","de Bont C.A.",65000), (1101,"CC","Modaal A.M.J.",30000),
            (1203,"AB","Anders H.",50000),    (100,"KA","Ezelbips P.J.",52000),
            (1102,"CC","Zagt A.",33000),     (1103,"CC","Ternood T.R.",21000),
            (1104,"CC","Lageln M.",23000),   (1105,"CC","Amperwat A.",19000),
            (1106,"CC","Boon T.J.",25000), (1107,"CC","Beloop L.O.",31000),
            (1009,"CD","Janszoon A.",38665), (1026,"CD","Janszen H.P.",41000),
            (1011,"CC","de Goeij J.",39000), (106,"KA","Pragtweik J.M.V.",42300),
            (111,"KA","Bakeuro S.",31000),  (105,"KA","Clubdrager C.",39800),
            (104,"KA","Karendijk F.",23000), (107,"KA","Centjes R.M.",34000),
            (119,"KA","Tegenstroom H.L.",39000), (1111,"CD","Telmans R.M.",55500),
            (1093,"AB","de Slegte S.",46987), (1199,"CC","Uitlaat G.A.S.",44500)
           ]

nr :: Employee -> ID nr (i,_,_,_) = i

dep :: Employee -> DEP dep (_,d,_,_) = d

name :: Employee -> NAME name (_,_,n,_) = n

sal :: Employee -> SALARY sal (_,_,_,s) = s

-- | Rank employees within groups.
--
-- @dorank n o1 o2@ sorts the employees by the grouping key @o1@, splits them
-- into runs that share the same key, sorts each run by descending @o2@ and
-- keeps at most @n@ entries of each run. The result has one inner list per
-- group, hence @[[Employee]]@.
dorank :: Int ->
          (Employee -> DEP) ->
          (Employee -> SALARY) ->
          [Employee] -> [[Employee]]
dorank n o1 o2 = map (take n . sortBy (comparingDwn o2))
                 . groupBy (groupingOn o1) . sortBy (comparing o1)

toprank :: IO () toprank = do

  printf "%-16s %3s %10s\n" "NAME" "DEP" "TIP" 
  printf "%s\n" $ replicate 31 '='
  mapM_ (mapM_ (ap (ap (printf "%-16s %3s %10.2g\n" . name) dep) sal)) $ dorank 3 dep sal employees

</lang> Output: top 3 per department

*Main> toprank
NAME             DEP        TIP
===============================
de Bont C.A.      AB   65000.00
Anders H.         AB   50000.00
de Slegte S.      AB   46987.00
Uitlaat G.A.S.    CC   44500.00
de Goeij J.       CC   39000.00
Zagt A.           CC   33000.00
Telmans R.M.      CD   55500.00
Janszen H.P.      CD   41000.00
Janszoon A.       CD   38665.00
Ezelbips P.J.     KA   52000.00
't Woud B.        KA   45000.00
Pragtweik J.M.V.  KA   42300.00

J

J has a rich set of primitive functions, which combine the power of an imperative language with the expressiveness of a declarative, SQL-like language:

   NB.  Dynamically generate convenience functions
   ('`',,;:^:_1: N=:{.Employees) =:, (_&{"1)`'' ([^:(_ -: ])L:0)"0 _~ i.# E =: {: Employees

   NB.  Show top ranked employees in each dept: rows are grouped by DEPT (/.~),
   NB.  each group is sorted by descending SALARY (\:) and at most 6 rows of it
   NB.  are kept ((6 <. #) {.); N supplies the header row.
   N , (<@:>"1@:|:@:((6 <. #) {. ] \: SALARY)/.~ DEPT) |: <"1&> E

</lang>

+-----+-----+-----------------+------+
|ID   |DEPT |NAME             |SALARY|
+-----+-----+-----------------+------+
|1013 |AB   |de Bont C.A.     |65000 |
|1203 |AB   |Anders H.        |50000 |
|1093 |AB   |de Slegte S.     |46987 |
|1001 |AB   |Janssen A.H.     |41000 |
+-----+-----+-----------------+------+
|100  |KA   |Ezelbips P.J.    |52000 |
|101  |KA   |'t Woud B.       |45000 |
|106  |KA   |Pragtweik J.M.V. |42300 |
|105  |KA   |Clubdrager C.    |39800 |
|119  |KA   |Tegenstroom H.L. |39000 |
|107  |KA   |Centjes R.M.     |34000 |
+-----+-----+-----------------+------+
|1199 |CC   |Uitlaat G.A.S.   |44500 |
|1011 |CC   |de Goeij J.      |39000 |
|1102 |CC   |Zagt A.          |33000 |
|1107 |CC   |Beloop L.O.      |31000 |
|1101 |CC   |Modaal A.M.J.    |30000 |
|1106 |CC   |Boon T.J.        |25000 |
+-----+-----+-----------------+------+
|1111 |CD   |Telmans R.M.     |55500 |
|1026 |CD   |Janszen H.P.     |41000 |
|1009 |CD   |Janszoon A.      |38665 |
+-----+-----+-----------------+------+

using the data set:

   Employees=: (<;.1~(1 1{.~#);+./@:(;:E.S:0])@:{.)];._2 noun define
   ID   DEPT NAME             SALARY
   1001 AB   Janssen A.H.     41000 
   101  KA   't Woud B.       45000 
   1013 AB   de Bont C.A.     65000 
   1101 CC   Modaal A.M.J.    30000 
   1203 AB   Anders H.        50000 
   100  KA   Ezelbips P.J.    52000 
   1102 CC   Zagt A.          33000 
   1103 CC   Ternood T.R.     21000 
   1104 CC   Lageln M.        23000 
   1105 CC   Amperwat A.      19000 
   1106 CC   Boon T.J.        25000 
   1107 CC   Beloop L.O.      31000 
   1009 CD   Janszoon A.      38665 
   1026 CD   Janszen H.P.     41000 
   1011 CC   de Goeij J.      39000 
   106  KA   Pragtweik J.M.V. 42300 
   111  KA   Bakeuro S.       31000 
   105  KA   Clubdrager C.    39800 
   104  KA   Karendijk F.     23000 
   107  KA   Centjes R.M.     34000 
   119  KA   Tegenstroom H.L. 39000 
   1111 CD   Telmans R.M.     55500 
   1093 AB   de Slegte S.     46987 
   1199 CC   Uitlaat G.A.S.   44500 
   )

Perl

<lang perl>sub zip

  {my @a = @{shift()};
   my @b = @{shift()};
   my @l;
   push @l, shift @a, shift @b while @a and @b;
   return @l;}

# Returns the arguments with repeated values removed; the first occurrence of
# each value is kept and the original order is preserved.
sub uniq
{
    my %seen;
    my @kept;
    foreach my $item (@_)
    {
        # The post-increment is false only the first time a value shows up.
        next if $seen{$item}++;
        push @kept, $item;
    }
    return @kept;
}

my @data =

   map {{ zip [qw(name id salary dept)], [split ','] }}
   split "\n",
   <<'EOF';

Tyler Bennett,E10297,32000,D101 John Rappl,E21437,47000,D050 George Woltman,E00127,53500,D101 Adam Smith,E63535,18000,D202 Claire Buckman,E39876,27800,D202 David McClellan,E04242,41500,D101 Rich Holcomb,E01234,49500,D202 Nathan Adams,E41298,21900,D050 Richard Potter,E43128,15900,D101 David Motsinger,E27002,19250,D202 Tim Sampair,E03033,27000,D101 Kim Arlich,E10001,57000,D190 Timothy Grove,E16398,29900,D190 EOF

@ARGV or die "Please provide a value for N.\n"; my $N = shift;

foreach my $d (sort {$a cmp $b} uniq map {$_->{dept}} @data)

  {print "$d\n";
   my @es =
       sort {$b->{salary} <=> $a->{salary}}
       grep {$_->{dept} eq $d}
       @data;
   foreach (1 .. $N)
      {@es or last;
       my $e = shift @es;
       printf "%-15s | %-6s | %5d\n", @{$e}{qw(name id salary)};}
   print "\n";}</lang>

Python

<lang python>from collections import defaultdict

data = [('Employee Name', 'Employee ID', 'Salary', 'Department'),

       ('Tyler Bennett', 'E10297', 32000, 'D101'),
       ('John Rappl', 'E21437', 47000, 'D050'),
       ('George Woltman', 'E00127', 53500, 'D101'),
       ('Adam Smith', 'E63535', 18000, 'D202'),
       ('Claire Buckman', 'E39876', 27800, 'D202'),
       ('David McClellan', 'E04242', 41500, 'D101'),
       ('Rich Holcomb', 'E01234', 49500, 'D202'),
       ('Nathan Adams', 'E41298', 21900, 'D050'),
       ('Richard Potter', 'E43128', 15900, 'D101'),
       ('David Motsinger', 'E27002', 19250, 'D202'),
       ('Tim Sampair', 'E03033', 27000, 'D101'),
       ('Kim Arlich', 'E10001', 57000, 'D190'),
       ('Timothy Grove', 'E16398', 29900, 'D190')]

departments = defaultdict(list) for rec in data[1:]:

   departments[rec[-1]].append(rec)

N = 3 format = "%-15s " * len(data[0]) for department, recs in departments.iteritems():

   print "Department", department
   print " ", format % data[0]
   for rec in sorted(recs, key=lambda rec: -rec[-2])[:N]:
       print " ", format % rec
   print</lang>

Output:

Department D101
  Employee Name   Employee ID     Salary          Department      
  George Woltman  E00127          53500           D101            
  David McClellan E04242          41500           D101            
  Tyler Bennett   E10297          32000           D101            

Department D202
  Employee Name   Employee ID     Salary          Department      
  Rich Holcomb    E01234          49500           D202            
  Claire Buckman  E39876          27800           D202            
  David Motsinger E27002          19250           D202            

Department D190
  Employee Name   Employee ID     Salary          Department      
  Kim Arlich      E10001          57000           D190            
  Timothy Grove   E16398          29900           D190            

Department D050
  Employee Name   Employee ID     Salary          Department      
  John Rappl      E21437          47000           D050            
  Nathan Adams    E41298          21900           D050

Alternative Solution

Uses namedtuples for database records, and groupby builtin to group records by Department: <lang python>from collections import namedtuple from itertools import groupby

N = 2

db = Employee Name,Employee ID,Salary,Department Tyler Bennett,E10297,32000,D101 John Rappl,E21437,47000,D050 George Woltman,E00127,53500,D101 Adam Smith,E63535,18000,D202 Claire Buckman,E39876,27800,D202 David McClellan,E04242,41500,D101 Rich Holcomb,E01234,49500,D202 Nathan Adams,E41298,21900,D050 Richard Potter,E43128,15900,D101 David Motsinger,E27002,19250,D202 Tim Sampair,E03033,27000,D101 Kim Arlich,E10001,57000,D190 Timothy Grove,E16398,29900,D190

rows = db.split('\n') DBRecord = namedtuple('DBRecord', rows[0].replace(' ', '_')) records = [ DBRecord(*row.split(',')) for row in rows[1:] ] records.sort(key = lambda record: (record.Department, -float(record.Salary))) print '\n\n'.join('\n '.join([dpt] + [str(g) for g in grp][:N])

                 for dpt, grp in groupby(records,
                                         lambda record: record.Department))</lang>

Sample output

D050
  DBRecord(Employee_Name='John Rappl', Employee_ID='E21437', Salary='47000', Department='D050')
  DBRecord(Employee_Name='Nathan Adams', Employee_ID='E41298', Salary='21900', Department='D050')

D101
  DBRecord(Employee_Name='George Woltman', Employee_ID='E00127', Salary='53500', Department='D101')
  DBRecord(Employee_Name='David McClellan', Employee_ID='E04242', Salary='41500', Department='D101')

D190
  DBRecord(Employee_Name='Kim Arlich', Employee_ID='E10001', Salary='57000', Department='D190')
  DBRecord(Employee_Name='Timothy Grove', Employee_ID='E16398', Salary='29900', Department='D190')

D202
  DBRecord(Employee_Name='Rich Holcomb', Employee_ID='E01234', Salary='49500', Department='D202')
  DBRecord(Employee_Name='Claire Buckman', Employee_ID='E39876', Salary='27800', Department='D202')

R

First, read in the data. <lang R> dfr <- read.csv(tc <- textConnection( "Employee Name,Employee ID,Salary,Department Tyler Bennett,E10297,32000,D101 John Rappl,E21437,47000,D050 George Woltman,E00127,53500,D101 Adam Smith,E63535,18000,D202 Claire Buckman,E39876,27800,D202 David McClellan,E04242,41500,D101 Rich Holcomb,E01234,49500,D202 Nathan Adams,E41298,21900,D050 Richard Potter,E43128,15900,D101 David Motsinger,E27002,19250,D202 Tim Sampair,E03033,27000,D101 Kim Arlich,E10001,57000,D190 Timothy Grove,E16398,29900,D190")); close(tc) </lang> To just return the top salary, it's very simple using tapply. <lang R> with(dfr, tapply(Salary, Department, max)) </lang> To return N salaries, we replace max with our own function. <lang R> get.top.N.salaries <- function(N) {

  # For each department, sort its salaries from highest to lowest and keep the
  # first N. The sorted vector must be used directly: sort() does not modify x
  # in place. head() returns the whole vector when N >= length(x).
  with(dfr, tapply(Salary, Department, 
     function(x) 
     {
        head(sort(x, decreasing=TRUE), N)
     }))

}

get.top.N.salaries(3) </lang>

$D050
[1] 47000 21900

$D101
[1] 41500 15900 27000

$D190
[1] 57000 29900

$D202
[1] 27800 49500 19250

To return the whole record for each of the top salaries, a different tack is required. <lang R>

get.top.N.salaries2 <- function(N) {

  #Sort data frame by Department, then by Salary (both descending)
  sorted <- dfr[with(dfr, order(Department, Salary, decreasing=TRUE)),]
  #Split the dataframe up, by Department
  bydept <- split(sorted, sorted$Department)
  #Return the first N rows (or all of them, if the department has fewer).
  #head() is used rather than x[1:n,] because 1:0 is c(1, 0), which would
  #return one row when N is 0.
  lapply(bydept, 
     function(x) 
     {
        head(x, N)
     })

} get.top.N.salaries2(3) </lang>

 $D050
  Employee.Name Employee.ID Salary Department
2    John Rappl      E21437  47000       D050
8  Nathan Adams      E41298  21900       D050

$D101
    Employee.Name Employee.ID Salary Department
3  George Woltman      E00127  53500       D101
6 David McClellan      E04242  41500       D101
1   Tyler Bennett      E10297  32000       D101

$D190
   Employee.Name Employee.ID Salary Department
12    Kim Arlich      E10001  57000       D190
13 Timothy Grove      E16398  29900       D190

$D202
     Employee.Name Employee.ID Salary Department
7     Rich Holcomb      E01234  49500       D202
5   Claire Buckman      E39876  27800       D202
10 David Motsinger      E27002  19250       D202

Ruby

Without much thought to report formatting: <lang ruby>class Employee

 def initialize(name, employee_id, salary, department)
   @name = name
   @employee_id = employee_id
   @salary = salary
   @department = department
 end
 
 # define the compare method and you get sorting for free
 def <=>(anEmployee)
   # sort by salary descending
   anEmployee.salary <=> @salary
 end
 
 attr_reader :salary, :department
 protected :salary

end

def get_employees

 [
   Employee.new("Tyler Bennett", "E10297", 32000, "D101"), 
   Employee.new("John Rappl", "E21437", 47000, "D050"), 
   Employee.new("George Woltman", "E00127", 53500, "D101"), 
   Employee.new("Adam Smith", "E63535", 18000, "D202"), 
   Employee.new("Claire Buckman", "E39876", 27800, "D202"), 
   Employee.new("David McClellan", "E04242", 41500, "D101"), 
   Employee.new("Rich Holcomb", "E01234", 49500, "D202"), 
   Employee.new("Nathan Adams", "E41298", 21900, "D050"), 
   Employee.new("Richard Potter", "E43128", 15900, "D101"), 
   Employee.new("David Motsinger", "E27002", 19250, "D202"), 
   Employee.new("Tim Sampair", "E03033", 27000, "D101"), 
   Employee.new("Kim Arlich", "E10001", 57000, "D190"), 
   Employee.new("Timothy Grove", "E16398", 29900, "D190"), 
 ]

end

# Buckets the given employees by the value of their #department.
# Returns a Hash mapping each department to an Array of its employees, kept in
# the order in which they appear in the input.
def group_by_department(employees)
  employees.inject({}) do |buckets, worker|
    # Start an empty bucket the first time a department is seen.
    (buckets[worker.department] ||= []) << worker
    buckets
  end
end

# For every (department, employees) pair in groups, prints the department,
# then the first n employees in sorted (<=>) order, then an empty line.
def show_top_salaries_per_group(groups, n)
  groups.each do |department, members|
    puts department
    ranked = members.sort
    ranked[0, n].each do |member|
      p member
    end
    puts ""
  end
end

groups = group_by_department(get_employees)

show_top_salaries_per_group(groups,3)</lang>

D101
#<Employee:0x1002fe30 @employee_id="E00127", @name="George Woltman", @department="D101", @salary=53500>
#<Employee:0x1002fd40 @employee_id="E04242", @name="David McClellan", @department="D101", @salary=41500>
#<Employee:0x1002fed0 @employee_id="E10297", @name="Tyler Bennett", @department="D101", @salary=32000>

D190
#<Employee:0x1002fb60 @employee_id="E10001", @name="Kim Arlich", @department="D190", @salary=57000>
#<Employee:0x1002fb10 @employee_id="E16398", @name="Timothy Grove", @department="D190", @salary=29900>

D202
#<Employee:0x1002fcf0 @employee_id="E01234", @name="Rich Holcomb", @department="D202", @salary=49500>
#<Employee:0x1002fd90 @employee_id="E39876", @name="Claire Buckman", @department="D202", @salary=27800>
#<Employee:0x1002fc00 @employee_id="E27002", @name="David Motsinger", @department="D202", @salary=19250>

D050
#<Employee:0x1002fe80 @employee_id="E21437", @name="John Rappl", @department="D050", @salary=47000>
#<Employee:0x1002fca0 @employee_id="E41298", @name="Nathan Adams", @department="D050", @salary=21900>

SMEQL

The following SMEQL example returns the top 6 earners in each department based on this table schema:

 table: Employees
 ----------------
 empID
 dept
 empName
 salary

Source Code:

 srt = orderBy(Employees, (dept, salary), order)
 top = group(srt, [(dept) dept2, max(order) order])
 join(srt, top, a.dept=b.dept2 and b.order - a.order < 6)

Tcl

Works with: Tcl version 8.5

<lang tcl>package require Tcl 8.5

set text {Tyler Bennett,E10297,32000,D101 John Rappl,E21437,47000,D050 George Woltman,E00127,53500,D101 Adam Smith,E63535,18000,D202 Claire Buckman,E39876,27800,D202 David McClellan,E04242,41500,D101 Rich Holcomb,E01234,49500,D202 Nathan Adams,E41298,21900,D050 Richard Potter,E43128,15900,D101 David Motsinger,E27002,19250,D202 Tim Sampair,E03033,27000,D101 Kim Arlich,E10001,57000,D190 Timothy Grove,E16398,29900,D190}

set data [dict create] foreach line [split $text \n] {

   lassign [split $line ,] name id salary dept
   dict lappend data $dept [list $name $id $salary]

}

# Prints, for every department in the dict `data`, a "Department" heading, up
# to n employee records ordered by descending salary (element 2 of each
# record), and a blank line.
proc top_n_salaries {n data} {
    # lrange takes an inclusive end index, so the last wanted index is n-1.
    incr n -1
    dict for {dept staff} $data {
        puts "Department $dept"
        set ranked [lsort -integer -decreasing -index 2 $staff]
        foreach record [lrange $ranked 0 $n] {
            puts [format "   %-20s %-8s %8d" {*}$record]
        }
        puts ""
    }
}

top_n_salaries 3 $data</lang> outputs

Department D101
   George Woltman       E00127      53500
   David McClellan      E04242      41500
   Tyler Bennett        E10297      32000

Department D050
   John Rappl           E21437      47000
   Nathan Adams         E41298      21900

Department D202
   Rich Holcomb         E01234      49500
   Claire Buckman       E39876      27800
   David Motsinger      E27002      19250

Department D190
   Kim Arlich           E10001      57000
   Timothy Grove        E16398      29900

Ursala

The algorithm used by the top function is to lex the data into fields, partition by the last field, sort each partition descending by the second to last, take the first n strings in each partition, and display the list of them in a reasonably understandable form. <lang Ursala>

import std
import nat

data =

-[ Employee Name,Employee ID,Salary,Department Tyler Bennett,E10297,32000,D101 John Rappl,E21437,47000,D050 George Woltman,E00127,53500,D101 Adam Smith,E63535,18000,D202 Claire Buckman,E39876,27800,D202 David McClellan,E04242,41500,D101 Rich Holcomb,E01234,49500,D202 Nathan Adams,E41298,21900,D050 Richard Potter,E43128,15900,D101 David Motsinger,E27002,19250,D202 Tim Sampair,E03033,27000,D101 Kim Arlich,E10001,57000,D190 Timothy Grove,E16398,29900,D190]-

top "n" = @tt sep`,*; mat0+ ^C(~&hz,mat`,*yS)*+ take/*"n"+ *zK2 (nleq+ %np~~)-<x&yzNC

show+

main = top3 data</lang> output:

D190
Kim Arlich,E10001,57000
Timothy Grove,E16398,29900

D101
George Woltman,E00127,53500
David McClellan,E04242,41500
Tyler Bennett,E10297,32000

D202
Rich Holcomb,E01234,49500
Claire Buckman,E39876,27800
David Motsinger,E27002,19250

D050
John Rappl,E21437,47000
Nathan Adams,E41298,21900