Jump to content

Averages/Mode

From Rosetta Code
Task
Averages/Mode
You are encouraged to solve this task according to the task description, using any language you may know.

Write a program to find the mode value of a collection. The case where the collection is empty may be ignored. Care must be taken to handle the case where the mode is non-unique.

If it is not appropriate or possible to support a general collection, use a vector (array), if possible. If it is not appropriate or possible to support an unspecified value type, use integers.

See also: Mean, Median

AutoHotkey

Search autohotkey.com: [1]

Source: AutoHotkey forum by Laszlo <lang autohotkey> MsgBox % Mode("1 2 3") MsgBox % Mode("1 2 0 3 0.0") MsgBox % Mode("0.1 2.2 -0.1 0.22e1 2.20 0.1")

Mode(a, d=" ") { ; the number that occurs most frequently in a list delimited by d (space)

  Sort a, ND%d% 
  Loop Parse, a, %d% 
     If (V != A_LoopField) { 
        If (Ct > MxCt) 
           MxV := V, MxCt := Ct 
        V := A_LoopField, Ct := 1 
     } 
     Else Ct++ 
  Return Ct>MxCt ? V : MxV 

} </lang>

C

Here we use an array of double (easily we could change it to handle integers or any other basic type). <lang c>#include <stdio.h>

  1. include <stdlib.h>
  2. include <stdbool.h>
  1. define INC_VAL 50

// to hold the results typedef struct stack {

 double *data;
 size_t n;
 size_t cap;

} stack_t;

// to hold values and their frequencies typedef struct fav_el {

 double v;
 size_t f;

} fav_el_t;

typedef struct freqs_and_vals_table {

 fav_el_t *vf;
 size_t n;
 size_t cap;

} fav_t;</lang>

<lang c>fav_t *alloc_table() {

 fav_t *r = malloc(sizeof(fav_t));
 r->n = 0;
 r->cap = 1;
 r->vf = malloc(sizeof(fav_el_t)*INC_VAL);
 return r;

}

void free_table(fav_t *t) {

 free(t->vf);
 free(t);

}

// compare functions for bsearch and qsort

  1. define CMP_HOOK(F) ((int (*)(const void *, const void *))F)

int t_cmp(double *k, double *v) {

 if ( *k < *v )
   return -1;
 else if ( *k > *v )
   return 1;
 else
   return 0;

}

int s_cmp(fav_el_t *a, fav_el_t *b) {

 return t_cmp(&a->v, &b->v);

}

int f_cmp(fav_el_t *a, fav_el_t *b) {

 if ( a->f < b->f )
   return 1;
 else if ( a->f > b->f )
   return -1;
 else
   return 0;

}

size_t table_search(fav_t *t, double v) {

 fav_el_t *el = bsearch(&v, t->vf, t->n, sizeof(fav_el_t), CMP_HOOK(s_cmp));
 return (el==NULL) ? -1 : (el - t->vf);

}

void add_value(fav_t *t, double v) {

 int i;
 if ( (i=table_search(t, v)) < 0 ) {
   if ( (t->n + 1) > (t->cap * INC_VAL) ) {
     t->cap++;
     t->vf = realloc(t->vf, t->cap * INC_VAL * sizeof(fav_el_t));
   }
   t->vf[t->n].v = v;
   t->vf[t->n].f = 1;
   t->n++;
 } else {
   t->vf[i].f++;
 }

}

void collect_from_array(fav_t *t, double *v, size_t n) {

 size_t i;
 qsort(v, n, sizeof(double), CMP_HOOK(t_cmp));
 for(i=0; i < n; i++) {
   add_value(t, v[i]);
 }

}

stack_t *init_stack() {

 stack_t *r = malloc(sizeof(stack_t));
 r->n = 0;
 r->cap = 1;
 r->data = malloc(sizeof(double)*INC_VAL);
 return r;

}

void free_stack(stack_t *s) {

 free(s->data);
 free(s);

}

void push_value(stack_t *s, double v) {

 if ( (s->n + 1) > (s->cap * INC_VAL) ) {
   s->cap++;
   s->data = realloc(s->data, s->cap * INC_VAL * sizeof(double));
 }
 s->data[s->n] = v;
 s->n++;

}</lang>

All the codes above are utilities to keep this main function simple. First, we build a table of value-frequency, for each different value, using the utility function collect_from_array. Then we sort the value-frequency pair according to the frequency (descending, cfr. f_cmp), and push on a "stack" all the values with the biggest same frequency.

<lang c>stack_t *median(double *v, size_t n) {

 size_t i, f;
 stack_t *r;
 fav_t *t1 = alloc_table();
 collect_from_array(t1, v, n);
 // now let's sort according to freqs
 qsort(t1->vf, t1->n, sizeof(fav_el_t), CMP_HOOK(f_cmp));
 r = init_stack();
 f = t1->vf[0].f;
 push_value(r, t1->vf[0].v);
 for(i=1; (i < t1->n) && (t1->vf[i].f == f); i++)
   push_value(r, t1->vf[i].v);
 free_table(t1);
 return r;

}</lang>

<lang c>double v1[] = { 1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17 }; double v2[] = { 1, 1, 2, 4, 4 };

  1. define PRINT_MEDIAN(V) do { size_t i; \
   stack_t *r = median(V, sizeof(V)/sizeof(double));	\
   for(i=0; i < r->n; i++) printf("%lf ", r->data[i]); \
   free_stack(r); printf("\n");			\
 } while(0);
 

int main() {

 PRINT_MEDIAN(v1)
 PRINT_MEDIAN(v2);
 return EXIT_SUCCESS;

}</lang>

C++

Works with: g++ version 4.3.2

<lang cpp>

  1. include <iterator>
  2. include <utility>
  3. include <algorithm>
  4. include <list>
  5. include <iostream>

// helper struct template<typename T> struct referring {

 referring(T const& t): value(t) {}
 template<typename Iter>
  bool operator()(std::pair<Iter, int> const& p)
 {
   return *p.first == value;
 }
 T const& value;

};

// requires: // FwdIterator is a ForwardIterator // The value_type of FwdIterator is EqualityComparable // OutIterator is an output iterator // the value_type of FwdIterator is convertible to the value_type of OutIterator // [first, last) is a valid range // provides: // the mode is written to result template<typename FwdIterator, typename OutIterator>

void mode(FwdIterator first, FwdIterator last, OutIterator result)

{

 typedef typename std::iterator_traits<FwdIterator>::value_type value_type;
 typedef std::list<std::pair<FwdIterator, int> > count_type;
 typedef typename count_type::iterator count_iterator;
 // count elements
 count_type counts;
 while (first != last)
 {
   count_iterator element = std::find_if(counts.begin(), counts.end(),
                                         referring<value_type>(*first));
   if (element == counts.end())
     counts.push_back(std::make_pair(first, 1));
   else
     ++element->second;
   ++first;
 }
 // find maximum
 int max = 0;
 for (count_iterator i = counts.begin(); i != counts.end(); ++i)
   if (i->second > max)
     max = i->second;
 // copy corresponding elements to output sequence
 for (count_iterator i = counts.begin(); i != counts.end(); ++i)
   if (i->second == max)
     *result++ = *i->first;

}

// example usage int main() {

 int values[] = { 1, 2, 3, 1, 2, 4, 2, 5, 2, 3, 3, 1, 3, 6 };
 median(values, values + sizeof(values)/sizeof(int),
        std::ostream_iterator<int>(std::cout, " "));
 std::cout << std::endl;

} </lang> Output:

2 3

E

<lang e>pragma.enable("accumulator") def mode(values) {

   def counts := [].asMap().diverge()
   var maxCount := 0
   for v in values {
       maxCount max= (counts[v] := counts.fetch(v, fn{0}) + 1)
   }
   return accum [].asSet() for v => ==maxCount in counts { _.with(v) }

}</lang>

<lang e>? mode([1,1,2,2,3,3,4,4,4,5,5,6,6,7,8,8,9,9,0,0,0])

  1. value: [4, 0].asSet()</lang>

In the line "maxCount max= (counts[v] := counts.fetch(v, fn{0}) + 1)", max= is an update-assignment operation like +=. (The parentheses are unnecessary.) A more verbose version would be:

<lang e> def newCount := counts.fetch(v, fn { 0 }) + 1

 counts[v] := newCount
 maxCount := maxCount.max(newCount)</lang>
 

In for loops, each key and value from the collection are pattern matched against the specified key pattern => value pattern. In "for v => ==maxCount in counts", the == is a pattern-match operator which fails unless the value examined is equal to the specified value; so this selects only the input values (keys in counts) whose counts are equal to the maximum count.

Java

<lang java>import java.util.*;

public class Mode {

   public static <T> List<T> mode(List<? extends T> coll) {
       Map<T, Integer> seen = new HashMap<T, Integer>();
       int max = 0;
       List<T> maxElems = new ArrayList<T>();
       for (T value : coll) {
           if (seen.containsKey(value))
               seen.put(value, seen.get(value) + 1);
           else
               seen.put(value, 1);
           if (seen.get(value) > max) {
               max = seen.get(value);
               maxElems.clear();
               maxElems.add(value);
           } else if (seen.get(value) == max) {
               maxElems.add(value);
           }
       }
       return maxElems;
   }
   public static void main(String[] args) {
       System.out.println(mode(Arrays.asList(1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17))); // prints [6]
       System.out.println(mode(Arrays.asList(1, 1, 2, 4, 4))); // prints [1, 4]
   }

}</lang>

Mathematica

Built-in function commonest returns a list of the most common element(s), even is there is only one 'commonest' number. Example for multiple 'commonest' numbers and a single 'commonest' number: <lang Mathematica>

Commonest[{b, a, c, 2, a, b, 1, 2, 3}]
Commonest[{1, 3, 2, 3}]

</lang> gives back: <lang Mathematica>

{b,a,2}
{3}

</lang>

Objective-C

<lang objc>#import <Foundation/Foundation.h>

@interface NSArray (Mode) - (NSArray *)mode; @end

@implementation NSArray (Mode) - (NSArray *)mode {

   NSCountedSet *seen = [NSCountedSet setWithArray:self];
   int max = 0;
   NSMutableArray *maxElems = [NSMutableArray array];
   NSEnumerator *enm = [seen objectEnumerator];
   id obj;
   while( (obj = [enm nextObject]) ) {
       int count = [seen countForObject:obj];
       if (count > max) {
           max = count;
           [maxElems removeAllObjects];
           [maxElems addObject:obj];
       } else if (count == max) {
           [maxElems addObject:obj];
       }
   }
   return maxElems;

} @end</lang>

OCaml

<lang ocaml>let mode lst =

 let seen = Hashtbl.create 42 in
   List.iter (fun x ->
                let old = if Hashtbl.mem seen x then
                  Hashtbl.find seen x
                else 0 in
                  Hashtbl.replace seen x (old + 1))
     lst;
   let best = Hashtbl.fold (fun _ -> max) seen 0 in
     Hashtbl.fold (fun k v acc ->
                     if v = best then k :: acc
                     else acc)
       seen []</lang>
# mode [1;3;6;6;6;6;7;7;12;12;17];;
- : int list = [6]
# mode [1;1;2;4;4];;
- : int list = [4; 1]

Octave

Of course Octave has the mode function; but it returns only the "lowest" mode if multiple modes are available.

<lang octave>function m = mode2(v)

 sv = sort(v);
 % build two vectors, vals and c, so that
 % c(i) holds how many times vals(i) appears
 i = 1; c = []; vals = [];
 while (i <= numel(v) )
   tc = sum(sv==sv(i)); % it would be faster to count
                        % them "by hand", since sv is sorted...
   c = [c, tc];
   vals = [vals, sv(i)];
   i += tc;
 endwhile
 % stack vals and c building a 2-rows matrix x
 x = cat(1,vals,c);
 % sort the second row (frequencies) into t (most frequent
 % first) and take the "original indices" i ... 
 [t, i] = sort(x(2,:), "descend");
 % ... so that we can use them to sort columns according
 % to frequencies
 nv = x(1,i);
 % at last, collect into m (the result) all the values
 % having the same bigger frequency
 r = t(1); i = 1;
 m = [];
 while ( t(i) == r )
   m = [m, nv(i)];
   i++;
 endwhile

endfunction</lang>

<lang octave>a = [1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]; mode2(a) mode(a)

a = [1, 1, 2, 4, 4]; mode2(a)  % returns 1 and 4 mode(a)  % returns 1 only</lang>

Perl

<lang perl>use strict; use List::Util qw(max);

sub mode {

   my %c;
   foreach my $e ( @_ ) {

$c{$e}++;

   }
   my $best = max(values %c);
   return grep { $c{$_} == $best } keys %c;

}</lang>

<lang perl>print "$_ " foreach mode(1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17); print "\n"; print "$_ " foreach mode(1, 1, 2, 4, 4); print "\n";</lang>

PHP

Note: this function only works with strings and integers, as those are the only things that can be used as keys of an (associative) array in PHP. <lang php><?php function mode($arr) {

   $count = array();
   foreach ( $arr as $e )

$count[$e]++;

   $best = max($count);
   return array_keys($count, $best);

}

print_r(mode(array(1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17))); print_r(mode(array(1, 1, 2, 4, 4))); ?></lang>

Python

<lang python>>>> from collections import defaultdict >>> def modes(values): count = defaultdict(int) for v in values: count[v] +=1 best = max(count.itervalues()) return [k for k,v in count.iteritems() if v == best]

>>> modes([1,3,6,6,6,6,7,7,12,12,17]) [6] >>> modes((1,1,2,4,4)) [1, 4]</lang>

R

<lang R>statmode <- function(v) {

 a <- sort(table(v), decreasing=TRUE)
 r <- c()
 for(i in 1:length(a)) {
   if ( a1 == ai ) {
     r <- c(r, as.integer(names(a)[i]))
   } else break; # since it's sorted, once we find
                 # a different value, we can stop
 }
 r

}

print(statmode(c(1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17))) print(statmode(c(1, 1, 2, 4, 4)))</lang>

Ruby

Here's two methods, the first more Ruby-ish, the second perhaps a bit more efficient. <lang ruby>def mode(ary)

 seen = Hash.new(0)
 ary.each {|value| seen[value] += 1}
 max = seen.values.max
 seen.find_all {|key,value| value == max}.map {|key,value| key}

end

def mode_one_pass(ary)

 seen = Hash.new(0)
 max = 0
 max_elems = []
 ary.each do |value|
   seen[value] += 1
   if seen[value] > max
     max = seen[value]
     max_elems = [value]
   elsif seen[value] == max
     max_elems << value
   end
 end
 max_elems

end

p mode([1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]) # => [6] p mode([1, 1, 2, 4, 4]) # => [1, 4] p mode_one_pass([1, 3, 6, 6, 6, 6, 7, 7, 12, 12, 17]) # => [6] p mode_one_pass([1, 1, 2, 4, 4]) # => [1, 4]</lang>

Smalltalk

Works with: GNU Smalltalk

This code is able to find the mode of any collection of any kind of object. <lang smalltalk>OrderedCollection extend [

 mode [ |s|
    s := self asBag sortedByCount.
    ^ (s select: [ :k | ((s at: 1) key) = (k key) ]) collect: [:k| k value]
 ]

].

  1. ( 1 3 6 6 6 6 7 7 12 12 17 ) asOrderedCollection
   mode displayNl.
  1. ( 1 1 2 4 4) asOrderedCollection
   mode displayNl.</lang>

Tcl

Works with: Tcl version 8.6

<lang tcl># Can find the modal value of any vector of values proc mode {n args} {

   foreach n [list $n {*}$args] {
       dict incr counter $n
   }
   set counts [lsort -stride 2 -index 1 -decreasing $counter]
   set best {}
   foreach {n count} $counts {
       if {[lindex $counts 1] == $count} {
           lappend best $n
       } else break
   }
   return $best

}

  1. Testing

puts [mode 1 3 6 6 6 6 7 7 12 12 17]; # --> 6 puts [mode 1 1 2 4 4]; # --> 1 4</lang> Note that this works for any kind of value.

Ursala

The mode function defined below works on lists of any type and returns a list of the modes. There is no concept of a general collection in Ursala. The algorithm is to partition the list by equality, then partition the classes by their lengths, and then select a representative from each member of the set of classes with the maximum length.

<lang Ursala>#import std

mode = ~&hS+ leql$^&h+ eql|=@K2

  1. cast %nLW

examples = mode~~ (<1,3,6,6,6,7,7,12,12,17>,<1,1,2,4,4>)</lang> The function is tested on a pair of lists, one with a unique mode and one with multiple modes. Here is the output.

(<6>,<4,1>)
Cookies help us deliver our services. By using our services, you agree to our use of cookies.