% demo_frequency.erl
% Copyright (c) 2011-2012, Tom Van Cutsem, Vrije Universiteit Brussel
% All rights reserved.
%
% Redistribution and use in source and binary forms, with or without
% modification, are permitted provided that the following conditions are met:
% * Redistributions of source code must retain the above copyright
% notice, this list of conditions and the following disclaimer.
% * Redistributions in binary form must reproduce the above copyright
% notice, this list of conditions and the following disclaimer in the
% documentation and/or other materials provided with the distribution.
% * Neither the name of the Vrije Universiteit Brussel nor the
% names of its contributors may be used to endorse or promote products
% derived from this software without specific prior written permission.
%
%THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
%ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
%WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
%DISCLAIMED. IN NO EVENT SHALL VRIJE UNIVERSITEIT BRUSSEL BE LIABLE FOR ANY
%DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
%(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
%LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
%ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
%(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
%SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
% Word frequency count example using the simple parallel MapReduce
% implementation:
-module(demo_frequency).
-export([freq/1]).
-import(mapreduce, [mapreduce/3]).
%% Word frequency count.
%%
%% Lists the numbered files of DirName via
%% demo_inverted_index:list_numbered_files/1 and runs a MapReduce job over
%% them, with enum_words/3 as the Map function and add_words/3 as the Reduce
%% function.
%%
%% Returns whatever mapreduce:mapreduce/3 returns (presumably a collection
%% mapping each word to its total count -- confirm against the mapreduce
%% module).
freq(DirName) ->
    NumberedFiles = demo_inverted_index:list_numbered_files(DirName),
    %% Fully qualified call: makes the origin of mapreduce/3 explicit and does
    %% not depend on the -import attribute being present.
    mapreduce:mapreduce(NumberedFiles, fun enum_words/3, fun add_words/3).
%% Map function for the word frequency job.
%%
%% Reads FileName with file:consult/1; the file must contain exactly one
%% Erlang term, which is the list of words. Calls Emit(Word, 1) once for every
%% word in that list, in order. The file index (first argument) is ignored.
%%
%% Crashes with badmatch if the file cannot be consulted or does not hold
%% exactly one term.
enum_words(_Index, FileName, Emit) ->
    {ok, [WordList]} = file:consult(FileName),
    EmitOnce = fun(W) -> Emit(W, 1) end,
    lists:foreach(EmitOnce, WordList).
%% Reduce function for the word frequency job.
%%
%% Sums the list Counts and calls Emit(Word, Sum) exactly once; an empty
%% Counts list yields a sum of 0. Returns the result of that Emit call.
add_words(Word, Counts, Emit) ->
    Emit(Word, lists:sum(Counts)).