-
Notifications
You must be signed in to change notification settings - Fork 0
/
ML_Mapper.java
40 lines (29 loc) · 944 Bytes
/
ML_Mapper.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
package movielens;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import com.opencsv.CSVParser;
public class ML_Mapper extends Mapper<LongWritable, Text, Text, Text> {
static enum Stats {
TOTAL, BAD
}
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();
// URI[] files = context.getCacheFiles();
CSVParser parser = new CSVParser();
String[] record = parser.parseLine(line);
if (record.length == 3) {
context.getCounter(Stats.TOTAL).increment(1);
String title = record[1];
String genres = record[2];
String[] list_genre = genres.split("\\|");
for (String genre : list_genre) {
context.write(new Text(genre.trim()), new Text(title.trim()));
}
} else {
context.getCounter(Stats.BAD).increment(1);
}
}
}