-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPA5.java
79 lines (68 loc) · 1.88 KB
/
PA5.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Scanner;
import java.util.regex.Pattern;
/**
 * Command-line driver that reads an input script, feeds it to a {@code WebPages}
 * instance, prints one answer line per query to standard output, and finally calls
 * {@code Graph.writeDotFile} with the output name taken from the script.
 *
 * <p>The script named by {@code args[0]} is consumed in this order:
 * <ol>
 *   <li>any leading whitespace-delimited tokens of the form {@code <...>}, which are
 *       read and discarded;</li>
 *   <li>one line: the name passed to {@code Graph.writeDotFile};</li>
 *   <li>one line: the hash size passed to the {@code WebPages} constructor;</li>
 *   <li>lines passed to {@code WebPages.addPage}, terminated by {@code *EOFs*};</li>
 *   <li>lines passed to {@code WebPages.pruneStopWords}, terminated by {@code *STOPs*};</li>
 *   <li>the remaining lines, each treated as a query.</li>
 * </ol>
 */
public class PA5 {

    /** Sentinel line that ends the list of pages. */
    private static final String END_OF_PAGES = "*EOFs*";

    /** Sentinel line that ends the list of stop words. */
    private static final String END_OF_STOP_WORDS = "*STOPs*";

    /** Matches one token wrapped in angle brackets, e.g. {@code <a.html>}. */
    private static final Pattern HYPERLINK_WITH_ANGLES = Pattern.compile("<[^>]+>");

    /**
     * Runs the driver.
     *
     * @param args {@code args[0]} is the path of the input script
     * @throws java.util.NoSuchElementException if the script ends before the output-name
     *         and hash-size lines have been read
     * @throws NumberFormatException if the hash-size line is not an integer
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Usage: java PA5 <input-file>");
            return;
        }

        // try-with-resources closes the scanner even when a later step throws.
        try (Scanner scan = new Scanner(new File(args[0]))) {
            // Leading <...> tokens are skipped; nothing downstream reads them.
            while (scan.hasNext(HYPERLINK_WITH_ANGLES)) {
                scan.next(HYPERLINK_WITH_ANGLES);
            }

            String output = scan.nextLine();

            // The instance itself is never used here. The constructor call is kept in
            // case it has side effects that Graph.writeDotFile relies on — TODO confirm.
            new Graph();

            // Integer.valueOf replaces the deprecated Integer(String) constructor while
            // keeping the argument's static type unchanged.
            Integer hashSize = Integer.valueOf(scan.nextLine().trim());
            WebPages web = new WebPages(hashSize);

            // Pages: every line up to (not including) the *EOFs* sentinel.
            while (scan.hasNextLine()) {
                String pageName = scan.nextLine();
                if (END_OF_PAGES.equals(pageName)) {
                    break;
                }
                web.addPage(pageName);
            }

            // Stop words: the sentinel is tested BEFORE pruning so that "*STOPs*"
            // itself is never handed to pruneStopWords.
            while (scan.hasNextLine()) {
                String stopWord = scan.nextLine();
                if (END_OF_STOP_WORDS.equals(stopWord)) {
                    break;
                }
                web.pruneStopWords(stopWord);
            }

            // Queries: the first one is read line-wise; afterwards a further query is
            // read only while a non-whitespace token remains, so trailing blank lines
            // are ignored. A script with no query lines simply produces no answers.
            boolean haveQuery = scan.hasNextLine();
            while (haveQuery) {
                String queryLine = scan.nextLine();
                String lowered = queryLine.toLowerCase();
                String[] pages = web.bestPages(lowered);
                String[] terms = web.fillQueryArray(lowered);
                Arrays.sort(terms);
                System.out.println(formatAnswer(queryLine, terms, pages));
                haveQuery = scan.hasNext();
            }

            Graph.writeDotFile(output);
        } catch (FileNotFoundException e) {
            System.err.println("PA5: file not found: " + e.getMessage());
        }
    }

    /**
     * Builds the single output line for one query.
     *
     * @param queryLine   the query exactly as it appeared in the script
     * @param sortedTerms the query terms, already sorted
     * @param pages       result of {@code WebPages.bestPages}; {@code pages[0] == null}
     *                    means nothing matched, otherwise {@code pages[0]} and
     *                    {@code pages[1]} are printed after the terms
     * @return the line to print, without a trailing line separator
     */
    private static String formatAnswer(String queryLine, String[] sortedTerms, String[] pages) {
        if (pages[0] == null) {
            return "[" + queryLine + " ]" + " not found";
        }
        StringBuilder line = new StringBuilder("[");
        for (String term : sortedTerms) {
            line.append(term).append(" ");
        }
        line.append("] in ").append(pages[0]).append(": ").append(pages[1]);
        return line.toString();
    }
}