This repository has been archived by the owner on Jan 30, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWebPages.java
84 lines (71 loc) · 2.31 KB
/
WebPages.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import java.util.ArrayList;
/**
 * Index of the words found in a collection of web pages.
 *
 * <p>Each page passed to {@link #addPage(String)} is handed to {@code HTMLParser}; every word
 * the parser returns is recorded in a {@code BST} of {@code Term} objects, one term per
 * distinct word, with an occurrence recorded per page the word was seen on.
 *
 * <p>NOTE(review): the page counter is {@code static} but is reset by every constructor call,
 * so it is effectively shared by (and clobbered across) all instances. It is kept that way
 * because {@link #getDocCount()} is a static accessor that other code may rely on.
 */
public class WebPages {

    /** Holds the Term objects associated with each parsed word. */
    private final BST termIndex;

    /** Number of pages added through {@link #addPage(String)} since the last construction. */
    private static int docCount;

    /** Creates an empty index and resets the (static) page counter to zero. */
    public WebPages() {
        docCount = 0;
        termIndex = new BST();
    }

    /**
     * Parses {@code document} and records every parsed word in the term index.
     *
     * <p>The parsed words may contain duplicates; a repeated word adds another occurrence to
     * its existing term instead of creating a second term. The page counter is incremented
     * once per call, even if the same document is added more than once.
     *
     * @param document name of the page, passed to {@code HTMLParser} and stored with each
     *                 occurrence
     */
    public void addPage(String document) {
        HTMLParser pageParser = new HTMLParser(document);
        docCount++;

        for (String word : pageParser.getParsedArray()) {
            Term existing = findTerm(word);
            if (existing != null) {
                existing.addNewOccurrence(document);
            } else {
                // Also covers the empty-index case: nothing is found, so the word is added.
                addNewTerm(word, document);
            }
        }
    }

    /**
     * Returns the first term in the index whose name equals {@code name}.
     *
     * @param name word to look for; must not be null
     * @return the matching term, or {@code null} when the index has no such term
     */
    private Term findTerm(String name) {
        for (Term term : termIndex) {
            if (name.equals(term.getName())) {
                return term; // stop at the first match instead of walking the rest of the index
            }
        }
        return null;
    }

    /**
     * Adds a term for {@code name}, first seen in {@code document}, to the index.
     *
     * @param name     the word
     * @param document the page the word was found on
     */
    private void addNewTerm(String name, String document) {
        // BST.add takes the document first, then the word.
        termIndex.add(document, name);
    }

    /** Prints the header {@code WORDS} followed by the name of every term, in index iteration order. */
    public void printTerms() {
        System.out.println("WORDS");
        for (Term word : termIndex) {
            System.out.println(word.getName());
        }
    }

    /**
     * Returns the names of the pages on which {@code word} occurs.
     *
     * <p>The word is lower-cased before the lookup.
     * NOTE(review): lower-casing uses the default locale; presumably this mirrors how
     * {@code HTMLParser} normalizes words - confirm before switching to a fixed locale.
     *
     * @param word the word to look up; {@code null} is treated as "not found"
     * @return the document name of each recorded occurrence of the word, or an empty array
     *         when the word is {@code null} or not in the index
     */
    public String[] whichPages(String word) {
        ArrayList<String> pages = new ArrayList<String>();
        if (word != null) {
            Term match = findTerm(word.toLowerCase());
            if (match != null) {
                for (Occurrence occ : match.getDocsList()) {
                    pages.add(occ.getDocName());
                }
            }
        }
        return pages.toArray(new String[0]);
    }

    /**
     * Returns the number of pages added since a {@code WebPages} was last constructed.
     *
     * @return the current page count
     */
    public static int getDocCount() {
        return docCount;
    }

    /**
     * Returns the term index backing this object.
     *
     * @return the internal {@code BST} itself, not a copy
     */
    public BST getTermIndex() {
        return termIndex;
    }
}