-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmerge_primo_misspell.pl
executable file
·112 lines (82 loc) · 2.76 KB
/
merge_primo_misspell.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/perl -w
#create function to clean up input line
# Declare the subroutines
# trim(TEXT)
#   Returns a copy of TEXT with leading and trailing whitespace removed.
#   The argument itself is not modified; the work is done on a local copy.
#   The ($) prototype and its forward declaration are kept so that existing
#   call sites keep parsing exactly as they do today.
sub trim($);
sub trim($)
{
    my ($text) = @_;
    # Alias $_ to the local copy and strip the front, then the back.
    for ($text) {
        s/^\s+//;
        s/\s+$//;
    }
    return $text;
}
# Ask which Primo version to work on. The answer is interpolated as the
# leading path component of every "$primo_version/misspell_eng.txt..." file
# this script opens.
print "What version of Primo are you using ?";
# Note: <> reads from the files named on the command line when there are
# any, and from standard input otherwise.
$primo_version = <>;
# <> yields undef at end of input; stop here instead of passing undef on.
die "No Primo version was entered." unless defined $primo_version;
$primo_version = trim($primo_version);
# An empty answer would turn "$primo_version/misspell_eng.txt" into the
# absolute path "/misspell_eng.txt", so refuse to continue.
die "No Primo version was entered." if $primo_version eq '';
#################################################################
#Merge the contents of the original primo file with the new one #
#################################################################
# Start (or overwrite) $primo_version/misspell_eng.txt as a line-by-line copy
# of $primo_version/misspell_eng.txt.primo_original. Every copied line is
# written with exactly one trailing "\n", so a final line without a newline
# in the original still ends with one in the copy.
{
    my $original_path = "$primo_version/misspell_eng.txt.primo_original";
    my $merged_path   = "$primo_version/misspell_eng.txt";

    # Fail loudly: an unreadable original would otherwise leave an empty
    # output file behind without any clear error.
    open(my $original_fh, '<', $original_path)
        or die "Cannot read $original_path: $!";
    open(my $merged_fh, '>', $merged_path)
        or die "Cannot write $merged_path: $!";

    while (my $entry = <$original_fh>) {
        chomp $entry;
        print {$merged_fh} "$entry\n";
    }

    close($original_fh);
    # Buffered write errors only surface when the handle is closed.
    close($merged_fh) or die "Cannot finish writing $merged_path: $!";
}
#################################################
#Merge the contents of the custom misspell file #
#################################################
# Append every line of custom_misspell.txt (looked up in the current working
# directory) to $primo_version/misspell_eng.txt, each with exactly one
# trailing "\n".
{
    my $custom_path = "custom_misspell.txt";
    my $merged_path = "$primo_version/misspell_eng.txt";

    if (open(my $custom_fh, '<', $custom_path)) {
        open(my $append_fh, '>>', $merged_path)
            or die "Cannot append to $merged_path: $!";

        while (my $entry = <$custom_fh>) {
            chomp $entry;
            print {$append_fh} "$entry\n";
        }

        close($custom_fh);
        # Buffered write errors only surface when the handle is closed.
        close($append_fh) or die "Cannot finish writing $merged_path: $!";
    }
    else {
        # The script has always carried on when this file could not be
        # opened; keep doing so, but say why no custom entries were added.
        warn "Skipping custom misspellings: cannot read $custom_path: $!\n";
    }
}
##############################################################
# Clean up wiki.txt spellings and merge with orig primo file #
##############################################################
# Append the cleaned-up entries of wikipedia_misspell.txt to
# $primo_version/misspell_eng.txt, skipping every entry whose first word
# already appears as the first word of a line in
# $primo_version/misspell_eng.txt.primo_original.
#
# Cleaning a wiki line means: trim it, replace the first "->" with a space,
# and keep only the text before the first comma.
{
    my $original_path = "$primo_version/misspell_eng.txt.primo_original";
    my $merged_path   = "$primo_version/misspell_eng.txt";
    my $wiki_path     = "wikipedia_misspell.txt";

    open(my $append_fh, '>>', $merged_path)
        or die "Cannot append to $merged_path: $!";
    open(my $wiki_fh, '<', $wiki_path) or die("Could not open log file.");

    # Index the first space-separated word of every original line once,
    # instead of re-reading the original file for each wiki line.
    my %in_original;
    open(my $original_fh, '<', $original_path) or die "Failed to read file : $! " ;
    while (my $original_line = <$original_fh>) {
        chomp $original_line;
        my ($known_word) = split / /, $original_line;
        # A blank line, or one starting with a space, has no first word;
        # file it under the empty string.
        $known_word = '' unless defined $known_word;
        $in_original{$known_word} = 1;
    }
    close($original_fh);

    while (my $line = <$wiki_fh>) {
        # Change the first "->" to a space.
        $line = trim($line);
        $line =~ s/\-\>/ /;

        # Do not include anything after a comma.
        my ($entry) = split /\,/, $line;

        # Blank lines, and lines that start with a comma, leave nothing to
        # write; skip them rather than emit a whitespace-only entry.
        next unless defined $entry && $entry ne '';

        # Skip the entry when its misspelling is already in the original
        # Primo file.
        my ($misspelling) = split / /, $entry;
        $misspelling = '' unless defined $misspelling;
        next if $in_original{$misspelling};

        # The space before the newline is part of the existing output
        # format and is kept as-is.
        print {$append_fh} "$entry \n";
    }

    close($wiki_fh);
    # Buffered write errors only surface when the handle is closed.
    close($append_fh) or die "Cannot finish writing $merged_path: $!";
}
##############################
# Sort the new misspell file #
##############################
# Read $primo_version/misspell_eng.txt into memory, order its lines
# case-insensitively, and write them back over the same file.
my $merged_file = "$primo_version/misspell_eng.txt";

open(my $reader, '<', $merged_file) or die "Failed to read file : $! ";
my @entries = <$reader>;    # whole file, one element per line
close($reader);

# Compare lower-cased copies so that upper- and lower-case entries interleave.
my @ordered = sort { lc($a) cmp lc($b) } @entries;

open(my $writer, '>', $merged_file) or die "unable to open file write <$!>";
# Lines still carry their own newlines, so they are written back unchanged.
print {$writer} @ordered;
close($writer);