-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwiki.tcl
111 lines (98 loc) · 3.27 KB
/
wiki.tcl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# Requires Tcl 8.5+ and tcllib
# To enable you must .chanset #channel +wiki
package require http
package require htmlparse
package require tls
http::register https 443 [list tls::socket -tls1 1]
namespace eval wiki {
variable max_lines 1
variable max_chars 400
variable url "https://en.wikipedia.org/wiki/"
bind pub -|- ",wiki" wiki::search
#variable parse_regexp {(<table class.*?<p>.*?</p>.*?</table>)??.*?<p>(.*?)</p>\n<table id="toc"}
#variable parse_regexp {(?:</table>)?.*?<p>(.*)((</ul>)|(</p>)).*?((<table id="toc")|(<h2>)|(<table id="disambigbox"))}
variable parse_regexp {<\/table>.*?<p>(.*?)<\/p>}
setudef flag wiki
}
proc wiki::fetch {term {url {}}} {
if {$url != ""} {
set token [http::geturl $url -timeout 10000]
} else {
set query [regsub -all -- {\s} $term "_"]
set token [http::geturl ${wiki::url}${query} -timeout 10000]
}
set data [http::data $token]
set ncode [http::ncode $token]
set meta [http::meta $token]
upvar #0 $token state
set fetched_url $state(url)
http::cleanup $token
# debug
putlog "Fetch! term: $term url: $url fetched: $fetched_url"
set fid [open "w-debug.txt" w]
puts $fid $data
close $fid
# Follow redirects
if {[regexp -- {^3\d{2}$} $ncode]} {
return [wiki::fetch $term [dict get $meta Location]]
}
if {$ncode != 200} {
error "HTTP query failed ($ncode): $data: $meta"
}
# If page returns list of results, choose the first one and fetch that
#if {[regexp -- {<p>.*?((may refer to:)|(in one of the following senses:))</p>} $data]} {
# regexp -- {<ul>.*?<li>.*? title="(.*?)">.*?</li>} $data -> new_query
# return [wiki::fetch $new_query]
#}
if {![regexp -- $wiki::parse_regexp $data -> out]} {
error "Parse error"
}
return [list url $fetched_url result [wiki::sanitise $out]]
}
proc wiki::sanitise {raw} {
set raw [::htmlparse::mapEscapes $raw]
# Remove some help links
set raw [regsub -- {<small class="metadata">.*?</small>} $raw ""]
set raw [regsub -all -- {<(.*?)>} $raw ""]
set raw [regsub -all -- {\[.*?\]} $raw ""]
set raw [regsub -all -- {\n} $raw " "]
return $raw
}
proc wiki::search {nick uhost hand chan argv} {
if {![channel get $chan wiki]} { return }
if {[string length $argv] == 0} {
puthelp "PRIVMSG $chan :Please provide a term."
return
}
set argv [string trim $argv]
# Upper case first character
set argv [string toupper [string index $argv 0]][string range $argv 1 end]
if {[catch {wiki::fetch $argv} data]} {
puthelp "PRIVMSG $chan :Error: $data"
return
}
foreach line [wiki::split_line $wiki::max_chars [dict get $data result]] {
if {[incr count] > $wiki::max_lines} {
puthelp "PRIVMSG $chan :Output truncated. [dict get $data url]"
break
}
putserv [encoding convertfrom utf-8 "PRIVMSG $chan :$line"]
}
}
# by fedex
proc wiki::split_line {max str} {
set last [expr {[string length $str] -1}]
set start 0
set end [expr {$max -1}]
set lines []
while {$start <= $last} {
if {$last >= $end} {
set end [string last { } $str $end]
}
lappend lines [string trim [string range $str $start $end]]
set start $end
set end [expr {$start + $max}]
}
return $lines
}
putlog "wiki.tcl loaded"