aboutsummaryrefslogtreecommitdiff
path: root/wiki.tcl
diff options
context:
space:
mode:
authorGravatar fireonlive2024-08-09 19:04:10 +0000
committerGravatar steering72532024-10-23 00:39:49 -0600
commit985dcb2901d2527b79976df787035425294200b9 (patch)
treed49caf539627b062c3e8406854ffbc214804661c /wiki.tcl
init
Diffstat (limited to 'wiki.tcl')
-rw-r--r--wiki.tcl111
1 files changed, 111 insertions, 0 deletions
diff --git a/wiki.tcl b/wiki.tcl
new file mode 100644
index 0000000..4e9c203
--- /dev/null
+++ b/wiki.tcl
@@ -0,0 +1,111 @@
+# Requires Tcl 8.5+ and tcllib
+# To enable you must .chanset #channel +wiki
+
+package require http
+package require htmlparse
+package require tls
+http::register https 443 [list tls::socket -tls1 1]
+
+namespace eval wiki {
+ variable max_lines 1
+ variable max_chars 400
+ variable url "https://en.wikipedia.org/wiki/"
+
+ bind pub -|- ",wiki" wiki::search
+
+ #variable parse_regexp {(<table class.*?<p>.*?</p>.*?</table>)??.*?<p>(.*?)</p>\n<table id="toc"}
+ #variable parse_regexp {(?:</table>)?.*?<p>(.*)((</ul>)|(</p>)).*?((<table id="toc")|(<h2>)|(<table id="disambigbox"))}
+ variable parse_regexp {<\/table>.*?<p>(.*?)<\/p>}
+ setudef flag wiki
+}
+
+proc wiki::fetch {term {url {}}} {
+ if {$url != ""} {
+ set token [http::geturl $url -timeout 10000]
+ } else {
+ set query [regsub -all -- {\s} $term "_"]
+ set token [http::geturl ${wiki::url}${query} -timeout 10000]
+ }
+ set data [http::data $token]
+ set ncode [http::ncode $token]
+ set meta [http::meta $token]
+ upvar #0 $token state
+ set fetched_url $state(url)
+ http::cleanup $token
+
+ # debug
+ putlog "Fetch! term: $term url: $url fetched: $fetched_url"
+ set fid [open "w-debug.txt" w]
+ puts $fid $data
+ close $fid
+
+ # Follow redirects
+ if {[regexp -- {^3\d{2}$} $ncode]} {
+ return [wiki::fetch $term [dict get $meta Location]]
+ }
+ if {$ncode != 200} {
+ error "HTTP query failed ($ncode): $data: $meta"
+ }
+
+ # If page returns list of results, choose the first one and fetch that
+ #if {[regexp -- {<p>.*?((may refer to:)|(in one of the following senses:))</p>} $data]} {
+ # regexp -- {<ul>.*?<li>.*? title="(.*?)">.*?</li>} $data -> new_query
+ # return [wiki::fetch $new_query]
+ #}
+
+ if {![regexp -- $wiki::parse_regexp $data -> out]} {
+ error "Parse error"
+ }
+ return [list url $fetched_url result [wiki::sanitise $out]]
+}
+
+proc wiki::sanitise {raw} {
+ set raw [::htmlparse::mapEscapes $raw]
+ # Remove some help links
+ set raw [regsub -- {<small class="metadata">.*?</small>} $raw ""]
+ set raw [regsub -all -- {<(.*?)>} $raw ""]
+ set raw [regsub -all -- {\[.*?\]} $raw ""]
+ set raw [regsub -all -- {\n} $raw " "]
+ return $raw
+}
+
+proc wiki::search {nick uhost hand chan argv} {
+ if {![channel get $chan wiki]} { return }
+ if {[string length $argv] == 0} {
+ puthelp "PRIVMSG $chan :Please provide a term."
+ return
+ }
+ set argv [string trim $argv]
+ # Upper case first character
+ set argv [string toupper [string index $argv 0]][string range $argv 1 end]
+ if {[catch {wiki::fetch $argv} data]} {
+ puthelp "PRIVMSG $chan :Error: $data"
+ return
+ }
+ foreach line [wiki::split_line $wiki::max_chars [dict get $data result]] {
+ if {[incr count] > $wiki::max_lines} {
+ puthelp "PRIVMSG $chan :Output truncated. [dict get $data url]"
+ break
+ }
+ putserv [encoding convertfrom utf-8 "PRIVMSG $chan :$line"]
+ }
+}
+
+# by fedex
+proc wiki::split_line {max str} {
+ set last [expr {[string length $str] -1}]
+ set start 0
+ set end [expr {$max -1}]
+ set lines []
+ while {$start <= $last} {
+ if {$last >= $end} {
+ set end [string last { } $str $end]
+ }
+ lappend lines [string trim [string range $str $start $end]]
+ set start $end
+ set end [expr {$start + $max}]
+ }
+ return $lines
+}
+
+putlog "wiki.tcl loaded"