diff options
| author | 2024-08-09 19:04:10 +0000 | |
|---|---|---|
| committer | 2024-10-23 00:39:49 -0600 | |
| commit | 985dcb2901d2527b79976df787035425294200b9 (patch) | |
| tree | d49caf539627b062c3e8406854ffbc214804661c /wiki.tcl | |
init
Diffstat (limited to 'wiki.tcl')
| -rw-r--r-- | wiki.tcl | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/wiki.tcl b/wiki.tcl new file mode 100644 index 0000000..4e9c203 --- /dev/null +++ b/wiki.tcl @@ -0,0 +1,111 @@ +# Requires Tcl 8.5+ and tcllib +# To enable you must .chanset #channel +wiki + +package require http +package require htmlparse +package require tls +http::register https 443 [list tls::socket -tls1 1] + +namespace eval wiki { + variable max_lines 1 + variable max_chars 400 + variable url "https://en.wikipedia.org/wiki/" + + bind pub -|- ",wiki" wiki::search + + #variable parse_regexp {(<table class.*?<p>.*?</p>.*?</table>)??.*?<p>(.*?)</p>\n<table id="toc"} + #variable parse_regexp {(?:</table>)?.*?<p>(.*)((</ul>)|(</p>)).*?((<table id="toc")|(<h2>)|(<table id="disambigbox"))} + variable parse_regexp {<\/table>.*?<p>(.*?)<\/p>} + setudef flag wiki +} + +proc wiki::fetch {term {url {}}} { + if {$url != ""} { + set token [http::geturl $url -timeout 10000] + } else { + set query [regsub -all -- {\s} $term "_"] + set token [http::geturl ${wiki::url}${query} -timeout 10000] + } + set data [http::data $token] + set ncode [http::ncode $token] + set meta [http::meta $token] + upvar #0 $token state + set fetched_url $state(url) + http::cleanup $token + + # debug + putlog "Fetch! term: $term url: $url fetched: $fetched_url" + set fid [open "w-debug.txt" w] + puts $fid $data + close $fid + + # Follow redirects + if {[regexp -- {^3\d{2}$} $ncode]} { + return [wiki::fetch $term [dict get $meta Location]] + } + if {$ncode != 200} { + error "HTTP query failed ($ncode): $data: $meta" + } + + # If page returns list of results, choose the first one and fetch that + #if {[regexp -- {<p>.*?((may refer to:)|(in one of the following senses:))</p>} $data]} { + # regexp -- {<ul>.*?<li>.*? title="(.*?)">.*?</li>} $data -> new_query + # return [wiki::fetch $new_query] + #} + + if {![regexp -- $wiki::parse_regexp $data -> out]} { + error "Parse error" + } + return [list url $fetched_url result [wiki::sanitise $out]] +} + +proc wiki::sanitise {raw} { + set raw [::htmlparse::mapEscapes $raw] + # Remove some help links + set raw [regsub -- {<small class="metadata">.*?</small>} $raw ""] + set raw [regsub -all -- {<(.*?)>} $raw ""] + set raw [regsub -all -- {\[.*?\]} $raw ""] + set raw [regsub -all -- {\n} $raw " "] + return $raw +} + +proc wiki::search {nick uhost hand chan argv} { + if {![channel get $chan wiki]} { return } + if {[string length $argv] == 0} { + puthelp "PRIVMSG $chan :Please provide a term." + return + } + set argv [string trim $argv] + # Upper case first character + set argv [string toupper [string index $argv 0]][string range $argv 1 end] + if {[catch {wiki::fetch $argv} data]} { + puthelp "PRIVMSG $chan :Error: $data" + return + } + foreach line [wiki::split_line $wiki::max_chars [dict get $data result]] { + if {[incr count] > $wiki::max_lines} { + puthelp "PRIVMSG $chan :Output truncated. [dict get $data url]" + break + } + putserv [encoding convertfrom utf-8 "PRIVMSG $chan :$line"] + } +} + +# by fedex +proc wiki::split_line {max str} { + set last [expr {[string length $str] -1}] + set start 0 + set end [expr {$max -1}] + set lines [] + while {$start <= $last} { + if {$last >= $end} { + set end [string last { } $str $end] + } + lappend lines [string trim [string range $str $start $end]] + set start $end + set end [expr {$start + $max}] + } + return $lines +} + +putlog "wiki.tcl loaded" |
