· 4 years ago · Mar 27, 2021, 07:24 AM
1# http.tcl --
2#
3# Client-side HTTP for GET, POST, and HEAD commands. These routines can
4# be used in untrusted code that uses the Safesock security policy.
5# These procedures use a callback interface to avoid using vwait, which
6# is not defined in the safe base.
7#
8# See the file "license.terms" for information on usage and redistribution of
9# this file, and for a DISCLAIMER OF ALL WARRANTIES.
10
11package require Tcl 8.6
12# Keep this in sync with pkgIndex.tcl and with the install directories in
13# Makefiles
14package provide http 2.8.9
15
16namespace eval http {
17 # Allow resourcing to not clobber existing data
18
19 variable http
20 if {![info exists http]} {
21 array set http {
22 -accept */*
23 -proxyhost {}
24 -proxyport {}
25 -proxyfilter http::ProxyRequired
26 -urlencoding utf-8
27 }
28 # We need a useragent string of this style or various servers will refuse to
29 # send us compressed content even when we ask for it. This follows the
30 # de-facto layout of user-agent strings in current browsers.
31 set http(-useragent) "Mozilla/5.0\
32 ([string totitle $::tcl_platform(platform)]; U;\
33 $::tcl_platform(os) $::tcl_platform(osVersion))\
34 http/[package provide http] Tcl/[package provide Tcl]"
35 }
36
37 proc init {} {
38 # Set up the map for quoting chars. RFC3986 Section 2.3 say percent
39 # encode all except: "... percent-encoded octets in the ranges of
40 # ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period
41 # (%2E), underscore (%5F), or tilde (%7E) should not be created by URI
42 # producers ..."
43 for {set i 0} {$i <= 256} {incr i} {
44 set c [format %c $i]
45 if {![string match {[-._~a-zA-Z0-9]} $c]} {
46 set map($c) %[format %.2X $i]
47 }
48 }
49 # These are handled specially
50 set map(\n) %0D%0A
51 variable formMap [array get map]
52
53 # Create a map for HTTP/1.1 open sockets
54 variable socketmap
55 if {[info exists socketmap]} {
56 # Close but don't remove open sockets on re-init
57 foreach {url sock} [array get socketmap] {
58 catch {close $sock}
59 }
60 }
61 array set socketmap {}
62 }
63 init
64
65 variable urlTypes
66 if {![info exists urlTypes]} {
67 set urlTypes(http) [list 80 ::socket]
68 }
69
70 variable encodings [string tolower [encoding names]]
71 # This can be changed, but iso8859-1 is the RFC standard.
72 variable defaultCharset
73 if {![info exists defaultCharset]} {
74 set defaultCharset "iso8859-1"
75 }
76
77 # Force RFC 3986 strictness in geturl url verification?
78 variable strict
79 if {![info exists strict]} {
80 set strict 1
81 }
82
83 # Let user control default keepalive for compatibility
84 variable defaultKeepalive
85 if {![info exists defaultKeepalive]} {
86 set defaultKeepalive 0
87 }
88
89 namespace export geturl config reset wait formatQuery register unregister
90 # Useful, but not exported: data size status code
91}
92
93# http::Log --
94#
95# Debugging output -- define this to observe HTTP/1.1 socket usage.
96# Should echo any args received.
97#
98# Arguments:
99# msg Message to output
100#
101if {[info command http::Log] eq {}} {proc http::Log {args} {}}
102
103# http::register --
104#
105# See documentation for details.
106#
107# Arguments:
108# proto URL protocol prefix, e.g. https
109# port Default port for protocol
110# command Command to use to create socket
111# Results:
112# list of port and command that was registered.
113
114proc http::register {proto port command} {
115 variable urlTypes
116 set urlTypes([string tolower $proto]) [list $port $command]
117}
118
119# http::unregister --
120#
121# Unregisters URL protocol handler
122#
123# Arguments:
124# proto URL protocol prefix, e.g. https
125# Results:
126# list of port and command that was unregistered.
127
128proc http::unregister {proto} {
129 variable urlTypes
130 set lower [string tolower $proto]
131 if {![info exists urlTypes($lower)]} {
132 return -code error "unsupported url type \"$proto\""
133 }
134 set old $urlTypes($lower)
135 unset urlTypes($lower)
136 return $old
137}
138
139# http::config --
140#
141# See documentation for details.
142#
143# Arguments:
144# args Options parsed by the procedure.
145# Results:
146# TODO
147
148proc http::config {args} {
149 variable http
150 set options [lsort [array names http -*]]
151 set usage [join $options ", "]
152 if {[llength $args] == 0} {
153 set result {}
154 foreach name $options {
155 lappend result $name $http($name)
156 }
157 return $result
158 }
159 set options [string map {- ""} $options]
160 set pat ^-(?:[join $options |])$
161 if {[llength $args] == 1} {
162 set flag [lindex $args 0]
163 if {![regexp -- $pat $flag]} {
164 return -code error "Unknown option $flag, must be: $usage"
165 }
166 return $http($flag)
167 } else {
168 foreach {flag value} $args {
169 if {![regexp -- $pat $flag]} {
170 return -code error "Unknown option $flag, must be: $usage"
171 }
172 set http($flag) $value
173 }
174 }
175}
176
177# http::Finish --
178#
179# Clean up the socket and eval close time callbacks
180#
181# Arguments:
182# token Connection token.
183# errormsg (optional) If set, forces status to error.
184# skipCB (optional) If set, don't call the -command callback. This
185# is useful when geturl wants to throw an exception instead
186# of calling the callback. That way, the same error isn't
187# reported to two places.
188#
189# Side Effects:
190# Closes the socket
191
192proc http::Finish {token {errormsg ""} {skipCB 0}} {
193 variable $token
194 upvar 0 $token state
195 global errorInfo errorCode
196 if {$errormsg ne ""} {
197 set state(error) [list $errormsg $errorInfo $errorCode]
198 set state(status) "error"
199 }
200 if {
201 ($state(status) eq "timeout") || ($state(status) eq "error") ||
202 ([info exists state(connection)] && ($state(connection) eq "close"))
203 } {
204 CloseSocket $state(sock) $token
205 }
206 if {[info exists state(after)]} {
207 after cancel $state(after)
208 }
209 if {[info exists state(-command)] && !$skipCB
210 && ![info exists state(done-command-cb)]} {
211 set state(done-command-cb) yes
212 if {[catch {eval $state(-command) {$token}} err] && $errormsg eq ""} {
213 set state(error) [list $err $errorInfo $errorCode]
214 set state(status) error
215 }
216 }
217}
218
219# http::CloseSocket -
220#
221# Close a socket and remove it from the persistent sockets table. If
222# possible an http token is included here but when we are called from a
223# fileevent on remote closure we need to find the correct entry - hence
224# the second section.
225
226proc ::http::CloseSocket {s {token {}}} {
227 variable socketmap
228 catch {fileevent $s readable {}}
229 set conn_id {}
230 if {$token ne ""} {
231 variable $token
232 upvar 0 $token state
233 if {[info exists state(socketinfo)]} {
234 set conn_id $state(socketinfo)
235 }
236 } else {
237 set map [array get socketmap]
238 set ndx [lsearch -exact $map $s]
239 if {$ndx != -1} {
240 incr ndx -1
241 set conn_id [lindex $map $ndx]
242 }
243 }
244 if {$conn_id eq {} || ![info exists socketmap($conn_id)]} {
245 Log "Closing socket $s (no connection info)"
246 if {[catch {close $s} err]} {
247 Log "Error: $err"
248 }
249 } else {
250 if {[info exists socketmap($conn_id)]} {
251 Log "Closing connection $conn_id (sock $socketmap($conn_id))"
252 if {[catch {close $socketmap($conn_id)} err]} {
253 Log "Error: $err"
254 }
255 unset socketmap($conn_id)
256 } else {
257 Log "Cannot close connection $conn_id - no socket in socket map"
258 }
259 }
260}
261
262# http::reset --
263#
264# See documentation for details.
265#
266# Arguments:
267# token Connection token.
268# why Status info.
269#
270# Side Effects:
271# See Finish
272
273proc http::reset {token {why reset}} {
274 variable $token
275 upvar 0 $token state
276 set state(status) $why
277 catch {fileevent $state(sock) readable {}}
278 catch {fileevent $state(sock) writable {}}
279 Finish $token
280 if {[info exists state(error)]} {
281 set errorlist $state(error)
282 unset state
283 eval ::error $errorlist
284 }
285}
286
287# http::geturl --
288#
289# Establishes a connection to a remote url via http.
290#
291# Arguments:
292# url The http URL to goget.
293# args Option value pairs. Valid options include:
294# -blocksize, -validate, -headers, -timeout
295# Results:
296# Returns a token for this connection. This token is the name of an
297# array that the caller should unset to garbage collect the state.
298
299proc http::geturl {url args} {
300 variable http
301 variable urlTypes
302 variable defaultCharset
303 variable defaultKeepalive
304 variable strict
305
306 # Initialize the state variable, an array. We'll return the name of this
307 # array as the token for the transaction.
308
309 if {![info exists http(uid)]} {
310 set http(uid) 0
311 }
312 set token [namespace current]::[incr http(uid)]
313 variable $token
314 upvar 0 $token state
315 reset $token
316
317 # Process command options.
318
319 array set state {
320 -binary false
321 -blocksize 8192
322 -queryblocksize 8192
323 -validate 0
324 -headers {}
325 -timeout 0
326 -type application/x-www-form-urlencoded
327 -queryprogress {}
328 -protocol 1.1
329 binary 0
330 state connecting
331 meta {}
332 coding {}
333 currentsize 0
334 totalsize 0
335 querylength 0
336 queryoffset 0
337 type text/html
338 body {}
339 status ""
340 http ""
341 connection close
342 }
343 set state(-keepalive) $defaultKeepalive
344 set state(-strict) $strict
345 # These flags have their types verified [Bug 811170]
346 array set type {
347 -binary boolean
348 -blocksize integer
349 -queryblocksize integer
350 -strict boolean
351 -timeout integer
352 -validate boolean
353 }
354 set state(charset) $defaultCharset
355 set options {
356 -binary -blocksize -channel -command -handler -headers -keepalive
357 -method -myaddr -progress -protocol -query -queryblocksize
358 -querychannel -queryprogress -strict -timeout -type -validate
359 }
360 set usage [join [lsort $options] ", "]
361 set options [string map {- ""} $options]
362 set pat ^-(?:[join $options |])$
363 foreach {flag value} $args {
364 if {[regexp -- $pat $flag]} {
365 # Validate numbers
366 if {
367 [info exists type($flag)] &&
368 ![string is $type($flag) -strict $value]
369 } {
370 unset $token
371 return -code error \
372 "Bad value for $flag ($value), must be $type($flag)"
373 }
374 set state($flag) $value
375 } else {
376 unset $token
377 return -code error "Unknown option $flag, can be: $usage"
378 }
379 }
380
381 # Make sure -query and -querychannel aren't both specified
382
383 set isQueryChannel [info exists state(-querychannel)]
384 set isQuery [info exists state(-query)]
385 if {$isQuery && $isQueryChannel} {
386 unset $token
387 return -code error "Can't combine -query and -querychannel options!"
388 }
389
390 # Validate URL, determine the server host and port, and check proxy case
391 # Recognize user:pass@host URLs also, although we do not do anything with
392 # that info yet.
393
394 # URLs have basically four parts.
395 # First, before the colon, is the protocol scheme (e.g. http)
396 # Second, for HTTP-like protocols, is the authority
397 # The authority is preceded by // and lasts up to (but not including)
398 # the following / or ? and it identifies up to four parts, of which
399 # only one, the host, is required (if an authority is present at all).
400 # All other parts of the authority (user name, password, port number)
401 # are optional.
402 # Third is the resource name, which is split into two parts at a ?
403 # The first part (from the single "/" up to "?") is the path, and the
404 # second part (from that "?" up to "#") is the query. *HOWEVER*, we do
405 # not need to separate them; we send the whole lot to the server.
406 # Both, path and query are allowed to be missing, including their
407 # delimiting character.
408 # Fourth is the fragment identifier, which is everything after the first
409 # "#" in the URL. The fragment identifier MUST NOT be sent to the server
410 # and indeed, we don't bother to validate it (it could be an error to
411 # pass it in here, but it's cheap to strip).
412 #
413 # An example of a URL that has all the parts:
414 #
415 # http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes
416 #
417 # The "http" is the protocol, the user is "jschmoe", the password is
418 # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is
419 # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes".
420 #
421 # Note that the RE actually combines the user and password parts, as
422 # recommended in RFC 3986. Indeed, that RFC states that putting passwords
423 # in URLs is a Really Bad Idea, something with which I would agree utterly.
424 #
425 # From a validation perspective, we need to ensure that the parts of the
426 # URL that are going to the server are correctly encoded. This is only
427 # done if $state(-strict) is true (inherited from $::http::strict).
428
429 set URLmatcher {(?x) # this is _expanded_ syntax
430 ^
431 (?: (\w+) : ) ? # <protocol scheme>
432 (?: //
433 (?:
434 (
435 [^@/\#?]+ # <userinfo part of authority>
436 ) @
437 )?
438 ( # <host part of authority>
439 [^/:\#?]+ | # host name or IPv4 address
440 \[ [^/\#?]+ \] # IPv6 address in square brackets
441 )
442 (?: : (\d+) )? # <port part of authority>
443 )?
444 ( [/\?] [^\#]*)? # <path> (including query)
445 (?: \# (.*) )? # <fragment>
446 $
447 }
448
449 # Phase one: parse
450 if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} {
451 unset $token
452 return -code error "Unsupported URL: $url"
453 }
454 # Phase two: validate
455 set host [string trim $host {[]}]; # strip square brackets from IPv6 address
456 if {$host eq ""} {
457 # Caller has to provide a host name; we do not have a "default host"
458 # that would enable us to handle relative URLs.
459 unset $token
460 return -code error "Missing host part: $url"
461 # Note that we don't check the hostname for validity here; if it's
462 # invalid, we'll simply fail to resolve it later on.
463 }
464 if {$port ne "" && $port > 65535} {
465 unset $token
466 return -code error "Invalid port number: $port"
467 }
468 # The user identification and resource identification parts of the URL can
469 # have encoded characters in them; take care!
470 if {$user ne ""} {
471 # Check for validity according to RFC 3986, Appendix A
472 set validityRE {(?xi)
473 ^
474 (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+
475 $
476 }
477 if {$state(-strict) && ![regexp -- $validityRE $user]} {
478 unset $token
479 # Provide a better error message in this error case
480 if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} {
481 return -code error \
482 "Illegal encoding character usage \"$bad\" in URL user"
483 }
484 return -code error "Illegal characters in URL user"
485 }
486 }
487 if {$srvurl ne ""} {
488 # RFC 3986 allows empty paths (not even a /), but servers
489 # return 400 if the path in the HTTP request doesn't start
490 # with / , so add it here if needed.
491 if {[string index $srvurl 0] ne "/"} {
492 set srvurl /$srvurl
493 }
494 # Check for validity according to RFC 3986, Appendix A
495 set validityRE {(?xi)
496 ^
497 # Path part (already must start with / character)
498 (?: [-\w.~!$&'()*+,;=:@/] | %[0-9a-f][0-9a-f] )*
499 # Query part (optional, permits ? characters)
500 (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )?
501 $
502 }
503 if {$state(-strict) && ![regexp -- $validityRE $srvurl]} {
504 unset $token
505 # Provide a better error message in this error case
506 if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} {
507 return -code error \
508 "Illegal encoding character usage \"$bad\" in URL path"
509 }
510 return -code error "Illegal characters in URL path"
511 }
512 } else {
513 set srvurl /
514 }
515 if {$proto eq ""} {
516 set proto http
517 }
518 set lower [string tolower $proto]
519 if {![info exists urlTypes($lower)]} {
520 unset $token
521 return -code error "Unsupported URL type \"$proto\""
522 }
523 set defport [lindex $urlTypes($lower) 0]
524 set defcmd [lindex $urlTypes($lower) 1]
525
526 if {$port eq ""} {
527 set port $defport
528 }
529 if {![catch {$http(-proxyfilter) $host} proxy]} {
530 set phost [lindex $proxy 0]
531 set pport [lindex $proxy 1]
532 }
533
534 # OK, now reassemble into a full URL
535 set url ${proto}://
536 if {$user ne ""} {
537 append url $user
538 append url @
539 }
540 append url $host
541 if {$port != $defport} {
542 append url : $port
543 }
544 append url $srvurl
545 # Don't append the fragment!
546 set state(url) $url
547
548 # If a timeout is specified we set up the after event and arrange for an
549 # asynchronous socket connection.
550
551 set sockopts [list -async]
552 if {$state(-timeout) > 0} {
553 set state(after) [after $state(-timeout) \
554 [list http::reset $token timeout]]
555 }
556
557 # If we are using the proxy, we must pass in the full URL that includes
558 # the server name.
559
560 if {[info exists phost] && ($phost ne "")} {
561 set srvurl $url
562 set targetAddr [list $phost $pport]
563 } else {
564 set targetAddr [list $host $port]
565 }
566 # Proxy connections aren't shared among different hosts.
567 set state(socketinfo) $host:$port
568
569 # Save the accept types at this point to prevent a race condition. [Bug
570 # c11a51c482]
571 set state(accept-types) $http(-accept)
572
573 # See if we are supposed to use a previously opened channel.
574 if {$state(-keepalive)} {
575 variable socketmap
576 if {[info exists socketmap($state(socketinfo))]} {
577 if {[catch {fconfigure $socketmap($state(socketinfo))}]} {
578 Log "WARNING: socket for $state(socketinfo) was closed"
579 unset socketmap($state(socketinfo))
580 } else {
581 set sock $socketmap($state(socketinfo))
582 Log "reusing socket $sock for $state(socketinfo)"
583 catch {fileevent $sock writable {}}
584 catch {fileevent $sock readable {}}
585 }
586 }
587 # don't automatically close this connection socket
588 set state(connection) {}
589 }
590 if {![info exists sock]} {
591 # Pass -myaddr directly to the socket command
592 if {[info exists state(-myaddr)]} {
593 lappend sockopts -myaddr $state(-myaddr)
594 }
595 if {[catch {eval $defcmd $sockopts $targetAddr} sock]} {
596 # something went wrong while trying to establish the connection.
597 # Clean up after events and such, but DON'T call the command
598 # callback (if available) because we're going to throw an
599 # exception from here instead.
600
601 set state(sock) $sock
602 Finish $token "" 1
603 cleanup $token
604 return -code error $sock
605 }
606 }
607 set state(sock) $sock
608 Log "Using $sock for $state(socketinfo)" \
609 [expr {$state(-keepalive)?"keepalive":""}]
610 if {$state(-keepalive)} {
611 set socketmap($state(socketinfo)) $sock
612 }
613
614 if {![info exists phost]} {
615 set phost ""
616 }
617 fileevent $sock writable [list http::Connect $token $proto $phost $srvurl]
618
619 # Wait for the connection to complete.
620 if {![info exists state(-command)]} {
621 # geturl does EVERYTHING asynchronously, so if the user
622 # calls it synchronously, we just do a wait here.
623 http::wait $token
624
625 if {![info exists state]} {
626 # If we timed out then Finish has been called and the users
627 # command callback may have cleaned up the token. If so we end up
628 # here with nothing left to do.
629 return $token
630 } elseif {$state(status) eq "error"} {
631 # Something went wrong while trying to establish the connection.
632 # Clean up after events and such, but DON'T call the command
633 # callback (if available) because we're going to throw an
634 # exception from here instead.
635 set err [lindex $state(error) 0]
636 cleanup $token
637 return -code error $err
638 }
639 }
640
641 return $token
642}
643
644# http::Connected --
645#
646# Callback used when the connection to the HTTP server is actually
647# established.
648#
649# Arguments:
650# token State token.
651# proto What protocol (http, https, etc.) was used to connect.
652# phost Are we using keep-alive? Non-empty if yes.
653# srvurl Service-local URL that we're requesting
654# Results:
655# None.
656
657proc http::Connected {token proto phost srvurl} {
658 variable http
659 variable urlTypes
660
661 variable $token
662 upvar 0 $token state
663
664 # Set back the variables needed here
665 set sock $state(sock)
666 set isQueryChannel [info exists state(-querychannel)]
667 set isQuery [info exists state(-query)]
668 set host [lindex [split $state(socketinfo) :] 0]
669 set port [lindex [split $state(socketinfo) :] 1]
670
671 set lower [string tolower $proto]
672 set defport [lindex $urlTypes($lower) 0]
673
674 # Send data in cr-lf format, but accept any line terminators
675
676 fconfigure $sock -translation {auto crlf} -buffersize $state(-blocksize)
677
678 # The following is disallowed in safe interpreters, but the socket is
679 # already in non-blocking mode in that case.
680
681 catch {fconfigure $sock -blocking off}
682 set how GET
683 if {$isQuery} {
684 set state(querylength) [string length $state(-query)]
685 if {$state(querylength) > 0} {
686 set how POST
687 set contDone 0
688 } else {
689 # There's no query data.
690 unset state(-query)
691 set isQuery 0
692 }
693 } elseif {$state(-validate)} {
694 set how HEAD
695 } elseif {$isQueryChannel} {
696 set how POST
697 # The query channel must be blocking for the async Write to
698 # work properly.
699 fconfigure $state(-querychannel) -blocking 1 -translation binary
700 set contDone 0
701 }
702 if {[info exists state(-method)] && $state(-method) ne ""} {
703 set how $state(-method)
704 }
705 # We cannot handle chunked encodings with -handler, so force HTTP/1.0
706 # until we can manage this.
707 if {[info exists state(-handler)]} {
708 set state(-protocol) 1.0
709 }
710 set accept_types_seen 0
711 if {[catch {
712 puts $sock "$how $srvurl HTTP/$state(-protocol)"
713 if {[dict exists $state(-headers) Host]} {
714 # Allow Host spoofing. [Bug 928154]
715 puts $sock "Host: [dict get $state(-headers) Host]"
716 } elseif {$port == $defport} {
717 # Don't add port in this case, to handle broken servers. [Bug
718 # #504508]
719 puts $sock "Host: $host"
720 } else {
721 puts $sock "Host: $host:$port"
722 }
723 puts $sock "User-Agent: $http(-useragent)"
724 if {$state(-protocol) == 1.0 && $state(-keepalive)} {
725 puts $sock "Connection: keep-alive"
726 }
727 if {$state(-protocol) > 1.0 && !$state(-keepalive)} {
728 puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1
729 }
730 if {[info exists phost] && ($phost ne "") && $state(-keepalive)} {
731 puts $sock "Proxy-Connection: Keep-Alive"
732 }
733 set accept_encoding_seen 0
734 set content_type_seen 0
735 dict for {key value} $state(-headers) {
736 set value [string map [list \n "" \r ""] $value]
737 set key [string map {" " -} [string trim $key]]
738 if {[string equal -nocase $key "host"]} {
739 continue
740 }
741 if {[string equal -nocase $key "accept-encoding"]} {
742 set accept_encoding_seen 1
743 }
744 if {[string equal -nocase $key "accept"]} {
745 set accept_types_seen 1
746 }
747 if {[string equal -nocase $key "content-type"]} {
748 set content_type_seen 1
749 }
750 if {[string equal -nocase $key "content-length"]} {
751 set contDone 1
752 set state(querylength) $value
753 }
754 if {[string length $key]} {
755 puts $sock "$key: $value"
756 }
757 }
758 # Allow overriding the Accept header on a per-connection basis. Useful
759 # for working with REST services. [Bug c11a51c482]
760 if {!$accept_types_seen} {
761 puts $sock "Accept: $state(accept-types)"
762 }
763 if {!$accept_encoding_seen && ![info exists state(-handler)]} {
764 puts $sock "Accept-Encoding: gzip,deflate,compress"
765 }
766 if {$isQueryChannel && $state(querylength) == 0} {
767 # Try to determine size of data in channel. If we cannot seek, the
768 # surrounding catch will trap us
769
770 set start [tell $state(-querychannel)]
771 seek $state(-querychannel) 0 end
772 set state(querylength) \
773 [expr {[tell $state(-querychannel)] - $start}]
774 seek $state(-querychannel) $start
775 }
776
777 # Flush the request header and set up the fileevent that will either
778 # push the POST data or read the response.
779 #
780 # fileevent note:
781 #
782 # It is possible to have both the read and write fileevents active at
783 # this point. The only scenario it seems to affect is a server that
784 # closes the connection without reading the POST data. (e.g., early
785 # versions TclHttpd in various error cases). Depending on the
786 # platform, the client may or may not be able to get the response from
787 # the server because of the error it will get trying to write the post
788 # data. Having both fileevents active changes the timing and the
789 # behavior, but no two platforms (among Solaris, Linux, and NT) behave
790 # the same, and none behave all that well in any case. Servers should
791 # always read their POST data if they expect the client to read their
792 # response.
793
794 if {$isQuery || $isQueryChannel} {
795 if {!$content_type_seen} {
796 puts $sock "Content-Type: $state(-type)"
797 }
798 if {!$contDone} {
799 puts $sock "Content-Length: $state(querylength)"
800 }
801 puts $sock ""
802 fconfigure $sock -translation {auto binary}
803 fileevent $sock writable [list http::Write $token]
804 } else {
805 puts $sock ""
806 flush $sock
807 fileevent $sock readable [list http::Event $sock $token]
808 }
809
810 } err]} {
811 # The socket probably was never connected, or the connection dropped
812 # later.
813
814 # if state(status) is error, it means someone's already called Finish
815 # to do the above-described clean up.
816 if {$state(status) ne "error"} {
817 Finish $token $err
818 }
819 }
820}
821
822# Data access functions:
823# Data - the URL data
824# Status - the transaction status: ok, reset, eof, timeout
825# Code - the HTTP transaction code, e.g., 200
826# Size - the size of the URL data
827
828proc http::data {token} {
829 variable $token
830 upvar 0 $token state
831 return $state(body)
832}
833proc http::status {token} {
834 if {![info exists $token]} {
835 return "error"
836 }
837 variable $token
838 upvar 0 $token state
839 return $state(status)
840}
841proc http::code {token} {
842 variable $token
843 upvar 0 $token state
844 return $state(http)
845}
846proc http::ncode {token} {
847 variable $token
848 upvar 0 $token state
849 if {[regexp {[0-9]{3}} $state(http) numeric_code]} {
850 return $numeric_code
851 } else {
852 return $state(http)
853 }
854}
855proc http::size {token} {
856 variable $token
857 upvar 0 $token state
858 return $state(currentsize)
859}
860proc http::meta {token} {
861 variable $token
862 upvar 0 $token state
863 return $state(meta)
864}
865proc http::error {token} {
866 variable $token
867 upvar 0 $token state
868 if {[info exists state(error)]} {
869 return $state(error)
870 }
871 return ""
872}
873
874# http::cleanup
875#
876# Garbage collect the state associated with a transaction
877#
878# Arguments
879# token The token returned from http::geturl
880#
881# Side Effects
882# unsets the state array
883
884proc http::cleanup {token} {
885 variable $token
886 upvar 0 $token state
887 if {[info exists state]} {
888 unset state
889 }
890}
891
892# http::Connect
893#
894# This callback is made when an asyncronous connection completes.
895#
896# Arguments
897# token The token returned from http::geturl
898#
899# Side Effects
900# Sets the status of the connection, which unblocks
901# the waiting geturl call
902
903proc http::Connect {token proto phost srvurl} {
904 variable $token
905 upvar 0 $token state
906 set err "due to unexpected EOF"
907 if {
908 [eof $state(sock)] ||
909 [set err [fconfigure $state(sock) -error]] ne ""
910 } {
911 Finish $token "connect failed $err"
912 } else {
913 fileevent $state(sock) writable {}
914 ::http::Connected $token $proto $phost $srvurl
915 }
916 return
917}
918
919# http::Write
920#
921# Write POST query data to the socket
922#
923# Arguments
924# token The token for the connection
925#
926# Side Effects
927# Write the socket and handle callbacks.
928
929proc http::Write {token} {
930 variable $token
931 upvar 0 $token state
932 set sock $state(sock)
933
934 # Output a block. Tcl will buffer this if the socket blocks
935 set done 0
936 if {[catch {
937 # Catch I/O errors on dead sockets
938
939 if {[info exists state(-query)]} {
940 # Chop up large query strings so queryprogress callback can give
941 # smooth feedback.
942
943 puts -nonewline $sock \
944 [string range $state(-query) $state(queryoffset) \
945 [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]]
946 incr state(queryoffset) $state(-queryblocksize)
947 if {$state(queryoffset) >= $state(querylength)} {
948 set state(queryoffset) $state(querylength)
949 set done 1
950 }
951 } else {
952 # Copy blocks from the query channel
953
954 set outStr [read $state(-querychannel) $state(-queryblocksize)]
955 puts -nonewline $sock $outStr
956 incr state(queryoffset) [string length $outStr]
957 if {[eof $state(-querychannel)]} {
958 set done 1
959 }
960 }
961 } err]} {
962 # Do not call Finish here, but instead let the read half of the socket
963 # process whatever server reply there is to get.
964
965 set state(posterror) $err
966 set done 1
967 }
968 if {$done} {
969 catch {flush $sock}
970 fileevent $sock writable {}
971 fileevent $sock readable [list http::Event $sock $token]
972 }
973
974 # Callback to the client after we've completely handled everything.
975
976 if {[string length $state(-queryprogress)]} {
977 eval $state(-queryprogress) \
978 [list $token $state(querylength) $state(queryoffset)]
979 }
980}
981
982# http::Event
983#
984# Handle input on the socket
985#
986# Arguments
987# sock The socket receiving input.
988# token The token returned from http::geturl
989#
990# Side Effects
991# Read the socket and handle callbacks.
992
993proc http::Event {sock token} {
994 variable $token
995 upvar 0 $token state
996
997 if {![info exists state]} {
998 Log "Event $sock with invalid token '$token' - remote close?"
999 if {![eof $sock]} {
1000 if {[set d [read $sock]] ne ""} {
1001 Log "WARNING: additional data left on closed socket"
1002 }
1003 }
1004 CloseSocket $sock
1005 return
1006 }
1007 if {$state(state) eq "connecting"} {
1008 if {[catch {gets $sock state(http)} n]} {
1009 return [Finish $token $n]
1010 } elseif {$n >= 0} {
1011 set state(state) "header"
1012 }
1013 } elseif {$state(state) eq "header"} {
1014 if {[catch {gets $sock line} n]} {
1015 return [Finish $token $n]
1016 } elseif {$n == 0} {
1017 # We have now read all headers
1018 # We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3
1019 if {$state(http) == "" || ([regexp {^\S+\s(\d+)} $state(http) {} x] && $x == 100)} {
1020 return
1021 }
1022
1023 set state(state) body
1024
1025 # If doing a HEAD, then we won't get any body
1026 if {$state(-validate)} {
1027 Eof $token
1028 return
1029 }
1030
1031 # For non-chunked transfer we may have no body - in this case we
1032 # may get no further file event if the connection doesn't close
1033 # and no more data is sent. We can tell and must finish up now -
1034 # not later.
1035 if {
1036 !(([info exists state(connection)]
1037 && ($state(connection) eq "close"))
1038 || [info exists state(transfer)])
1039 && ($state(totalsize) == 0)
1040 } {
1041 Log "body size is 0 and no events likely - complete."
1042 Eof $token
1043 return
1044 }
1045
1046 # We have to use binary translation to count bytes properly.
1047 fconfigure $sock -translation binary
1048
1049 if {
1050 $state(-binary) || ![string match -nocase text* $state(type)]
1051 } {
1052 # Turn off conversions for non-text data
1053 set state(binary) 1
1054 }
1055 if {[info exists state(-channel)]} {
1056 if {$state(binary) || [llength [ContentEncoding $token]]} {
1057 fconfigure $state(-channel) -translation binary
1058 }
1059 if {![info exists state(-handler)]} {
1060 # Initiate a sequence of background fcopies
1061 fileevent $sock readable {}
1062 CopyStart $sock $token
1063 return
1064 }
1065 }
1066 } elseif {$n > 0} {
1067 # Process header lines
1068 if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} {
1069 switch -- [string tolower $key] {
1070 content-type {
1071 set state(type) [string trim [string tolower $value]]
1072 # grab the optional charset information
1073 if {[regexp -nocase \
1074 {charset\s*=\s*\"((?:[^""]|\\\")*)\"} \
1075 $state(type) -> cs]} {
1076 set state(charset) [string map {{\"} \"} $cs]
1077 } else {
1078 regexp -nocase {charset\s*=\s*(\S+?);?} \
1079 $state(type) -> state(charset)
1080 }
1081 }
1082 content-length {
1083 set state(totalsize) [string trim $value]
1084 }
1085 content-encoding {
1086 set state(coding) [string trim $value]
1087 }
1088 transfer-encoding {
1089 set state(transfer) \
1090 [string trim [string tolower $value]]
1091 }
1092 proxy-connection -
1093 connection {
1094 set state(connection) \
1095 [string trim [string tolower $value]]
1096 }
1097 }
1098 lappend state(meta) $key [string trim $value]
1099 }
1100 }
1101 } else {
1102 # Now reading body
1103 if {[catch {
1104 if {[info exists state(-handler)]} {
1105 set n [eval $state(-handler) [list $sock $token]]
1106 } elseif {[info exists state(transfer_final)]} {
1107 set line [getTextLine $sock]
1108 set n [string length $line]
1109 if {$n > 0} {
1110 Log "found $n bytes following final chunk"
1111 append state(transfer_final) $line
1112 } else {
1113 Log "final chunk part"
1114 Eof $token
1115 }
1116 } elseif {
1117 [info exists state(transfer)]
1118 && $state(transfer) eq "chunked"
1119 } {
1120 set size 0
1121 set chunk [getTextLine $sock]
1122 set n [string length $chunk]
1123 if {[string trim $chunk] ne ""} {
1124 scan $chunk %x size
1125 if {$size != 0} {
1126 set bl [fconfigure $sock -blocking]
1127 fconfigure $sock -blocking 1
1128 set chunk [read $sock $size]
1129 fconfigure $sock -blocking $bl
1130 set n [string length $chunk]
1131 if {$n >= 0} {
1132 append state(body) $chunk
1133 }
1134 if {$size != [string length $chunk]} {
1135 Log "WARNING: mis-sized chunk:\
1136 was [string length $chunk], should be $size"
1137 }
1138 getTextLine $sock
1139 } else {
1140 set state(transfer_final) {}
1141 }
1142 }
1143 } else {
1144 #Log "read non-chunk $state(currentsize) of $state(totalsize)"
1145 set block [read $sock $state(-blocksize)]
1146 set n [string length $block]
1147 if {$n >= 0} {
1148 append state(body) $block
1149 }
1150 }
1151 if {[info exists state]} {
1152 if {$n >= 0} {
1153 incr state(currentsize) $n
1154 }
1155 # If Content-Length - check for end of data.
1156 if {
1157 ($state(totalsize) > 0)
1158 && ($state(currentsize) >= $state(totalsize))
1159 } {
1160 Eof $token
1161 }
1162 }
1163 } err]} {
1164 return [Finish $token $err]
1165 } else {
1166 if {[info exists state(-progress)]} {
1167 eval $state(-progress) \
1168 [list $token $state(totalsize) $state(currentsize)]
1169 }
1170 }
1171 }
1172
1173 # catch as an Eof above may have closed the socket already
1174 if {![catch {eof $sock} eof] && $eof} {
1175 if {[info exists $token]} {
1176 set state(connection) close
1177 Eof $token
1178 } else {
1179 # open connection closed on a token that has been cleaned up.
1180 CloseSocket $sock
1181 }
1182 return
1183 }
1184}
1185
1186# http::getTextLine --
1187#
1188# Get one line with the stream in blocking crlf mode
1189#
1190# Arguments
1191# sock The socket receiving input.
1192#
1193# Results:
1194# The line of text, without trailing newline
1195
1196proc http::getTextLine {sock} {
1197 set tr [fconfigure $sock -translation]
1198 set bl [fconfigure $sock -blocking]
1199 fconfigure $sock -translation crlf -blocking 1
1200 set r [gets $sock]
1201 fconfigure $sock -translation $tr -blocking $bl
1202 return $r
1203}
1204
1205# http::CopyStart
1206#
1207# Error handling wrapper around fcopy
1208#
1209# Arguments
1210# sock The socket to copy from
1211# token The token returned from http::geturl
1212#
1213# Side Effects
1214# This closes the connection upon error
1215
1216proc http::CopyStart {sock token {initial 1}} {
1217 upvar #0 $token state
1218 if {[info exists state(transfer)] && $state(transfer) eq "chunked"} {
1219 foreach coding [ContentEncoding $token] {
1220 lappend state(zlib) [zlib stream $coding]
1221 }
1222 make-transformation-chunked $sock [namespace code [list CopyChunk $token]]
1223 } else {
1224 if {$initial} {
1225 foreach coding [ContentEncoding $token] {
1226 zlib push $coding $sock
1227 }
1228 }
1229 if {[catch {
1230 fcopy $sock $state(-channel) -size $state(-blocksize) -command \
1231 [list http::CopyDone $token]
1232 } err]} {
1233 Finish $token $err
1234 }
1235 }
1236}
1237
1238proc http::CopyChunk {token chunk} {
1239 upvar 0 $token state
1240 if {[set count [string length $chunk]]} {
1241 incr state(currentsize) $count
1242 if {[info exists state(zlib)]} {
1243 foreach stream $state(zlib) {
1244 set chunk [$stream add $chunk]
1245 }
1246 }
1247 puts -nonewline $state(-channel) $chunk
1248 if {[info exists state(-progress)]} {
1249 eval [linsert $state(-progress) end \
1250 $token $state(totalsize) $state(currentsize)]
1251 }
1252 } else {
1253 Log "CopyChunk Finish $token"
1254 if {[info exists state(zlib)]} {
1255 set excess ""
1256 foreach stream $state(zlib) {
1257 catch {set excess [$stream add -finalize $excess]}
1258 }
1259 puts -nonewline $state(-channel) $excess
1260 foreach stream $state(zlib) { $stream close }
1261 unset state(zlib)
1262 }
1263 Eof $token ;# FIX ME: pipelining.
1264 }
1265}
1266
1267# http::CopyDone
1268#
1269# fcopy completion callback
1270#
1271# Arguments
1272# token The token returned from http::geturl
1273# count The amount transfered
1274#
1275# Side Effects
1276# Invokes callbacks
1277
1278proc http::CopyDone {token count {error {}}} {
1279 variable $token
1280 upvar 0 $token state
1281 set sock $state(sock)
1282 incr state(currentsize) $count
1283 if {[info exists state(-progress)]} {
1284 eval $state(-progress) \
1285 [list $token $state(totalsize) $state(currentsize)]
1286 }
1287 # At this point the token may have been reset
1288 if {[string length $error]} {
1289 Finish $token $error
1290 } elseif {[catch {eof $sock} iseof] || $iseof} {
1291 Eof $token
1292 } else {
1293 CopyStart $sock $token 0
1294 }
1295}
1296
1297# http::Eof
1298#
1299# Handle eof on the socket
1300#
1301# Arguments
1302# token The token returned from http::geturl
1303#
1304# Side Effects
1305# Clean up the socket
1306
1307proc http::Eof {token {force 0}} {
1308 variable $token
1309 upvar 0 $token state
1310 if {$state(state) eq "header"} {
1311 # Premature eof
1312 set state(status) eof
1313 } else {
1314 set state(status) ok
1315 }
1316
1317 if {[string length $state(body)] > 0} {
1318 if {[catch {
1319 foreach coding [ContentEncoding $token] {
1320 set state(body) [zlib $coding $state(body)]
1321 }
1322 } err]} {
1323 Log "error doing decompression: $err"
1324 return [Finish $token $err]
1325 }
1326
1327 if {!$state(binary)} {
1328 # If we are getting text, set the incoming channel's encoding
1329 # correctly. iso8859-1 is the RFC default, but this could be any IANA
1330 # charset. However, we only know how to convert what we have
1331 # encodings for.
1332
1333 set enc [CharsetToEncoding $state(charset)]
1334 if {$enc ne "binary"} {
1335 set state(body) [encoding convertfrom $enc $state(body)]
1336 }
1337
1338 # Translate text line endings.
1339 set state(body) [string map {\r\n \n \r \n} $state(body)]
1340 }
1341 }
1342 Finish $token
1343}
1344
1345# http::wait --
1346#
1347# See documentation for details.
1348#
1349# Arguments:
1350# token Connection token.
1351#
1352# Results:
1353# The status after the wait.
1354
1355proc http::wait {token} {
1356 variable $token
1357 upvar 0 $token state
1358
1359 if {![info exists state(status)] || $state(status) eq ""} {
1360 # We must wait on the original variable name, not the upvar alias
1361 vwait ${token}(status)
1362 }
1363
1364 return [status $token]
1365}
1366
1367# http::formatQuery --
1368#
1369# See documentation for details. Call http::formatQuery with an even
1370# number of arguments, where the first is a name, the second is a value,
1371# the third is another name, and so on.
1372#
1373# Arguments:
1374# args A list of name-value pairs.
1375#
1376# Results:
1377# TODO
1378
1379proc http::formatQuery {args} {
1380 set result ""
1381 set sep ""
1382 foreach i $args {
1383 append result $sep [mapReply $i]
1384 if {$sep eq "="} {
1385 set sep &
1386 } else {
1387 set sep =
1388 }
1389 }
1390 return $result
1391}
1392
1393# http::mapReply --
1394#
1395# Do x-www-urlencoded character mapping
1396#
1397# Arguments:
1398# string The string the needs to be encoded
1399#
1400# Results:
1401# The encoded string
1402
1403proc http::mapReply {string} {
1404 variable http
1405 variable formMap
1406
1407 # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use
1408 # a pre-computed map and [string map] to do the conversion (much faster
1409 # than [regsub]/[subst]). [Bug 1020491]
1410
1411 if {$http(-urlencoding) ne ""} {
1412 set string [encoding convertto $http(-urlencoding) $string]
1413 return [string map $formMap $string]
1414 }
1415 set converted [string map $formMap $string]
1416 if {[string match "*\[\u0100-\uffff\]*" $converted]} {
1417 regexp "\[\u0100-\uffff\]" $converted badChar
1418 # Return this error message for maximum compatability... :^/
1419 return -code error \
1420 "can't read \"formMap($badChar)\": no such element in array"
1421 }
1422 return $converted
1423}
1424
1425# http::ProxyRequired --
1426# Default proxy filter.
1427#
1428# Arguments:
1429# host The destination host
1430#
1431# Results:
1432# The current proxy settings
1433
1434proc http::ProxyRequired {host} {
1435 variable http
1436 if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} {
1437 if {
1438 ![info exists http(-proxyport)] ||
1439 ![string length $http(-proxyport)]
1440 } {
1441 set http(-proxyport) 8080
1442 }
1443 return [list $http(-proxyhost) $http(-proxyport)]
1444 }
1445}
1446
1447# http::CharsetToEncoding --
1448#
1449# Tries to map a given IANA charset to a tcl encoding. If no encoding
1450# can be found, returns binary.
1451#
1452
1453proc http::CharsetToEncoding {charset} {
1454 variable encodings
1455
1456 set charset [string tolower $charset]
1457 if {[regexp {iso-?8859-([0-9]+)} $charset -> num]} {
1458 set encoding "iso8859-$num"
1459 } elseif {[regexp {iso-?2022-(jp|kr)} $charset -> ext]} {
1460 set encoding "iso2022-$ext"
1461 } elseif {[regexp {shift[-_]?js} $charset]} {
1462 set encoding "shiftjis"
1463 } elseif {[regexp {(?:windows|cp)-?([0-9]+)} $charset -> num]} {
1464 set encoding "cp$num"
1465 } elseif {$charset eq "us-ascii"} {
1466 set encoding "ascii"
1467 } elseif {[regexp {(?:iso-?)?lat(?:in)?-?([0-9]+)} $charset -> num]} {
1468 switch -- $num {
1469 5 {set encoding "iso8859-9"}
1470 1 - 2 - 3 {
1471 set encoding "iso8859-$num"
1472 }
1473 }
1474 } else {
1475 # other charset, like euc-xx, utf-8,... may directly map to encoding
1476 set encoding $charset
1477 }
1478 set idx [lsearch -exact $encodings $encoding]
1479 if {$idx >= 0} {
1480 return $encoding
1481 } else {
1482 return "binary"
1483 }
1484}
1485
1486# Return the list of content-encoding transformations we need to do in order.
1487proc http::ContentEncoding {token} {
1488 upvar 0 $token state
1489 set r {}
1490 if {[info exists state(coding)]} {
1491 foreach coding [split $state(coding) ,] {
1492 switch -exact -- $coding {
1493 deflate { lappend r inflate }
1494 gzip - x-gzip { lappend r gunzip }
1495 compress - x-compress { lappend r decompress }
1496 identity {}
1497 default {
1498 return -code error "unsupported content-encoding \"$coding\""
1499 }
1500 }
1501 }
1502 }
1503 return $r
1504}
1505
1506proc http::make-transformation-chunked {chan command} {
1507 set lambda {{chan command} {
1508 set data ""
1509 set size -1
1510 yield
1511 while {1} {
1512 chan configure $chan -translation {crlf binary}
1513 while {[gets $chan line] < 1} { yield }
1514 chan configure $chan -translation {binary binary}
1515 if {[scan $line %x size] != 1} { return -code error "invalid size: \"$line\"" }
1516 set chunk ""
1517 while {$size && ![chan eof $chan]} {
1518 set part [chan read $chan $size]
1519 incr size -[string length $part]
1520 append chunk $part
1521 }
1522 if {[catch {
1523 uplevel #0 [linsert $command end $chunk]
1524 }]} {
1525 http::Log "Error in callback: $::errorInfo"
1526 }
1527 if {[string length $chunk] == 0} {
1528 # channel might have been closed in the callback
1529 catch {chan event $chan readable {}}
1530 return
1531 }
1532 }
1533 }}
1534 coroutine dechunk$chan ::apply $lambda $chan $command
1535 chan event $chan readable [namespace origin dechunk$chan]
1536 return
1537}
1538
1539# Local variables:
1540# indent-tabs-mode: t
1541# End: