--- /dev/null
+package HTTP::Config;
+
+use strict;
+use URI;
+use vars qw($VERSION);
+
+$VERSION = "6.00";
+
+sub new {
+ my $class = shift;
+ return bless [], $class;
+}
+
+sub entries {
+ my $self = shift;
+ @$self;
+}
+
+sub empty {
+ my $self = shift;
+ not @$self;
+}
+
+sub add {
+ if (@_ == 2) {
+ my $self = shift;
+ push(@$self, shift);
+ return;
+ }
+ my($self, %spec) = @_;
+ push(@$self, \%spec);
+ return;
+}
+
+sub find2 {
+ my($self, %spec) = @_;
+ my @found;
+ my @rest;
+ ITEM:
+ for my $item (@$self) {
+ for my $k (keys %spec) {
+ if (!exists $item->{$k} || $spec{$k} ne $item->{$k}) {
+ push(@rest, $item);
+ next ITEM;
+ }
+ }
+ push(@found, $item);
+ }
+ return \@found unless wantarray;
+ return \@found, \@rest;
+}
+
+sub find {
+ my $self = shift;
+ my $f = $self->find2(@_);
+ return @$f if wantarray;
+ return $f->[0];
+}
+
+sub remove {
+ my($self, %spec) = @_;
+ my($removed, $rest) = $self->find2(%spec);
+ @$self = @$rest if @$removed;
+ return @$removed;
+}
+
+my %MATCH = (
+ m_scheme => sub {
+ my($v, $uri) = @_;
+ return $uri->_scheme eq $v; # URI known to be canonical
+ },
+ m_secure => sub {
+ my($v, $uri) = @_;
+ my $secure = $uri->can("secure") ? $uri->secure : $uri->_scheme eq "https";
+ return $secure == !!$v;
+ },
+ m_host_port => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("host_port");
+ return $uri->host_port eq $v, 7;
+ },
+ m_host => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("host");
+ return $uri->host eq $v, 6;
+ },
+ m_port => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("port");
+ return $uri->port eq $v;
+ },
+ m_domain => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("host");
+ my $h = $uri->host;
+ $h = "$h.local" unless $h =~ /\./;
+ $v = ".$v" unless $v =~ /^\./;
+ return length($v), 5 if substr($h, -length($v)) eq $v;
+ return 0;
+ },
+ m_path => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("path");
+ return $uri->path eq $v, 4;
+ },
+ m_path_prefix => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("path");
+ my $path = $uri->path;
+ my $len = length($v);
+ return $len, 3 if $path eq $v;
+ return 0 if length($path) <= $len;
+ $v .= "/" unless $v =~ m,/\z,,;
+ return $len, 3 if substr($path, 0, length($v)) eq $v;
+ return 0;
+ },
+ m_path_match => sub {
+ my($v, $uri) = @_;
+ return unless $uri->can("path");
+ return $uri->path =~ $v;
+ },
+ m_uri__ => sub {
+ my($v, $k, $uri) = @_;
+ return unless $uri->can($k);
+ return 1 unless defined $v;
+ return $uri->$k eq $v;
+ },
+ m_method => sub {
+ my($v, $uri, $request) = @_;
+ return $request && $request->method eq $v;
+ },
+ m_proxy => sub {
+ my($v, $uri, $request) = @_;
+ return $request && ($request->{proxy} || "") eq $v;
+ },
+ m_code => sub {
+ my($v, $uri, $request, $response) = @_;
+ $v =~ s/xx\z//;
+ return unless $response;
+ return length($v), 2 if substr($response->code, 0, length($v)) eq $v;
+ },
+ m_media_type => sub { # for request too??
+ my($v, $uri, $request, $response) = @_;
+ return unless $response;
+ return 1, 1 if $v eq "*/*";
+ my $ct = $response->content_type;
+ return 2, 1 if $v =~ s,/\*\z,, && $ct =~ m,^\Q$v\E/,;
+ return 3, 1 if $v eq "html" && $response->content_is_html;
+ return 4, 1 if $v eq "xhtml" && $response->content_is_xhtml;
+ return 10, 1 if $v eq $ct;
+ return 0;
+ },
+ m_header__ => sub {
+ my($v, $k, $uri, $request, $response) = @_;
+ return unless $request;
+ return 1 if $request->header($k) eq $v;
+ return 1 if $response && $response->header($k) eq $v;
+ return 0;
+ },
+ m_response_attr__ => sub {
+ my($v, $k, $uri, $request, $response) = @_;
+ return unless $response;
+ return 1 if !defined($v) && exists $response->{$k};
+ return 0 unless exists $response->{$k};
+ return 1 if $response->{$k} eq $v;
+ return 0;
+ },
+);
+
+sub matching {
+ my $self = shift;
+ if (@_ == 1) {
+ if ($_[0]->can("request")) {
+ unshift(@_, $_[0]->request);
+ unshift(@_, undef) unless defined $_[0];
+ }
+ unshift(@_, $_[0]->uri_canonical) if $_[0] && $_[0]->can("uri_canonical");
+ }
+ my($uri, $request, $response) = @_;
+ $uri = URI->new($uri) unless ref($uri);
+
+ my @m;
+ ITEM:
+ for my $item (@$self) {
+ my $order;
+ for my $ikey (keys %$item) {
+ my $mkey = $ikey;
+ my $k;
+ $k = $1 if $mkey =~ s/__(.*)/__/;
+ if (my $m = $MATCH{$mkey}) {
+ #print "$ikey $mkey\n";
+ my($c, $o);
+ my @arg = (
+ defined($k) ? $k : (),
+ $uri, $request, $response
+ );
+ my $v = $item->{$ikey};
+ $v = [$v] unless ref($v) eq "ARRAY";
+ for (@$v) {
+ ($c, $o) = $m->($_, @arg);
+ #print " - $_ ==> $c $o\n";
+ last if $c;
+ }
+ next ITEM unless $c;
+ $order->[$o || 0] += $c;
+ }
+ }
+ $order->[7] ||= 0;
+ $item->{_order} = join(".", reverse map sprintf("%03d", $_ || 0), @$order);
+ push(@m, $item);
+ }
+ @m = sort { $b->{_order} cmp $a->{_order} } @m;
+ delete $_->{_order} for @m;
+ return @m if wantarray;
+ return $m[0];
+}
+
+sub add_item {
+ my $self = shift;
+ my $item = shift;
+ return $self->add(item => $item, @_);
+}
+
+sub remove_items {
+ my $self = shift;
+ return map $_->{item}, $self->remove(@_);
+}
+
+sub matching_items {
+ my $self = shift;
+ return map $_->{item}, $self->matching(@_);
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Config - Configuration for request and response objects
+
+=head1 SYNOPSIS
+
+ use HTTP::Config;
+ my $c = HTTP::Config->new;
+ $c->add(m_domain => ".example.com", m_scheme => "http", verbose => 1);
+
+ use HTTP::Request;
+ my $request = HTTP::Request->new(GET => "http://www.example.com");
+
+ if (my @m = $c->matching($request)) {
+ print "Yadayada\n" if $m[0]->{verbose};
+ }
+
+=head1 DESCRIPTION
+
+An C<HTTP::Config> object is a list of entries that
+can be matched against request or request/response pairs. Its
+purpose is to hold configuration data that can be looked up given a
+request or response object.
+
+Each configuration entry is a hash. Some keys specify matching to
+occur against attributes of request/response objects. Other keys can
+be used to hold user data.
+
+The following methods are provided:
+
+=over 4
+
+=item $conf = HTTP::Config->new
+
+Constructs a new empty C<HTTP::Config> object and returns it.
+
+=item $conf->entries
+
+Returns the list of entries in the configuration object.
+In scalar context returns the number of entries.
+
+=item $conf->empty
+
+Return true if there are no entries in the configuration object.
+This is just a shorthand for C<< not $conf->entries >>.
+
+=item $conf->add( %matchspec, %other )
+
+=item $conf->add( \%entry )
+
+Adds a new entry to the configuration.
+You can either pass separate key/value pairs or a hash reference.
+
+=item $conf->remove( %spec )
+
+Removes (and returns) the entries that have matches for all the key/value pairs in %spec.
+If %spec is empty this will match all entries; so it will empty the configuation object.
+
+=item $conf->matching( $uri, $request, $response )
+
+=item $conf->matching( $uri )
+
+=item $conf->matching( $request )
+
+=item $conf->matching( $response )
+
+Returns the entries that match the given $uri, $request and $response triplet.
+
+If called with a single $request object then the $uri is obtained by calling its 'uri_canonical' method.
+If called with a single $response object, then the request object is obtained by calling its 'request' method;
+and then the $uri is obtained as if a single $request was provided.
+
+The entries are returned with the most specific matches first.
+In scalar context returns the most specific match or C<undef> in none match.
+
+=item $conf->add_item( $item, %matchspec )
+
+=item $conf->remove_items( %spec )
+
+=item $conf->matching_items( $uri, $request, $response )
+
+Wrappers that hides the entries themselves.
+
+=back
+
+=head2 Matching
+
+The following keys on a configuration entry specify matching. For all
+of these you can provide an array of values instead of a single value.
+The entry matches if at least one of the values in the array matches.
+
+Entries that require match against a response object attribute will never match
+unless a response object was provided.
+
+=over
+
+=item m_scheme => $scheme
+
+Matches if the URI uses the specified scheme; e.g. "http".
+
+=item m_secure => $bool
+
+If $bool is TRUE; matches if the URI uses a secure scheme. If $bool
+is FALSE; matches if the URI does not use a secure scheme. An example
+of a secure scheme is "https".
+
+=item m_host_port => "$hostname:$port"
+
+Matches if the URI's host_port method return the specified value.
+
+=item m_host => $hostname
+
+Matches if the URI's host method returns the specified value.
+
+=item m_port => $port
+
+Matches if the URI's port method returns the specified value.
+
+=item m_domain => ".$domain"
+
+Matches if the URI's host method return a value that within the given
+domain. The hostname "www.example.com" will for instance match the
+domain ".com".
+
+=item m_path => $path
+
+Matches if the URI's path method returns the specified value.
+
+=item m_path_prefix => $path
+
+Matches if the URI's path is the specified path or has the specified
+path as prefix.
+
+=item m_path_match => $Regexp
+
+Matches if the regular expression matches the URI's path. Eg. qr/\.html$/.
+
+=item m_method => $method
+
+Matches if the request method matches the specified value. Eg. "GET" or "POST".
+
+=item m_code => $digit
+
+=item m_code => $status_code
+
+Matches if the response status code matches. If a single digit is
+specified; matches for all response status codes beginning with that digit.
+
+=item m_proxy => $url
+
+Matches if the request is to be sent to the given Proxy server.
+
+=item m_media_type => "*/*"
+
+=item m_media_type => "text/*"
+
+=item m_media_type => "html"
+
+=item m_media_type => "xhtml"
+
+=item m_media_type => "text/html"
+
+Matches if the response media type matches.
+
+With a value of "html" matches if $response->content_is_html returns TRUE.
+With a value of "xhtml" matches if $response->content_is_xhtml returns TRUE.
+
+=item m_uri__I<$method> => undef
+
+Matches if the URI object provides the method.
+
+=item m_uri__I<$method> => $string
+
+Matches if the URI's $method method returns the given value.
+
+=item m_header__I<$field> => $string
+
+Matches if either the request or the response have a header $field with the given value.
+
+=item m_response_attr__I<$key> => undef
+
+=item m_response_attr__I<$key> => $string
+
+Matches if the response object has that key, or the entry has the given value.
+
+=back
+
+=head1 SEE ALSO
+
+L<URI>, L<HTTP::Request>, L<HTTP::Response>
+
+=head1 COPYRIGHT
+
+Copyright 2008, Gisle Aas
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
--- /dev/null
+package HTTP::Headers;
+
+use strict;
+use Carp ();
+
+use vars qw($VERSION $TRANSLATE_UNDERSCORE);
+$VERSION = "6.00";
+
+# The $TRANSLATE_UNDERSCORE variable controls whether '_' can be used
+# as a replacement for '-' in header field names.
+$TRANSLATE_UNDERSCORE = 1 unless defined $TRANSLATE_UNDERSCORE;
+
+# "Good Practice" order of HTTP message headers:
+# - General-Headers
+# - Request-Headers
+# - Response-Headers
+# - Entity-Headers
+
+my @general_headers = qw(
+ Cache-Control Connection Date Pragma Trailer Transfer-Encoding Upgrade
+ Via Warning
+);
+
+my @request_headers = qw(
+ Accept Accept-Charset Accept-Encoding Accept-Language
+ Authorization Expect From Host
+ If-Match If-Modified-Since If-None-Match If-Range If-Unmodified-Since
+ Max-Forwards Proxy-Authorization Range Referer TE User-Agent
+);
+
+my @response_headers = qw(
+ Accept-Ranges Age ETag Location Proxy-Authenticate Retry-After Server
+ Vary WWW-Authenticate
+);
+
+my @entity_headers = qw(
+ Allow Content-Encoding Content-Language Content-Length Content-Location
+ Content-MD5 Content-Range Content-Type Expires Last-Modified
+);
+
+my %entity_header = map { lc($_) => 1 } @entity_headers;
+
+my @header_order = (
+ @general_headers,
+ @request_headers,
+ @response_headers,
+ @entity_headers,
+);
+
+# Make alternative representations of @header_order. This is used
+# for sorting and case matching.
+my %header_order;
+my %standard_case;
+
+{
+ my $i = 0;
+ for (@header_order) {
+ my $lc = lc $_;
+ $header_order{$lc} = ++$i;
+ $standard_case{$lc} = $_;
+ }
+}
+
+
+
+sub new
+{
+ my($class) = shift;
+ my $self = bless {}, $class;
+ $self->header(@_) if @_; # set up initial headers
+ $self;
+}
+
+
+sub header
+{
+ my $self = shift;
+ Carp::croak('Usage: $h->header($field, ...)') unless @_;
+ my(@old);
+ my %seen;
+ while (@_) {
+ my $field = shift;
+ my $op = @_ ? ($seen{lc($field)}++ ? 'PUSH' : 'SET') : 'GET';
+ @old = $self->_header($field, shift, $op);
+ }
+ return @old if wantarray;
+ return $old[0] if @old <= 1;
+ join(", ", @old);
+}
+
+sub clear
+{
+ my $self = shift;
+ %$self = ();
+}
+
+
+sub push_header
+{
+ my $self = shift;
+ return $self->_header(@_, 'PUSH_H') if @_ == 2;
+ while (@_) {
+ $self->_header(splice(@_, 0, 2), 'PUSH_H');
+ }
+}
+
+
+sub init_header
+{
+ Carp::croak('Usage: $h->init_header($field, $val)') if @_ != 3;
+ shift->_header(@_, 'INIT');
+}
+
+
+sub remove_header
+{
+ my($self, @fields) = @_;
+ my $field;
+ my @values;
+ foreach $field (@fields) {
+ $field =~ tr/_/-/ if $field !~ /^:/ && $TRANSLATE_UNDERSCORE;
+ my $v = delete $self->{lc $field};
+ push(@values, ref($v) eq 'ARRAY' ? @$v : $v) if defined $v;
+ }
+ return @values;
+}
+
+sub remove_content_headers
+{
+ my $self = shift;
+ unless (defined(wantarray)) {
+ # fast branch that does not create return object
+ delete @$self{grep $entity_header{$_} || /^content-/, keys %$self};
+ return;
+ }
+
+ my $c = ref($self)->new;
+ for my $f (grep $entity_header{$_} || /^content-/, keys %$self) {
+ $c->{$f} = delete $self->{$f};
+ }
+ $c;
+}
+
+
+sub _header
+{
+ my($self, $field, $val, $op) = @_;
+
+ unless ($field =~ /^:/) {
+ $field =~ tr/_/-/ if $TRANSLATE_UNDERSCORE;
+ my $old = $field;
+ $field = lc $field;
+ unless(defined $standard_case{$field}) {
+ # generate a %standard_case entry for this field
+ $old =~ s/\b(\w)/\u$1/g;
+ $standard_case{$field} = $old;
+ }
+ }
+
+ $op ||= defined($val) ? 'SET' : 'GET';
+ if ($op eq 'PUSH_H') {
+ # Like PUSH but where we don't care about the return value
+ if (exists $self->{$field}) {
+ my $h = $self->{$field};
+ if (ref($h) eq 'ARRAY') {
+ push(@$h, ref($val) eq "ARRAY" ? @$val : $val);
+ }
+ else {
+ $self->{$field} = [$h, ref($val) eq "ARRAY" ? @$val : $val]
+ }
+ return;
+ }
+ $self->{$field} = $val;
+ return;
+ }
+
+ my $h = $self->{$field};
+ my @old = ref($h) eq 'ARRAY' ? @$h : (defined($h) ? ($h) : ());
+
+ unless ($op eq 'GET' || ($op eq 'INIT' && @old)) {
+ if (defined($val)) {
+ my @new = ($op eq 'PUSH') ? @old : ();
+ if (ref($val) ne 'ARRAY') {
+ push(@new, $val);
+ }
+ else {
+ push(@new, @$val);
+ }
+ $self->{$field} = @new > 1 ? \@new : $new[0];
+ }
+ elsif ($op ne 'PUSH') {
+ delete $self->{$field};
+ }
+ }
+ @old;
+}
+
+
+sub _sorted_field_names
+{
+ my $self = shift;
+ return [ sort {
+ ($header_order{$a} || 999) <=> ($header_order{$b} || 999) ||
+ $a cmp $b
+ } keys %$self ];
+}
+
+
+sub header_field_names {
+ my $self = shift;
+ return map $standard_case{$_} || $_, @{ $self->_sorted_field_names },
+ if wantarray;
+ return keys %$self;
+}
+
+
+sub scan
+{
+ my($self, $sub) = @_;
+ my $key;
+ for $key (@{ $self->_sorted_field_names }) {
+ next if substr($key, 0, 1) eq '_';
+ my $vals = $self->{$key};
+ if (ref($vals) eq 'ARRAY') {
+ my $val;
+ for $val (@$vals) {
+ $sub->($standard_case{$key} || $key, $val);
+ }
+ }
+ else {
+ $sub->($standard_case{$key} || $key, $vals);
+ }
+ }
+}
+
+
+sub as_string
+{
+ my($self, $endl) = @_;
+ $endl = "\n" unless defined $endl;
+
+ my @result = ();
+ for my $key (@{ $self->_sorted_field_names }) {
+ next if index($key, '_') == 0;
+ my $vals = $self->{$key};
+ if ( ref($vals) eq 'ARRAY' ) {
+ for my $val (@$vals) {
+ my $field = $standard_case{$key} || $key;
+ $field =~ s/^://;
+ if ( index($val, "\n") >= 0 ) {
+ $val = _process_newline($val, $endl);
+ }
+ push @result, $field . ': ' . $val;
+ }
+ }
+ else {
+ my $field = $standard_case{$key} || $key;
+ $field =~ s/^://;
+ if ( index($vals, "\n") >= 0 ) {
+ $vals = _process_newline($vals, $endl);
+ }
+ push @result, $field . ': ' . $vals;
+ }
+ }
+
+ join($endl, @result, '');
+}
+
+sub _process_newline {
+ local $_ = shift;
+ my $endl = shift;
+ # must handle header values with embedded newlines with care
+ s/\s+$//; # trailing newlines and space must go
+ s/\n(\x0d?\n)+/\n/g; # no empty lines
+ s/\n([^\040\t])/\n $1/g; # intial space for continuation
+ s/\n/$endl/g; # substitute with requested line ending
+ $_;
+}
+
+
+
+if (eval { require Storable; 1 }) {
+ *clone = \&Storable::dclone;
+} else {
+ *clone = sub {
+ my $self = shift;
+ my $clone = HTTP::Headers->new;
+ $self->scan(sub { $clone->push_header(@_);} );
+ $clone;
+ };
+}
+
+
+sub _date_header
+{
+ require HTTP::Date;
+ my($self, $header, $time) = @_;
+ my($old) = $self->_header($header);
+ if (defined $time) {
+ $self->_header($header, HTTP::Date::time2str($time));
+ }
+ $old =~ s/;.*// if defined($old);
+ HTTP::Date::str2time($old);
+}
+
+
+sub date { shift->_date_header('Date', @_); }
+sub expires { shift->_date_header('Expires', @_); }
+sub if_modified_since { shift->_date_header('If-Modified-Since', @_); }
+sub if_unmodified_since { shift->_date_header('If-Unmodified-Since', @_); }
+sub last_modified { shift->_date_header('Last-Modified', @_); }
+
+# This is used as a private LWP extension. The Client-Date header is
+# added as a timestamp to a response when it has been received.
+sub client_date { shift->_date_header('Client-Date', @_); }
+
+# The retry_after field is dual format (can also be a expressed as
+# number of seconds from now), so we don't provide an easy way to
+# access it until we have know how both these interfaces can be
+# addressed. One possibility is to return a negative value for
+# relative seconds and a positive value for epoch based time values.
+#sub retry_after { shift->_date_header('Retry-After', @_); }
+
+sub content_type {
+ my $self = shift;
+ my $ct = $self->{'content-type'};
+ $self->{'content-type'} = shift if @_;
+ $ct = $ct->[0] if ref($ct) eq 'ARRAY';
+ return '' unless defined($ct) && length($ct);
+ my @ct = split(/;\s*/, $ct, 2);
+ for ($ct[0]) {
+ s/\s+//g;
+ $_ = lc($_);
+ }
+ wantarray ? @ct : $ct[0];
+}
+
+sub content_type_charset {
+ my $self = shift;
+ require HTTP::Headers::Util;
+ my $h = $self->{'content-type'};
+ $h = $h->[0] if ref($h);
+ $h = "" unless defined $h;
+ my @v = HTTP::Headers::Util::split_header_words($h);
+ if (@v) {
+ my($ct, undef, %ct_param) = @{$v[0]};
+ my $charset = $ct_param{charset};
+ if ($ct) {
+ $ct = lc($ct);
+ $ct =~ s/\s+//;
+ }
+ if ($charset) {
+ $charset = uc($charset);
+ $charset =~ s/^\s+//; $charset =~ s/\s+\z//;
+ undef($charset) if $charset eq "";
+ }
+ return $ct, $charset if wantarray;
+ return $charset;
+ }
+ return undef, undef if wantarray;
+ return undef;
+}
+
+sub content_is_text {
+ my $self = shift;
+ return $self->content_type =~ m,^text/,;
+}
+
+sub content_is_html {
+ my $self = shift;
+ return $self->content_type eq 'text/html' || $self->content_is_xhtml;
+}
+
+sub content_is_xhtml {
+ my $ct = shift->content_type;
+ return $ct eq "application/xhtml+xml" ||
+ $ct eq "application/vnd.wap.xhtml+xml";
+}
+
+sub content_is_xml {
+ my $ct = shift->content_type;
+ return 1 if $ct eq "text/xml";
+ return 1 if $ct eq "application/xml";
+ return 1 if $ct =~ /\+xml$/;
+ return 0;
+}
+
+sub referer {
+ my $self = shift;
+ if (@_ && $_[0] =~ /#/) {
+ # Strip fragment per RFC 2616, section 14.36.
+ my $uri = shift;
+ if (ref($uri)) {
+ $uri = $uri->clone;
+ $uri->fragment(undef);
+ }
+ else {
+ $uri =~ s/\#.*//;
+ }
+ unshift @_, $uri;
+ }
+ ($self->_header('Referer', @_))[0];
+}
+*referrer = \&referer; # on tchrist's request
+
+sub title { (shift->_header('Title', @_))[0] }
+sub content_encoding { (shift->_header('Content-Encoding', @_))[0] }
+sub content_language { (shift->_header('Content-Language', @_))[0] }
+sub content_length { (shift->_header('Content-Length', @_))[0] }
+
+sub user_agent { (shift->_header('User-Agent', @_))[0] }
+sub server { (shift->_header('Server', @_))[0] }
+
+sub from { (shift->_header('From', @_))[0] }
+sub warning { (shift->_header('Warning', @_))[0] }
+
+sub www_authenticate { (shift->_header('WWW-Authenticate', @_))[0] }
+sub authorization { (shift->_header('Authorization', @_))[0] }
+
+sub proxy_authenticate { (shift->_header('Proxy-Authenticate', @_))[0] }
+sub proxy_authorization { (shift->_header('Proxy-Authorization', @_))[0] }
+
+sub authorization_basic { shift->_basic_auth("Authorization", @_) }
+sub proxy_authorization_basic { shift->_basic_auth("Proxy-Authorization", @_) }
+
+sub _basic_auth {
+ require MIME::Base64;
+ my($self, $h, $user, $passwd) = @_;
+ my($old) = $self->_header($h);
+ if (defined $user) {
+ Carp::croak("Basic authorization user name can't contain ':'")
+ if $user =~ /:/;
+ $passwd = '' unless defined $passwd;
+ $self->_header($h => 'Basic ' .
+ MIME::Base64::encode("$user:$passwd", ''));
+ }
+ if (defined $old && $old =~ s/^\s*Basic\s+//) {
+ my $val = MIME::Base64::decode($old);
+ return $val unless wantarray;
+ return split(/:/, $val, 2);
+ }
+ return;
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Headers - Class encapsulating HTTP Message headers
+
+=head1 SYNOPSIS
+
+ require HTTP::Headers;
+ $h = HTTP::Headers->new;
+
+ $h->header('Content-Type' => 'text/plain'); # set
+ $ct = $h->header('Content-Type'); # get
+ $h->remove_header('Content-Type'); # delete
+
+=head1 DESCRIPTION
+
+The C<HTTP::Headers> class encapsulates HTTP-style message headers.
+The headers consist of attribute-value pairs also called fields, which
+may be repeated, and which are printed in a particular order. The
+field names are cases insensitive.
+
+Instances of this class are usually created as member variables of the
+C<HTTP::Request> and C<HTTP::Response> classes, internal to the
+library.
+
+The following methods are available:
+
+=over 4
+
+=item $h = HTTP::Headers->new
+
+Constructs a new C<HTTP::Headers> object. You might pass some initial
+attribute-value pairs as parameters to the constructor. I<E.g.>:
+
+ $h = HTTP::Headers->new(
+ Date => 'Thu, 03 Feb 1994 00:00:00 GMT',
+ Content_Type => 'text/html; version=3.2',
+ Content_Base => 'http://www.perl.org/');
+
+The constructor arguments are passed to the C<header> method which is
+described below.
+
+=item $h->clone
+
+Returns a copy of this C<HTTP::Headers> object.
+
+=item $h->header( $field )
+
+=item $h->header( $field => $value )
+
+=item $h->header( $f1 => $v1, $f2 => $v2, ... )
+
+Get or set the value of one or more header fields. The header field
+name ($field) is not case sensitive. To make the life easier for perl
+users who wants to avoid quoting before the => operator, you can use
+'_' as a replacement for '-' in header names.
+
+The header() method accepts multiple ($field => $value) pairs, which
+means that you can update several fields with a single invocation.
+
+The $value argument may be a plain string or a reference to an array
+of strings for a multi-valued field. If the $value is provided as
+C<undef> then the field is removed. If the $value is not given, then
+that header field will remain unchanged.
+
+The old value (or values) of the last of the header fields is returned.
+If no such field exists C<undef> will be returned.
+
+A multi-valued field will be returned as separate values in list
+context and will be concatenated with ", " as separator in scalar
+context. The HTTP spec (RFC 2616) promise that joining multiple
+values in this way will not change the semantic of a header field, but
+in practice there are cases like old-style Netscape cookies (see
+L<HTTP::Cookies>) where "," is used as part of the syntax of a single
+field value.
+
+Examples:
+
+ $header->header(MIME_Version => '1.0',
+ User_Agent => 'My-Web-Client/0.01');
+ $header->header(Accept => "text/html, text/plain, image/*");
+ $header->header(Accept => [qw(text/html text/plain image/*)]);
+ @accepts = $header->header('Accept'); # get multiple values
+ $accepts = $header->header('Accept'); # get values as a single string
+
+=item $h->push_header( $field => $value )
+
+=item $h->push_header( $f1 => $v1, $f2 => $v2, ... )
+
+Add a new field value for the specified header field. Previous values
+for the same field are retained.
+
+As for the header() method, the field name ($field) is not case
+sensitive and '_' can be used as a replacement for '-'.
+
+The $value argument may be a scalar or a reference to a list of
+scalars.
+
+ $header->push_header(Accept => 'image/jpeg');
+ $header->push_header(Accept => [map "image/$_", qw(gif png tiff)]);
+
+=item $h->init_header( $field => $value )
+
+Set the specified header to the given value, but only if no previous
+value for that field is set.
+
+The header field name ($field) is not case sensitive and '_'
+can be used as a replacement for '-'.
+
+The $value argument may be a scalar or a reference to a list of
+scalars.
+
+=item $h->remove_header( $field, ... )
+
+This function removes the header fields with the specified names.
+
+The header field names ($field) are not case sensitive and '_'
+can be used as a replacement for '-'.
+
+The return value is the values of the fields removed. In scalar
+context the number of fields removed is returned.
+
+Note that if you pass in multiple field names then it is generally not
+possible to tell which of the returned values belonged to which field.
+
+=item $h->remove_content_headers
+
+This will remove all the header fields used to describe the content of
+a message. All header field names prefixed with C<Content-> fall
+into this category, as well as C<Allow>, C<Expires> and
+C<Last-Modified>. RFC 2616 denotes these fields as I<Entity Header
+Fields>.
+
+The return value is a new C<HTTP::Headers> object that contains the
+removed headers only.
+
+=item $h->clear
+
+This will remove all header fields.
+
+=item $h->header_field_names
+
+Returns the list of distinct names for the fields present in the
+header. The field names have case as suggested by HTTP spec, and the
+names are returned in the recommended "Good Practice" order.
+
+In scalar context return the number of distinct field names.
+
+=item $h->scan( \&process_header_field )
+
+Apply a subroutine to each header field in turn. The callback routine
+is called with two parameters; the name of the field and a single
+value (a string). If a header field is multi-valued, then the
+routine is called once for each value. The field name passed to the
+callback routine has case as suggested by HTTP spec, and the headers
+will be visited in the recommended "Good Practice" order.
+
+Any return values of the callback routine are ignored. The loop can
+be broken by raising an exception (C<die>), but the caller of scan()
+would have to trap the exception itself.
+
+=item $h->as_string
+
+=item $h->as_string( $eol )
+
+Return the header fields as a formatted MIME header. Since it
+internally uses the C<scan> method to build the string, the result
+will use case as suggested by HTTP spec, and it will follow
+recommended "Good Practice" of ordering the header fields. Long header
+values are not folded.
+
+The optional $eol parameter specifies the line ending sequence to
+use. The default is "\n". Embedded "\n" characters in header field
+values will be substituted with this line ending sequence.
+
+=back
+
+=head1 CONVENIENCE METHODS
+
+The most frequently used headers can also be accessed through the
+following convenience methods. Most of these methods can both be used to read
+and to set the value of a header. The header value is set if you pass
+an argument to the method. The old header value is always returned.
+If the given header did not exist then C<undef> is returned.
+
+Methods that deal with dates/times always convert their value to system
+time (seconds since Jan 1, 1970) and they also expect this kind of
+value when the header value is set.
+
+=over 4
+
+=item $h->date
+
+This header represents the date and time at which the message was
+originated. I<E.g.>:
+
+ $h->date(time); # set current date
+
+=item $h->expires
+
+This header gives the date and time after which the entity should be
+considered stale.
+
+=item $h->if_modified_since
+
+=item $h->if_unmodified_since
+
+These header fields are used to make a request conditional. If the requested
+resource has (or has not) been modified since the time specified in this field,
+then the server will return a C<304 Not Modified> response instead of
+the document itself.
+
+=item $h->last_modified
+
+This header indicates the date and time at which the resource was last
+modified. I<E.g.>:
+
+ # check if document is more than 1 hour old
+ if (my $last_mod = $h->last_modified) {
+ if ($last_mod < time - 60*60) {
+ ...
+ }
+ }
+
+=item $h->content_type
+
+The Content-Type header field indicates the media type of the message
+content. I<E.g.>:
+
+ $h->content_type('text/html');
+
+The value returned will be converted to lower case, and potential
+parameters will be chopped off and returned as a separate value if in
+an array context. If there is no such header field, then the empty
+string is returned. This makes it safe to do the following:
+
+ if ($h->content_type eq 'text/html') {
+ # we enter this place even if the real header value happens to
+ # be 'TEXT/HTML; version=3.0'
+ ...
+ }
+
+=item $h->content_type_charset
+
+Returns the upper-cased charset specified in the Content-Type header. In list
+context return the lower-cased bare content type followed by the upper-cased
+charset. Both values will be C<undef> if not specified in the header.
+
+=item $h->content_is_text
+
+Returns TRUE if the Content-Type header field indicate that the
+content is textual.
+
+=item $h->content_is_html
+
+Returns TRUE if the Content-Type header field indicate that the
+content is some kind of HTML (including XHTML). This method can't be
+used to set Content-Type.
+
+=item $h->content_is_xhtml
+
+Returns TRUE if the Content-Type header field indicate that the
+content is XHTML. This method can't be used to set Content-Type.
+
+=item $h->content_is_xml
+
+Returns TRUE if the Content-Type header field indicate that the
+content is XML. This method can't be used to set Content-Type.
+
+=item $h->content_encoding
+
+The Content-Encoding header field is used as a modifier to the
+media type. When present, its value indicates what additional
+encoding mechanism has been applied to the resource.
+
+=item $h->content_length
+
+A decimal number indicating the size in bytes of the message content.
+
+=item $h->content_language
+
+The natural language(s) of the intended audience for the message
+content. The value is one or more language tags as defined by RFC
+1766. Eg. "no" for some kind of Norwegian and "en-US" for English the
+way it is written in the US.
+
+=item $h->title
+
+The title of the document. In libwww-perl this header will be
+initialized automatically from the E<lt>TITLE>...E<lt>/TITLE> element
+of HTML documents. I<This header is no longer part of the HTTP
+standard.>
+
+=item $h->user_agent
+
+This header field is used in request messages and contains information
+about the user agent originating the request. I<E.g.>:
+
+ $h->user_agent('Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.0)');
+
+=item $h->server
+
+The server header field contains information about the software being
+used by the originating server program handling the request.
+
+=item $h->from
+
+This header should contain an Internet e-mail address for the human
+user who controls the requesting user agent. The address should be
+machine-usable, as defined by RFC822. E.g.:
+
+ $h->from('King Kong <king@kong.com>');
+
+I<This header is no longer part of the HTTP standard.>
+
+=item $h->referer
+
+Used to specify the address (URI) of the document from which the
+requested resource address was obtained.
+
+The "Free On-line Dictionary of Computing" as this to say about the
+word I<referer>:
+
+ <World-Wide Web> A misspelling of "referrer" which
+ somehow made it into the {HTTP} standard. A given {web
+ page}'s referer (sic) is the {URL} of whatever web page
+ contains the link that the user followed to the current
+ page. Most browsers pass this information as part of a
+ request.
+
+ (1998-10-19)
+
+By popular demand C<referrer> exists as an alias for this method so you
+can avoid this misspelling in your programs and still send the right
+thing on the wire.
+
+When setting the referrer, this method removes the fragment from the
+given URI if it is present, as mandated by RFC2616. Note that
+the removal does I<not> happen automatically if using the header(),
+push_header() or init_header() methods to set the referrer.
+
+=item $h->www_authenticate
+
+This header must be included as part of a C<401 Unauthorized> response.
+The field value consist of a challenge that indicates the
+authentication scheme and parameters applicable to the requested URI.
+
+=item $h->proxy_authenticate
+
+This header must be included in a C<407 Proxy Authentication Required>
+response.
+
+=item $h->authorization
+
+=item $h->proxy_authorization
+
+A user agent that wishes to authenticate itself with a server or a
+proxy, may do so by including these headers.
+
+=item $h->authorization_basic
+
+This method is used to get or set an authorization header that use the
+"Basic Authentication Scheme". In array context it will return two
+values; the user name and the password. In scalar context it will
+return I<"uname:password"> as a single string value.
+
+When used to set the header value, it expects two arguments. I<E.g.>:
+
+ $h->authorization_basic($uname, $password);
+
+The method will croak if the $uname contains a colon ':'.
+
+=item $h->proxy_authorization_basic
+
+Same as authorization_basic() but will set the "Proxy-Authorization"
+header instead.
+
+=back
+
+=head1 NON-CANONICALIZED FIELD NAMES
+
+The header field name spelling is normally canonicalized including the
+'_' to '-' translation. There are some application where this is not
+appropriate. Prefixing field names with ':' allow you to force a
+specific spelling. For example if you really want a header field name
+to show up as C<foo_bar> instead of "Foo-Bar", you might set it like
+this:
+
+ $h->header(":foo_bar" => 1);
+
+These field names are returned with the ':' intact for
+$h->header_field_names and the $h->scan callback, but the colons do
+not show in $h->as_string.
+
+=head1 COPYRIGHT
+
+Copyright 1995-2005 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package HTTP::Headers::Auth;
+
+use strict;
+use vars qw($VERSION);
+$VERSION = "6.00";
+
+use HTTP::Headers;
+
+package HTTP::Headers;
+
+BEGIN {
+ # we provide a new (and better) implementations below
+ undef(&www_authenticate);
+ undef(&proxy_authenticate);
+}
+
+require HTTP::Headers::Util;
+
+sub _parse_authenticate
+{
+ my @ret;
+ for (HTTP::Headers::Util::split_header_words(@_)) {
+ if (!defined($_->[1])) {
+ # this is a new auth scheme
+ push(@ret, shift(@$_) => {});
+ shift @$_;
+ }
+ if (@ret) {
+ # this a new parameter pair for the last auth scheme
+ while (@$_) {
+ my $k = shift @$_;
+ my $v = shift @$_;
+ $ret[-1]{$k} = $v;
+ }
+ }
+ else {
+ # something wrong, parameter pair without any scheme seen
+ # IGNORE
+ }
+ }
+ @ret;
+}
+
+sub _authenticate
+{
+ my $self = shift;
+ my $header = shift;
+ my @old = $self->_header($header);
+ if (@_) {
+ $self->remove_header($header);
+ my @new = @_;
+ while (@new) {
+ my $a_scheme = shift(@new);
+ if ($a_scheme =~ /\s/) {
+ # assume complete valid value, pass it through
+ $self->push_header($header, $a_scheme);
+ }
+ else {
+ my @param;
+ if (@new) {
+ my $p = $new[0];
+ if (ref($p) eq "ARRAY") {
+ @param = @$p;
+ shift(@new);
+ }
+ elsif (ref($p) eq "HASH") {
+ @param = %$p;
+ shift(@new);
+ }
+ }
+ my $val = ucfirst(lc($a_scheme));
+ if (@param) {
+ my $sep = " ";
+ while (@param) {
+ my $k = shift @param;
+ my $v = shift @param;
+ if ($v =~ /[^0-9a-zA-Z]/ || lc($k) eq "realm") {
+ # must quote the value
+ $v =~ s,([\\\"]),\\$1,g;
+ $v = qq("$v");
+ }
+ $val .= "$sep$k=$v";
+ $sep = ", ";
+ }
+ }
+ $self->push_header($header, $val);
+ }
+ }
+ }
+ return unless defined wantarray;
+ wantarray ? _parse_authenticate(@old) : join(", ", @old);
+}
+
+
+sub www_authenticate { shift->_authenticate("WWW-Authenticate", @_) }
+sub proxy_authenticate { shift->_authenticate("Proxy-Authenticate", @_) }
+
+1;
--- /dev/null
+package HTTP::Headers::ETag;
+
+use strict;
+use vars qw($VERSION);
+$VERSION = "6.00";
+
+require HTTP::Date;
+
+require HTTP::Headers;
+package HTTP::Headers;
+
+sub _etags
+{
+ my $self = shift;
+ my $header = shift;
+ my @old = _split_etag_list($self->_header($header));
+ if (@_) {
+ $self->_header($header => join(", ", _split_etag_list(@_)));
+ }
+ wantarray ? @old : join(", ", @old);
+}
+
+sub etag { shift->_etags("ETag", @_); }
+sub if_match { shift->_etags("If-Match", @_); }
+sub if_none_match { shift->_etags("If-None-Match", @_); }
+
+sub if_range {
+ # Either a date or an entity-tag
+ my $self = shift;
+ my @old = $self->_header("If-Range");
+ if (@_) {
+ my $new = shift;
+ if (!defined $new) {
+ $self->remove_header("If-Range");
+ }
+ elsif ($new =~ /^\d+$/) {
+ $self->_date_header("If-Range", $new);
+ }
+ else {
+ $self->_etags("If-Range", $new);
+ }
+ }
+ return unless defined(wantarray);
+ for (@old) {
+ my $t = HTTP::Date::str2time($_);
+ $_ = $t if $t;
+ }
+ wantarray ? @old : join(", ", @old);
+}
+
+
+# Split a list of entity tag values. The return value is a list
+# consisting of one element per entity tag. Suitable for parsing
+# headers like C<If-Match>, C<If-None-Match>. You might even want to
+# use it on C<ETag> and C<If-Range> entity tag values, because it will
+# normalize them to the common form.
+#
+# entity-tag = [ weak ] opaque-tag
+# weak = "W/"
+# opaque-tag = quoted-string
+
+
+sub _split_etag_list
+{
+ my(@val) = @_;
+ my @res;
+ for (@val) {
+ while (length) {
+ my $weak = "";
+ $weak = "W/" if s,^\s*[wW]/,,;
+ my $etag = "";
+ if (s/^\s*(\"[^\"\\]*(?:\\.[^\"\\]*)*\")//) {
+ push(@res, "$weak$1");
+ }
+ elsif (s/^\s*,//) {
+ push(@res, qq(W/"")) if $weak;
+ }
+ elsif (s/^\s*([^,\s]+)//) {
+ $etag = $1;
+ $etag =~ s/([\"\\])/\\$1/g;
+ push(@res, qq($weak"$etag"));
+ }
+ elsif (s/^\s+// || !length) {
+ push(@res, qq(W/"")) if $weak;
+ }
+ else {
+ die "This should not happen: '$_'";
+ }
+ }
+ }
+ @res;
+}
+
+1;
--- /dev/null
+package HTTP::Headers::Util;
+
+use strict;
+use vars qw($VERSION @ISA @EXPORT_OK);
+
+$VERSION = "6.03";
+
+require Exporter;
+@ISA=qw(Exporter);
+
+@EXPORT_OK=qw(split_header_words _split_header_words join_header_words);
+
+
+
+sub split_header_words {
+ my @res = &_split_header_words;
+ for my $arr (@res) {
+ for (my $i = @$arr - 2; $i >= 0; $i -= 2) {
+ $arr->[$i] = lc($arr->[$i]);
+ }
+ }
+ return @res;
+}
+
+sub _split_header_words
+{
+ my(@val) = @_;
+ my @res;
+ for (@val) {
+ my @cur;
+ while (length) {
+ if (s/^\s*(=*[^\s=;,]+)//) { # 'token' or parameter 'attribute'
+ push(@cur, $1);
+ # a quoted value
+ if (s/^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"//) {
+ my $val = $1;
+ $val =~ s/\\(.)/$1/g;
+ push(@cur, $val);
+ # some unquoted value
+ }
+ elsif (s/^\s*=\s*([^;,\s]*)//) {
+ my $val = $1;
+ $val =~ s/\s+$//;
+ push(@cur, $val);
+ # no value, a lone token
+ }
+ else {
+ push(@cur, undef);
+ }
+ }
+ elsif (s/^\s*,//) {
+ push(@res, [@cur]) if @cur;
+ @cur = ();
+ }
+ elsif (s/^\s*;// || s/^\s+//) {
+ # continue
+ }
+ else {
+ die "This should not happen: '$_'";
+ }
+ }
+ push(@res, \@cur) if @cur;
+ }
+ @res;
+}
+
+
+sub join_header_words
+{
+ @_ = ([@_]) if @_ && !ref($_[0]);
+ my @res;
+ for (@_) {
+ my @cur = @$_;
+ my @attr;
+ while (@cur) {
+ my $k = shift @cur;
+ my $v = shift @cur;
+ if (defined $v) {
+ if ($v =~ /[\x00-\x20()<>@,;:\\\"\/\[\]?={}\x7F-\xFF]/ || !length($v)) {
+ $v =~ s/([\"\\])/\\$1/g; # escape " and \
+ $k .= qq(="$v");
+ }
+ else {
+ # token
+ $k .= "=$v";
+ }
+ }
+ push(@attr, $k);
+ }
+ push(@res, join("; ", @attr)) if @attr;
+ }
+ join(", ", @res);
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Headers::Util - Header value parsing utility functions
+
+=head1 SYNOPSIS
+
+ use HTTP::Headers::Util qw(split_header_words);
+ @values = split_header_words($h->header("Content-Type"));
+
+=head1 DESCRIPTION
+
+This module provides a few functions that helps parsing and
+construction of valid HTTP header values. None of the functions are
+exported by default.
+
+The following functions are available:
+
+=over 4
+
+
+=item split_header_words( @header_values )
+
+This function will parse the header values given as argument into a
+list of anonymous arrays containing key/value pairs. The function
+knows how to deal with ",", ";" and "=" as well as quoted values after
+"=". A list of space separated tokens are parsed as if they were
+separated by ";".
+
+If the @header_values passed as argument contains multiple values,
+then they are treated as if they were a single value separated by
+comma ",".
+
+This means that this function is useful for parsing header fields that
+follow this syntax (BNF as from the HTTP/1.1 specification, but we relax
+the requirement for tokens).
+
+ headers = #header
+ header = (token | parameter) *( [";"] (token | parameter))
+
+ token = 1*<any CHAR except CTLs or separators>
+ separators = "(" | ")" | "<" | ">" | "@"
+ | "," | ";" | ":" | "\" | <">
+ | "/" | "[" | "]" | "?" | "="
+ | "{" | "}" | SP | HT
+
+ quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
+ qdtext = <any TEXT except <">>
+ quoted-pair = "\" CHAR
+
+ parameter = attribute "=" value
+ attribute = token
+ value = token | quoted-string
+
+Each I<header> is represented by an anonymous array of key/value
+pairs. The keys will be all be forced to lower case.
+The value for a simple token (not part of a parameter) is C<undef>.
+Syntactically incorrect headers will not necessarily be parsed as you
+would want.
+
+This is easier to describe with some examples:
+
+ split_header_words('foo="bar"; port="80,81"; DISCARD, BAR=baz');
+ split_header_words('text/html; charset="iso-8859-1"');
+ split_header_words('Basic realm="\\"foo\\\\bar\\""');
+
+will return
+
+ [foo=>'bar', port=>'80,81', discard=> undef], [bar=>'baz' ]
+ ['text/html' => undef, charset => 'iso-8859-1']
+ [basic => undef, realm => "\"foo\\bar\""]
+
+If you don't want the function to convert tokens and attribute keys to
+lower case you can call it as C<_split_header_words> instead (with a
+leading underscore).
+
+=item join_header_words( @arrays )
+
+This will do the opposite of the conversion done by split_header_words().
+It takes a list of anonymous arrays as arguments (or a list of
+key/value pairs) and produces a single header value. Attribute values
+are quoted if needed.
+
+Example:
+
+ join_header_words(["text/plain" => undef, charset => "iso-8859/1"]);
+ join_header_words("text/plain" => undef, charset => "iso-8859/1");
+
+will both return the string:
+
+ text/plain; charset="iso-8859/1"
+
+=back
+
+=head1 COPYRIGHT
+
+Copyright 1997-1998, Gisle Aas
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package HTTP::Message;
+
+use strict;
+use vars qw($VERSION $AUTOLOAD);
+$VERSION = "6.03";
+
+require HTTP::Headers;
+require Carp;
+
+my $CRLF = "\015\012"; # "\r\n" is not portable
+$HTTP::URI_CLASS ||= $ENV{PERL_HTTP_URI_CLASS} || "URI";
+eval "require $HTTP::URI_CLASS"; die $@ if $@;
+
+*_utf8_downgrade = defined(&utf8::downgrade) ?
+ sub {
+ utf8::downgrade($_[0], 1) or
+ Carp::croak("HTTP::Message content must be bytes")
+ }
+ :
+ sub {
+ };
+
+sub new
+{
+ my($class, $header, $content) = @_;
+ if (defined $header) {
+ Carp::croak("Bad header argument") unless ref $header;
+ if (ref($header) eq "ARRAY") {
+ $header = HTTP::Headers->new(@$header);
+ }
+ else {
+ $header = $header->clone;
+ }
+ }
+ else {
+ $header = HTTP::Headers->new;
+ }
+ if (defined $content) {
+ _utf8_downgrade($content);
+ }
+ else {
+ $content = '';
+ }
+
+ bless {
+ '_headers' => $header,
+ '_content' => $content,
+ }, $class;
+}
+
+
+sub parse
+{
+ my($class, $str) = @_;
+
+ my @hdr;
+ while (1) {
+ if ($str =~ s/^([^\s:]+)[ \t]*: ?(.*)\n?//) {
+ push(@hdr, $1, $2);
+ $hdr[-1] =~ s/\r\z//;
+ }
+ elsif (@hdr && $str =~ s/^([ \t].*)\n?//) {
+ $hdr[-1] .= "\n$1";
+ $hdr[-1] =~ s/\r\z//;
+ }
+ else {
+ $str =~ s/^\r?\n//;
+ last;
+ }
+ }
+ local $HTTP::Headers::TRANSLATE_UNDERSCORE;
+ new($class, \@hdr, $str);
+}
+
+
+sub clone
+{
+ my $self = shift;
+ my $clone = HTTP::Message->new($self->headers,
+ $self->content);
+ $clone->protocol($self->protocol);
+ $clone;
+}
+
+
+sub clear {
+ my $self = shift;
+ $self->{_headers}->clear;
+ $self->content("");
+ delete $self->{_parts};
+ return;
+}
+
+
+sub protocol {
+ shift->_elem('_protocol', @_);
+}
+
+sub headers {
+ my $self = shift;
+
+ # recalculation of _content might change headers, so we
+ # need to force it now
+ $self->_content unless exists $self->{_content};
+
+ $self->{_headers};
+}
+
+sub headers_as_string {
+ shift->headers->as_string(@_);
+}
+
+
+sub content {
+
+ my $self = $_[0];
+ if (defined(wantarray)) {
+ $self->_content unless exists $self->{_content};
+ my $old = $self->{_content};
+ $old = $$old if ref($old) eq "SCALAR";
+ &_set_content if @_ > 1;
+ return $old;
+ }
+
+ if (@_ > 1) {
+ &_set_content;
+ }
+ else {
+ Carp::carp("Useless content call in void context") if $^W;
+ }
+}
+
+
+sub _set_content {
+ my $self = $_[0];
+ _utf8_downgrade($_[1]);
+ if (!ref($_[1]) && ref($self->{_content}) eq "SCALAR") {
+ ${$self->{_content}} = $_[1];
+ }
+ else {
+ die "Can't set content to be a scalar reference" if ref($_[1]) eq "SCALAR";
+ $self->{_content} = $_[1];
+ delete $self->{_content_ref};
+ }
+ delete $self->{_parts} unless $_[2];
+}
+
+
+sub add_content
+{
+ my $self = shift;
+ $self->_content unless exists $self->{_content};
+ my $chunkref = \$_[0];
+ $chunkref = $$chunkref if ref($$chunkref); # legacy
+
+ _utf8_downgrade($$chunkref);
+
+ my $ref = ref($self->{_content});
+ if (!$ref) {
+ $self->{_content} .= $$chunkref;
+ }
+ elsif ($ref eq "SCALAR") {
+ ${$self->{_content}} .= $$chunkref;
+ }
+ else {
+ Carp::croak("Can't append to $ref content");
+ }
+ delete $self->{_parts};
+}
+
+sub add_content_utf8 {
+ my($self, $buf) = @_;
+ utf8::upgrade($buf);
+ utf8::encode($buf);
+ $self->add_content($buf);
+}
+
+sub content_ref
+{
+ my $self = shift;
+ $self->_content unless exists $self->{_content};
+ delete $self->{_parts};
+ my $old = \$self->{_content};
+ my $old_cref = $self->{_content_ref};
+ if (@_) {
+ my $new = shift;
+ Carp::croak("Setting content_ref to a non-ref") unless ref($new);
+ delete $self->{_content}; # avoid modifying $$old
+ $self->{_content} = $new;
+ $self->{_content_ref}++;
+ }
+ $old = $$old if $old_cref;
+ return $old;
+}
+
+
+sub content_charset
+{
+ my $self = shift;
+ if (my $charset = $self->content_type_charset) {
+ return $charset;
+ }
+
+ # time to start guessing
+ my $cref = $self->decoded_content(ref => 1, charset => "none");
+
+ # Unicode BOM
+ for ($$cref) {
+ return "UTF-8" if /^\xEF\xBB\xBF/;
+ return "UTF-32-LE" if /^\xFF\xFE\x00\x00/;
+ return "UTF-32-BE" if /^\x00\x00\xFE\xFF/;
+ return "UTF-16-LE" if /^\xFF\xFE/;
+ return "UTF-16-BE" if /^\xFE\xFF/;
+ }
+
+ if ($self->content_is_xml) {
+ # http://www.w3.org/TR/2006/REC-xml-20060816/#sec-guessing
+ # XML entity not accompanied by external encoding information and not
+ # in UTF-8 or UTF-16 encoding must begin with an XML encoding declaration,
+ # in which the first characters must be '<?xml'
+ for ($$cref) {
+ return "UTF-32-BE" if /^\x00\x00\x00</;
+ return "UTF-32-LE" if /^<\x00\x00\x00/;
+ return "UTF-16-BE" if /^(?:\x00\s)*\x00</;
+ return "UTF-16-LE" if /^(?:\s\x00)*<\x00/;
+ if (/^\s*(<\?xml[^\x00]*?\?>)/) {
+ if ($1 =~ /\sencoding\s*=\s*(["'])(.*?)\1/) {
+ my $enc = $2;
+ $enc =~ s/^\s+//; $enc =~ s/\s+\z//;
+ return $enc if $enc;
+ }
+ }
+ }
+ return "UTF-8";
+ }
+ elsif ($self->content_is_html) {
+ # look for <META charset="..."> or <META content="...">
+ # http://dev.w3.org/html5/spec/Overview.html#determining-the-character-encoding
+ my $charset;
+ require HTML::Parser;
+ my $p = HTML::Parser->new(
+ start_h => [sub {
+ my($tag, $attr, $self) = @_;
+ $charset = $attr->{charset};
+ unless ($charset) {
+ # look at $attr->{content} ...
+ if (my $c = $attr->{content}) {
+ require HTTP::Headers::Util;
+ my @v = HTTP::Headers::Util::split_header_words($c);
+ return unless @v;
+ my($ct, undef, %ct_param) = @{$v[0]};
+ $charset = $ct_param{charset};
+ }
+ return unless $charset;
+ }
+ if ($charset =~ /^utf-?16/i) {
+ # converted document, assume UTF-8
+ $charset = "UTF-8";
+ }
+ $self->eof;
+ }, "tagname, attr, self"],
+ report_tags => [qw(meta)],
+ utf8_mode => 1,
+ );
+ $p->parse($$cref);
+ return $charset if $charset;
+ }
+ if ($self->content_type =~ /^text\//) {
+ for ($$cref) {
+ if (length) {
+ return "US-ASCII" unless /[\x80-\xFF]/;
+ require Encode;
+ eval {
+ Encode::decode_utf8($_, Encode::FB_CROAK() | Encode::LEAVE_SRC());
+ };
+ return "UTF-8" unless $@;
+ return "ISO-8859-1";
+ }
+ }
+ }
+
+ return undef;
+}
+
+
+sub decoded_content
+{
+ my($self, %opt) = @_;
+ my $content_ref;
+ my $content_ref_iscopy;
+
+ eval {
+ $content_ref = $self->content_ref;
+ die "Can't decode ref content" if ref($content_ref) ne "SCALAR";
+
+ if (my $h = $self->header("Content-Encoding")) {
+ $h =~ s/^\s+//;
+ $h =~ s/\s+$//;
+ for my $ce (reverse split(/\s*,\s*/, lc($h))) {
+ next unless $ce;
+ next if $ce eq "identity";
+ if ($ce eq "gzip" || $ce eq "x-gzip") {
+ require IO::Uncompress::Gunzip;
+ my $output;
+ IO::Uncompress::Gunzip::gunzip($content_ref, \$output, Transparent => 0)
+ or die "Can't gunzip content: $IO::Uncompress::Gunzip::GunzipError";
+ $content_ref = \$output;
+ $content_ref_iscopy++;
+ }
+ elsif ($ce eq "x-bzip2" or $ce eq "bzip2") {
+ require IO::Uncompress::Bunzip2;
+ my $output;
+ IO::Uncompress::Bunzip2::bunzip2($content_ref, \$output, Transparent => 0)
+ or die "Can't bunzip content: $IO::Uncompress::Bunzip2::Bunzip2Error";
+ $content_ref = \$output;
+ $content_ref_iscopy++;
+ }
+ elsif ($ce eq "deflate") {
+ require IO::Uncompress::Inflate;
+ my $output;
+ my $status = IO::Uncompress::Inflate::inflate($content_ref, \$output, Transparent => 0);
+ my $error = $IO::Uncompress::Inflate::InflateError;
+ unless ($status) {
+ # "Content-Encoding: deflate" is supposed to mean the
+ # "zlib" format of RFC 1950, but Microsoft got that
+ # wrong, so some servers sends the raw compressed
+ # "deflate" data. This tries to inflate this format.
+ $output = undef;
+ require IO::Uncompress::RawInflate;
+ unless (IO::Uncompress::RawInflate::rawinflate($content_ref, \$output)) {
+ $self->push_header("Client-Warning" =>
+ "Could not raw inflate content: $IO::Uncompress::RawInflate::RawInflateError");
+ $output = undef;
+ }
+ }
+ die "Can't inflate content: $error" unless defined $output;
+ $content_ref = \$output;
+ $content_ref_iscopy++;
+ }
+ elsif ($ce eq "compress" || $ce eq "x-compress") {
+ die "Can't uncompress content";
+ }
+ elsif ($ce eq "base64") { # not really C-T-E, but should be harmless
+ require MIME::Base64;
+ $content_ref = \MIME::Base64::decode($$content_ref);
+ $content_ref_iscopy++;
+ }
+ elsif ($ce eq "quoted-printable") { # not really C-T-E, but should be harmless
+ require MIME::QuotedPrint;
+ $content_ref = \MIME::QuotedPrint::decode($$content_ref);
+ $content_ref_iscopy++;
+ }
+ else {
+ die "Don't know how to decode Content-Encoding '$ce'";
+ }
+ }
+ }
+
+ if ($self->content_is_text || (my $is_xml = $self->content_is_xml)) {
+ my $charset = lc(
+ $opt{charset} ||
+ $self->content_type_charset ||
+ $opt{default_charset} ||
+ $self->content_charset ||
+ "ISO-8859-1"
+ );
+ if ($charset eq "none") {
+ # leave it asis
+ }
+ elsif ($charset eq "us-ascii" || $charset eq "iso-8859-1") {
+ if ($$content_ref =~ /[^\x00-\x7F]/ && defined &utf8::upgrade) {
+ unless ($content_ref_iscopy) {
+ my $copy = $$content_ref;
+ $content_ref = \$copy;
+ $content_ref_iscopy++;
+ }
+ utf8::upgrade($$content_ref);
+ }
+ }
+ else {
+ require Encode;
+ eval {
+ $content_ref = \Encode::decode($charset, $$content_ref,
+ ($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC());
+ };
+ if ($@) {
+ my $retried;
+ if ($@ =~ /^Unknown encoding/) {
+ my $alt_charset = lc($opt{alt_charset} || "");
+ if ($alt_charset && $charset ne $alt_charset) {
+ # Retry decoding with the alternative charset
+ $content_ref = \Encode::decode($alt_charset, $$content_ref,
+ ($opt{charset_strict} ? Encode::FB_CROAK() : 0) | Encode::LEAVE_SRC())
+ unless $alt_charset eq "none";
+ $retried++;
+ }
+ }
+ die unless $retried;
+ }
+ die "Encode::decode() returned undef improperly" unless defined $$content_ref;
+ if ($is_xml) {
+ # Get rid of the XML encoding declaration if present
+ $$content_ref =~ s/^\x{FEFF}//;
+ if ($$content_ref =~ /^(\s*<\?xml[^\x00]*?\?>)/) {
+ substr($$content_ref, 0, length($1)) =~ s/\sencoding\s*=\s*(["']).*?\1//;
+ }
+ }
+ }
+ }
+ };
+ if ($@) {
+ Carp::croak($@) if $opt{raise_error};
+ return undef;
+ }
+
+ return $opt{ref} ? $content_ref : $$content_ref;
+}
+
+
+sub decodable
+{
+ # should match the Content-Encoding values that decoded_content can deal with
+ my $self = shift;
+ my @enc;
+ # XXX preferably we should determine if the modules are available without loading
+ # them here
+ eval {
+ require IO::Uncompress::Gunzip;
+ push(@enc, "gzip", "x-gzip");
+ };
+ eval {
+ require IO::Uncompress::Inflate;
+ require IO::Uncompress::RawInflate;
+ push(@enc, "deflate");
+ };
+ eval {
+ require IO::Uncompress::Bunzip2;
+ push(@enc, "x-bzip2");
+ };
+ # we don't care about announcing the 'identity', 'base64' and
+ # 'quoted-printable' stuff
+ return wantarray ? @enc : join(", ", @enc);
+}
+
+
+sub decode
+{
+ my $self = shift;
+ return 1 unless $self->header("Content-Encoding");
+ if (defined(my $content = $self->decoded_content(charset => "none"))) {
+ $self->remove_header("Content-Encoding", "Content-Length", "Content-MD5");
+ $self->content($content);
+ return 1;
+ }
+ return 0;
+}
+
+
+sub encode
+{
+ my($self, @enc) = @_;
+
+ Carp::croak("Can't encode multipart/* messages") if $self->content_type =~ m,^multipart/,;
+ Carp::croak("Can't encode message/* messages") if $self->content_type =~ m,^message/,;
+
+ return 1 unless @enc; # nothing to do
+
+ my $content = $self->content;
+ for my $encoding (@enc) {
+ if ($encoding eq "identity") {
+ # nothing to do
+ }
+ elsif ($encoding eq "base64") {
+ require MIME::Base64;
+ $content = MIME::Base64::encode($content);
+ }
+ elsif ($encoding eq "gzip" || $encoding eq "x-gzip") {
+ require IO::Compress::Gzip;
+ my $output;
+ IO::Compress::Gzip::gzip(\$content, \$output, Minimal => 1)
+ or die "Can't gzip content: $IO::Compress::Gzip::GzipError";
+ $content = $output;
+ }
+ elsif ($encoding eq "deflate") {
+ require IO::Compress::Deflate;
+ my $output;
+ IO::Compress::Deflate::deflate(\$content, \$output)
+ or die "Can't deflate content: $IO::Compress::Deflate::DeflateError";
+ $content = $output;
+ }
+ elsif ($encoding eq "x-bzip2") {
+ require IO::Compress::Bzip2;
+ my $output;
+ IO::Compress::Bzip2::bzip2(\$content, \$output)
+ or die "Can't bzip2 content: $IO::Compress::Bzip2::Bzip2Error";
+ $content = $output;
+ }
+ elsif ($encoding eq "rot13") { # for the fun of it
+ $content =~ tr/A-Za-z/N-ZA-Mn-za-m/;
+ }
+ else {
+ return 0;
+ }
+ }
+ my $h = $self->header("Content-Encoding");
+ unshift(@enc, $h) if $h;
+ $self->header("Content-Encoding", join(", ", @enc));
+ $self->remove_header("Content-Length", "Content-MD5");
+ $self->content($content);
+ return 1;
+}
+
+
+sub as_string
+{
+ my($self, $eol) = @_;
+ $eol = "\n" unless defined $eol;
+
+ # The calculation of content might update the headers
+ # so we need to do that first.
+ my $content = $self->content;
+
+ return join("", $self->{'_headers'}->as_string($eol),
+ $eol,
+ $content,
+ (@_ == 1 && length($content) &&
+ $content !~ /\n\z/) ? "\n" : "",
+ );
+}
+
+
+sub dump
+{
+ my($self, %opt) = @_;
+ my $content = $self->content;
+ my $chopped = 0;
+ if (!ref($content)) {
+ my $maxlen = $opt{maxlength};
+ $maxlen = 512 unless defined($maxlen);
+ if ($maxlen && length($content) > $maxlen * 1.1 + 3) {
+ $chopped = length($content) - $maxlen;
+ $content = substr($content, 0, $maxlen) . "...";
+ }
+
+ $content =~ s/\\/\\\\/g;
+ $content =~ s/\t/\\t/g;
+ $content =~ s/\r/\\r/g;
+
+ # no need for 3 digits in escape for these
+ $content =~ s/([\0-\11\13-\037])(?!\d)/sprintf('\\%o',ord($1))/eg;
+
+ $content =~ s/([\0-\11\13-\037\177-\377])/sprintf('\\x%02X',ord($1))/eg;
+ $content =~ s/([^\12\040-\176])/sprintf('\\x{%X}',ord($1))/eg;
+
+ # remaining whitespace
+ $content =~ s/( +)\n/("\\40" x length($1)) . "\n"/eg;
+ $content =~ s/(\n+)\n/("\\n" x length($1)) . "\n"/eg;
+ $content =~ s/\n\z/\\n/;
+
+ my $no_content = "(no content)";
+ if ($content eq $no_content) {
+ # escape our $no_content marker
+ $content =~ s/^(.)/sprintf('\\x%02X',ord($1))/eg;
+ }
+ elsif ($content eq "") {
+ $content = "(no content)";
+ }
+ }
+
+ my @dump;
+ push(@dump, $opt{preheader}) if $opt{preheader};
+ push(@dump, $self->{_headers}->as_string, $content);
+ push(@dump, "(+ $chopped more bytes not shown)") if $chopped;
+
+ my $dump = join("\n", @dump, "");
+ $dump =~ s/^/$opt{prefix}/gm if $opt{prefix};
+
+ print $dump unless defined wantarray;
+ return $dump;
+}
+
+
+sub parts {
+ my $self = shift;
+ if (defined(wantarray) && (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR")) {
+ $self->_parts;
+ }
+ my $old = $self->{_parts};
+ if (@_) {
+ my @parts = map { ref($_) eq 'ARRAY' ? @$_ : $_ } @_;
+ my $ct = $self->content_type || "";
+ if ($ct =~ m,^message/,) {
+ Carp::croak("Only one part allowed for $ct content")
+ if @parts > 1;
+ }
+ elsif ($ct !~ m,^multipart/,) {
+ $self->remove_content_headers;
+ $self->content_type("multipart/mixed");
+ }
+ $self->{_parts} = \@parts;
+ _stale_content($self);
+ }
+ return @$old if wantarray;
+ return $old->[0];
+}
+
+sub add_part {
+ my $self = shift;
+ if (($self->content_type || "") !~ m,^multipart/,) {
+ my $p = HTTP::Message->new($self->remove_content_headers,
+ $self->content(""));
+ $self->content_type("multipart/mixed");
+ $self->{_parts} = [];
+ if ($p->headers->header_field_names || $p->content ne "") {
+ push(@{$self->{_parts}}, $p);
+ }
+ }
+ elsif (!exists $self->{_parts} || ref($self->{_content}) eq "SCALAR") {
+ $self->_parts;
+ }
+
+ push(@{$self->{_parts}}, @_);
+ _stale_content($self);
+ return;
+}
+
+sub _stale_content {
+ my $self = shift;
+ if (ref($self->{_content}) eq "SCALAR") {
+ # must recalculate now
+ $self->_content;
+ }
+ else {
+ # just invalidate cache
+ delete $self->{_content};
+ delete $self->{_content_ref};
+ }
+}
+
+
+# delegate all other method calls the the headers object.
+sub AUTOLOAD
+{
+ my $method = substr($AUTOLOAD, rindex($AUTOLOAD, '::')+2);
+
+ # We create the function here so that it will not need to be
+ # autoloaded the next time.
+ no strict 'refs';
+ *$method = sub { shift->headers->$method(@_) };
+ goto &$method;
+}
+
+
+sub DESTROY {} # avoid AUTOLOADing it
+
+
+# Private method to access members in %$self
+sub _elem
+{
+ my $self = shift;
+ my $elem = shift;
+ my $old = $self->{$elem};
+ $self->{$elem} = $_[0] if @_;
+ return $old;
+}
+
+
+# Create private _parts attribute from current _content
+sub _parts {
+ my $self = shift;
+ my $ct = $self->content_type;
+ if ($ct =~ m,^multipart/,) {
+ require HTTP::Headers::Util;
+ my @h = HTTP::Headers::Util::split_header_words($self->header("Content-Type"));
+ die "Assert" unless @h;
+ my %h = @{$h[0]};
+ if (defined(my $b = $h{boundary})) {
+ my $str = $self->content;
+ $str =~ s/\r?\n--\Q$b\E--\r?\n.*//s;
+ if ($str =~ s/(^|.*?\r?\n)--\Q$b\E\r?\n//s) {
+ $self->{_parts} = [map HTTP::Message->parse($_),
+ split(/\r?\n--\Q$b\E\r?\n/, $str)]
+ }
+ }
+ }
+ elsif ($ct eq "message/http") {
+ require HTTP::Request;
+ require HTTP::Response;
+ my $content = $self->content;
+ my $class = ($content =~ m,^(HTTP/.*)\n,) ?
+ "HTTP::Response" : "HTTP::Request";
+ $self->{_parts} = [$class->parse($content)];
+ }
+ elsif ($ct =~ m,^message/,) {
+ $self->{_parts} = [ HTTP::Message->parse($self->content) ];
+ }
+
+ $self->{_parts} ||= [];
+}
+
+
+# Create private _content attribute from current _parts
+sub _content {
+ my $self = shift;
+ my $ct = $self->{_headers}->header("Content-Type") || "multipart/mixed";
+ if ($ct =~ m,^\s*message/,i) {
+ _set_content($self, $self->{_parts}[0]->as_string($CRLF), 1);
+ return;
+ }
+
+ require HTTP::Headers::Util;
+ my @v = HTTP::Headers::Util::split_header_words($ct);
+ Carp::carp("Multiple Content-Type headers") if @v > 1;
+ @v = @{$v[0]};
+
+ my $boundary;
+ my $boundary_index;
+ for (my @tmp = @v; @tmp;) {
+ my($k, $v) = splice(@tmp, 0, 2);
+ if ($k eq "boundary") {
+ $boundary = $v;
+ $boundary_index = @v - @tmp - 1;
+ last;
+ }
+ }
+
+ my @parts = map $_->as_string($CRLF), @{$self->{_parts}};
+
+ my $bno = 0;
+ $boundary = _boundary() unless defined $boundary;
+ CHECK_BOUNDARY:
+ {
+ for (@parts) {
+ if (index($_, $boundary) >= 0) {
+ # must have a better boundary
+ $boundary = _boundary(++$bno);
+ redo CHECK_BOUNDARY;
+ }
+ }
+ }
+
+ if ($boundary_index) {
+ $v[$boundary_index] = $boundary;
+ }
+ else {
+ push(@v, boundary => $boundary);
+ }
+
+ $ct = HTTP::Headers::Util::join_header_words(@v);
+ $self->{_headers}->header("Content-Type", $ct);
+
+ _set_content($self, "--$boundary$CRLF" .
+ join("$CRLF--$boundary$CRLF", @parts) .
+ "$CRLF--$boundary--$CRLF",
+ 1);
+}
+
+
+sub _boundary
+{
+ my $size = shift || return "xYzZY";
+ require MIME::Base64;
+ my $b = MIME::Base64::encode(join("", map chr(rand(256)), 1..$size*3), "");
+ $b =~ s/[\W]/X/g; # ensure alnum only
+ $b;
+}
+
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+HTTP::Message - HTTP style message (base class)
+
+=head1 SYNOPSIS
+
+ use base 'HTTP::Message';
+
+=head1 DESCRIPTION
+
+An C<HTTP::Message> object contains some headers and a content body.
+The following methods are available:
+
+=over 4
+
+=item $mess = HTTP::Message->new
+
+=item $mess = HTTP::Message->new( $headers )
+
+=item $mess = HTTP::Message->new( $headers, $content )
+
+This constructs a new message object. Normally you would want
+construct C<HTTP::Request> or C<HTTP::Response> objects instead.
+
+The optional $header argument should be a reference to an
+C<HTTP::Headers> object or a plain array reference of key/value pairs.
+If an C<HTTP::Headers> object is provided then a copy of it will be
+embedded into the constructed message, i.e. it will not be owned and
+can be modified afterwards without affecting the message.
+
+The optional $content argument should be a string of bytes.
+
+=item $mess = HTTP::Message->parse( $str )
+
+This constructs a new message object by parsing the given string.
+
+=item $mess->headers
+
+Returns the embedded C<HTTP::Headers> object.
+
+=item $mess->headers_as_string
+
+=item $mess->headers_as_string( $eol )
+
+Call the as_string() method for the headers in the
+message. This will be the same as
+
+ $mess->headers->as_string
+
+but it will make your program a whole character shorter :-)
+
+=item $mess->content
+
+=item $mess->content( $bytes )
+
+The content() method sets the raw content if an argument is given. If no
+argument is given the content is not touched. In either case the
+original raw content is returned.
+
+Note that the content should be a string of bytes. Strings in perl
+can contain characters outside the range of a byte. The C<Encode>
+module can be used to turn such strings into a string of bytes.
+
+=item $mess->add_content( $bytes )
+
+The add_content() methods appends more data bytes to the end of the
+current content buffer.
+
+=item $mess->add_content_utf8( $string )
+
+The add_content_utf8() method appends the UTF-8 bytes representing the
+string to the end of the current content buffer.
+
+=item $mess->content_ref
+
+=item $mess->content_ref( \$bytes )
+
+The content_ref() method will return a reference to content buffer string.
+It can be more efficient to access the content this way if the content
+is huge, and it can even be used for direct manipulation of the content,
+for instance:
+
+ ${$res->content_ref} =~ s/\bfoo\b/bar/g;
+
+This example would modify the content buffer in-place.
+
+If an argument is passed it will setup the content to reference some
+external source. The content() and add_content() methods
+will automatically dereference scalar references passed this way. For
+other references content() will return the reference itself and
+add_content() will refuse to do anything.
+
+=item $mess->content_charset
+
+This returns the charset used by the content in the message. The
+charset is either found as the charset attribute of the
+C<Content-Type> header or by guessing.
+
+See L<http://www.w3.org/TR/REC-html40/charset.html#spec-char-encoding>
+for details about how charset is determined.
+
+=item $mess->decoded_content( %options )
+
+Returns the content with any C<Content-Encoding> undone and for textual content
+the raw content encoded to Perl's Unicode strings. If the C<Content-Encoding>
+or C<charset> of the message is unknown this method will fail by returning
+C<undef>.
+
+The following options can be specified.
+
+=over
+
+=item C<charset>
+
+This override the charset parameter for text content. The value
+C<none> can used to suppress decoding of the charset.
+
+=item C<default_charset>
+
+This override the default charset guessed by content_charset() or
+if that fails "ISO-8859-1".
+
+=item C<alt_charset>
+
+If decoding fails because the charset specified in the Content-Type header
+isn't recognized by Perl's Encode module, then try decoding using this charset
+instead of failing. The C<alt_charset> might be specified as C<none> to simply
+return the string without any decoding of charset as alternative.
+
+=item C<charset_strict>
+
+Abort decoding if malformed characters is found in the content. By
+default you get the substitution character ("\x{FFFD}") in place of
+malformed characters.
+
+=item C<raise_error>
+
+If TRUE then raise an exception if not able to decode content. Reason
+might be that the specified C<Content-Encoding> or C<charset> is not
+supported. If this option is FALSE, then decoded_content() will return
+C<undef> on errors, but will still set $@.
+
+=item C<ref>
+
+If TRUE then a reference to decoded content is returned. This might
+be more efficient in cases where the decoded content is identical to
+the raw content as no data copying is required in this case.
+
+=back
+
+=item $mess->decodable
+
+=item HTTP::Message::decodable()
+
+This returns the encoding identifiers that decoded_content() can
+process. In scalar context returns a comma separated string of
+identifiers.
+
+This value is suitable for initializing the C<Accept-Encoding> request
+header field.
+
+=item $mess->decode
+
+This method tries to replace the content of the message with the
+decoded version and removes the C<Content-Encoding> header. Returns
+TRUE if successful and FALSE if not.
+
+If the message does not have a C<Content-Encoding> header this method
+does nothing and returns TRUE.
+
+Note that the content of the message is still bytes after this method
+has been called and you still need to call decoded_content() if you
+want to process its content as a string.
+
+=item $mess->encode( $encoding, ... )
+
+Apply the given encodings to the content of the message. Returns TRUE
+if successful. The "identity" (non-)encoding is always supported; other
+currently supported encodings, subject to availability of required
+additional modules, are "gzip", "deflate", "x-bzip2" and "base64".
+
+A successful call to this function will set the C<Content-Encoding>
+header.
+
+Note that C<multipart/*> or C<message/*> messages can't be encoded and
+this method will croak if you try.
+
+=item $mess->parts
+
+=item $mess->parts( @parts )
+
+=item $mess->parts( \@parts )
+
+Messages can be composite, i.e. contain other messages. The composite
+messages have a content type of C<multipart/*> or C<message/*>. This
+method give access to the contained messages.
+
+The argumentless form will return a list of C<HTTP::Message> objects.
+If the content type of $msg is not C<multipart/*> or C<message/*> then
+this will return the empty list. In scalar context only the first
+object is returned. The returned message parts should be regarded as
+read-only (future versions of this library might make it possible
+to modify the parent by modifying the parts).
+
+If the content type of $msg is C<message/*> then there will only be
+one part returned.
+
+If the content type is C<message/http>, then the return value will be
+either an C<HTTP::Request> or an C<HTTP::Response> object.
+
+If a @parts argument is given, then the content of the message will be
+modified. The array reference form is provided so that an empty list
+can be provided. The @parts array should contain C<HTTP::Message>
+objects. The @parts objects are owned by $mess after this call and
+should not be modified or made part of other messages.
+
+When updating the message with this method and the old content type of
+$mess is not C<multipart/*> or C<message/*>, then the content type is
+set to C<multipart/mixed> and all other content headers are cleared.
+
+This method will croak if the content type is C<message/*> and more
+than one part is provided.
+
+=item $mess->add_part( $part )
+
+This will add a part to a message. The $part argument should be
+another C<HTTP::Message> object. If the previous content type of
+$mess is not C<multipart/*> then the old content (together with all
+content headers) will be made part #1 and the content type made
+C<multipart/mixed> before the new part is added. The $part object is
+owned by $mess after this call and should not be modified or made part
+of other messages.
+
+There is no return value.
+
+=item $mess->clear
+
+Will clear the headers and set the content to the empty string. There
+is no return value
+
+=item $mess->protocol
+
+=item $mess->protocol( $proto )
+
+Sets the HTTP protocol used for the message. The protocol() is a string
+like C<HTTP/1.0> or C<HTTP/1.1>.
+
+=item $mess->clone
+
+Returns a copy of the message object.
+
+=item $mess->as_string
+
+=item $mess->as_string( $eol )
+
+Returns the message formatted as a single string.
+
+The optional $eol parameter specifies the line ending sequence to use.
+The default is "\n". If no $eol is given then as_string will ensure
+that the returned string is newline terminated (even when the message
+content is not). No extra newline is appended if an explicit $eol is
+passed.
+
+=item $mess->dump( %opt )
+
+Returns the message formatted as a string. In void context print the string.
+
+This differs from C<< $mess->as_string >> in that it escapes the bytes
+of the content so that it's safe to print them and it limits how much
+content to print. The escapes syntax used is the same as for Perl's
+double quoted strings. If there is no content the string "(no
+content)" is shown in its place.
+
+Options to influence the output can be passed as key/value pairs. The
+following options are recognized:
+
+=over
+
+=item maxlength => $num
+
+How much of the content to show. The default is 512. Set this to 0
+for unlimited.
+
+If the content is longer then the string is chopped at the limit and
+the string "...\n(### more bytes not shown)" appended.
+
+=item prefix => $str
+
+A string that will be prefixed to each line of the dump.
+
+=back
+
+=back
+
+All methods unknown to C<HTTP::Message> itself are delegated to the
+C<HTTP::Headers> object that is part of every message. This allows
+convenient access to these methods. Refer to L<HTTP::Headers> for
+details of these methods:
+
+ $mess->header( $field => $val )
+ $mess->push_header( $field => $val )
+ $mess->init_header( $field => $val )
+ $mess->remove_header( $field )
+ $mess->remove_content_headers
+ $mess->header_field_names
+ $mess->scan( \&doit )
+
+ $mess->date
+ $mess->expires
+ $mess->if_modified_since
+ $mess->if_unmodified_since
+ $mess->last_modified
+ $mess->content_type
+ $mess->content_encoding
+ $mess->content_length
+ $mess->content_language
+ $mess->title
+ $mess->user_agent
+ $mess->server
+ $mess->from
+ $mess->referer
+ $mess->www_authenticate
+ $mess->authorization
+ $mess->proxy_authorization
+ $mess->authorization_basic
+ $mess->proxy_authorization_basic
+
+=head1 COPYRIGHT
+
+Copyright 1995-2004 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package HTTP::Request;
+
+require HTTP::Message;
+@ISA = qw(HTTP::Message);
+$VERSION = "6.00";
+
+use strict;
+
+
+
+sub new
+{
+ my($class, $method, $uri, $header, $content) = @_;
+ my $self = $class->SUPER::new($header, $content);
+ $self->method($method);
+ $self->uri($uri);
+ $self;
+}
+
+
+sub parse
+{
+ my($class, $str) = @_;
+ my $request_line;
+ if ($str =~ s/^(.*)\n//) {
+ $request_line = $1;
+ }
+ else {
+ $request_line = $str;
+ $str = "";
+ }
+
+ my $self = $class->SUPER::parse($str);
+ my($method, $uri, $protocol) = split(' ', $request_line);
+ $self->method($method) if defined($method);
+ $self->uri($uri) if defined($uri);
+ $self->protocol($protocol) if $protocol;
+ $self;
+}
+
+
+sub clone
+{
+ my $self = shift;
+ my $clone = bless $self->SUPER::clone, ref($self);
+ $clone->method($self->method);
+ $clone->uri($self->uri);
+ $clone;
+}
+
+
+sub method
+{
+ shift->_elem('_method', @_);
+}
+
+
+sub uri
+{
+ my $self = shift;
+ my $old = $self->{'_uri'};
+ if (@_) {
+ my $uri = shift;
+ if (!defined $uri) {
+ # that's ok
+ }
+ elsif (ref $uri) {
+ Carp::croak("A URI can't be a " . ref($uri) . " reference")
+ if ref($uri) eq 'HASH' or ref($uri) eq 'ARRAY';
+ Carp::croak("Can't use a " . ref($uri) . " object as a URI")
+ unless $uri->can('scheme');
+ $uri = $uri->clone;
+ unless ($HTTP::URI_CLASS eq "URI") {
+ # Argh!! Hate this... old LWP legacy!
+ eval { local $SIG{__DIE__}; $uri = $uri->abs; };
+ die $@ if $@ && $@ !~ /Missing base argument/;
+ }
+ }
+ else {
+ $uri = $HTTP::URI_CLASS->new($uri);
+ }
+ $self->{'_uri'} = $uri;
+ delete $self->{'_uri_canonical'};
+ }
+ $old;
+}
+
+*url = \&uri; # legacy
+
+sub uri_canonical
+{
+ my $self = shift;
+ return $self->{'_uri_canonical'} ||= $self->{'_uri'}->canonical;
+}
+
+
+sub accept_decodable
+{
+ my $self = shift;
+ $self->header("Accept-Encoding", scalar($self->decodable));
+}
+
+sub as_string
+{
+ my $self = shift;
+ my($eol) = @_;
+ $eol = "\n" unless defined $eol;
+
+ my $req_line = $self->method || "-";
+ my $uri = $self->uri;
+ $uri = (defined $uri) ? $uri->as_string : "-";
+ $req_line .= " $uri";
+ my $proto = $self->protocol;
+ $req_line .= " $proto" if $proto;
+
+ return join($eol, $req_line, $self->SUPER::as_string(@_));
+}
+
+sub dump
+{
+ my $self = shift;
+ my @pre = ($self->method || "-", $self->uri || "-");
+ if (my $prot = $self->protocol) {
+ push(@pre, $prot);
+ }
+
+ return $self->SUPER::dump(
+ preheader => join(" ", @pre),
+ @_,
+ );
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Request - HTTP style request message
+
+=head1 SYNOPSIS
+
+ require HTTP::Request;
+ $request = HTTP::Request->new(GET => 'http://www.example.com/');
+
+and usually used like this:
+
+ $ua = LWP::UserAgent->new;
+ $response = $ua->request($request);
+
+=head1 DESCRIPTION
+
+C<HTTP::Request> is a class encapsulating HTTP style requests,
+consisting of a request line, some headers, and a content body. Note
+that the LWP library uses HTTP style requests even for non-HTTP
+protocols. Instances of this class are usually passed to the
+request() method of an C<LWP::UserAgent> object.
+
+C<HTTP::Request> is a subclass of C<HTTP::Message> and therefore
+inherits its methods. The following additional methods are available:
+
+=over 4
+
+=item $r = HTTP::Request->new( $method, $uri )
+
+=item $r = HTTP::Request->new( $method, $uri, $header )
+
+=item $r = HTTP::Request->new( $method, $uri, $header, $content )
+
+Constructs a new C<HTTP::Request> object describing a request on the
+object $uri using method $method. The $method argument must be a
+string. The $uri argument can be either a string, or a reference to a
+C<URI> object. The optional $header argument should be a reference to
+an C<HTTP::Headers> object or a plain array reference of key/value
+pairs. The optional $content argument should be a string of bytes.
+
+=item $r = HTTP::Request->parse( $str )
+
+This constructs a new request object by parsing the given string.
+
+=item $r->method
+
+=item $r->method( $val )
+
+This is used to get/set the method attribute. The method should be a
+short string like "GET", "HEAD", "PUT" or "POST".
+
+=item $r->uri
+
+=item $r->uri( $val )
+
+This is used to get/set the uri attribute. The $val can be a
+reference to a URI object or a plain string. If a string is given,
+then it should be parseable as an absolute URI.
+
+=item $r->header( $field )
+
+=item $r->header( $field => $value )
+
+This is used to get/set header values and it is inherited from
+C<HTTP::Headers> via C<HTTP::Message>. See L<HTTP::Headers> for
+details and other similar methods that can be used to access the
+headers.
+
+=item $r->accept_decodable
+
+This will set the C<Accept-Encoding> header to the list of encodings
+that decoded_content() can decode.
+
+=item $r->content
+
+=item $r->content( $bytes )
+
+This is used to get/set the content and it is inherited from the
+C<HTTP::Message> base class. See L<HTTP::Message> for details and
+other methods that can be used to access the content.
+
+Note that the content should be a string of bytes. Strings in perl
+can contain characters outside the range of a byte. The C<Encode>
+module can be used to turn such strings into a string of bytes.
+
+=item $r->as_string
+
+=item $r->as_string( $eol )
+
+Method returning a textual representation of the request.
+
+=back
+
+=head1 SEE ALSO
+
+L<HTTP::Headers>, L<HTTP::Message>, L<HTTP::Request::Common>,
+L<HTTP::Response>
+
+=head1 COPYRIGHT
+
+Copyright 1995-2004 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package HTTP::Request::Common;
+
+use strict;
+use vars qw(@EXPORT @EXPORT_OK $VERSION $DYNAMIC_FILE_UPLOAD);
+
+$DYNAMIC_FILE_UPLOAD ||= 0; # make it defined (don't know why)
+
+require Exporter;
+*import = \&Exporter::import;
+@EXPORT =qw(GET HEAD PUT POST);
+@EXPORT_OK = qw($DYNAMIC_FILE_UPLOAD DELETE);
+
+require HTTP::Request;
+use Carp();
+
+$VERSION = "6.03";
+
+my $CRLF = "\015\012"; # "\r\n" is not portable
+
+sub GET { _simple_req('GET', @_); }
+sub HEAD { _simple_req('HEAD', @_); }
+sub PUT { _simple_req('PUT' , @_); }
+sub DELETE { _simple_req('DELETE', @_); }
+
+sub POST
+{
+ my $url = shift;
+ my $req = HTTP::Request->new(POST => $url);
+ my $content;
+ $content = shift if @_ and ref $_[0];
+ my($k, $v);
+ while (($k,$v) = splice(@_, 0, 2)) {
+ if (lc($k) eq 'content') {
+ $content = $v;
+ }
+ else {
+ $req->push_header($k, $v);
+ }
+ }
+ my $ct = $req->header('Content-Type');
+ unless ($ct) {
+ $ct = 'application/x-www-form-urlencoded';
+ }
+ elsif ($ct eq 'form-data') {
+ $ct = 'multipart/form-data';
+ }
+
+ if (ref $content) {
+ if ($ct =~ m,^multipart/form-data\s*(;|$),i) {
+ require HTTP::Headers::Util;
+ my @v = HTTP::Headers::Util::split_header_words($ct);
+ Carp::carp("Multiple Content-Type headers") if @v > 1;
+ @v = @{$v[0]};
+
+ my $boundary;
+ my $boundary_index;
+ for (my @tmp = @v; @tmp;) {
+ my($k, $v) = splice(@tmp, 0, 2);
+ if ($k eq "boundary") {
+ $boundary = $v;
+ $boundary_index = @v - @tmp - 1;
+ last;
+ }
+ }
+
+ ($content, $boundary) = form_data($content, $boundary, $req);
+
+ if ($boundary_index) {
+ $v[$boundary_index] = $boundary;
+ }
+ else {
+ push(@v, boundary => $boundary);
+ }
+
+ $ct = HTTP::Headers::Util::join_header_words(@v);
+ }
+ else {
+ # We use a temporary URI object to format
+ # the application/x-www-form-urlencoded content.
+ require URI;
+ my $url = URI->new('http:');
+ $url->query_form(ref($content) eq "HASH" ? %$content : @$content);
+ $content = $url->query;
+
+ # HTML/4.01 says that line breaks are represented as "CR LF" pairs (i.e., `%0D%0A')
+ $content =~ s/(?<!%0D)%0A/%0D%0A/g;
+ }
+ }
+
+ $req->header('Content-Type' => $ct); # might be redundant
+ if (defined($content)) {
+ $req->header('Content-Length' =>
+ length($content)) unless ref($content);
+ $req->content($content);
+ }
+ else {
+ $req->header('Content-Length' => 0);
+ }
+ $req;
+}
+
+
+sub _simple_req
+{
+ my($method, $url) = splice(@_, 0, 2);
+ my $req = HTTP::Request->new($method => $url);
+ my($k, $v);
+ my $content;
+ while (($k,$v) = splice(@_, 0, 2)) {
+ if (lc($k) eq 'content') {
+ $req->add_content($v);
+ $content++;
+ }
+ else {
+ $req->push_header($k, $v);
+ }
+ }
+ if ($content && !defined($req->header("Content-Length"))) {
+ $req->header("Content-Length", length(${$req->content_ref}));
+ }
+ $req;
+}
+
+
+sub form_data # RFC1867
+{
+ my($data, $boundary, $req) = @_;
+ my @data = ref($data) eq "HASH" ? %$data : @$data; # copy
+ my $fhparts;
+ my @parts;
+ my($k,$v);
+ while (($k,$v) = splice(@data, 0, 2)) {
+ if (!ref($v)) {
+ $k =~ s/([\\\"])/\\$1/g; # escape quotes and backslashes
+ push(@parts,
+ qq(Content-Disposition: form-data; name="$k"$CRLF$CRLF$v));
+ }
+ else {
+ my($file, $usename, @headers) = @$v;
+ unless (defined $usename) {
+ $usename = $file;
+ $usename =~ s,.*/,, if defined($usename);
+ }
+ $k =~ s/([\\\"])/\\$1/g;
+ my $disp = qq(form-data; name="$k");
+ if (defined($usename) and length($usename)) {
+ $usename =~ s/([\\\"])/\\$1/g;
+ $disp .= qq(; filename="$usename");
+ }
+ my $content = "";
+ my $h = HTTP::Headers->new(@headers);
+ if ($file) {
+ open(my $fh, "<", $file) or Carp::croak("Can't open file $file: $!");
+ binmode($fh);
+ if ($DYNAMIC_FILE_UPLOAD) {
+ # will read file later, close it now in order to
+ # not accumulate to many open file handles
+ close($fh);
+ $content = \$file;
+ }
+ else {
+ local($/) = undef; # slurp files
+ $content = <$fh>;
+ close($fh);
+ }
+ unless ($h->header("Content-Type")) {
+ require LWP::MediaTypes;
+ LWP::MediaTypes::guess_media_type($file, $h);
+ }
+ }
+ if ($h->header("Content-Disposition")) {
+ # just to get it sorted first
+ $disp = $h->header("Content-Disposition");
+ $h->remove_header("Content-Disposition");
+ }
+ if ($h->header("Content")) {
+ $content = $h->header("Content");
+ $h->remove_header("Content");
+ }
+ my $head = join($CRLF, "Content-Disposition: $disp",
+ $h->as_string($CRLF),
+ "");
+ if (ref $content) {
+ push(@parts, [$head, $$content]);
+ $fhparts++;
+ }
+ else {
+ push(@parts, $head . $content);
+ }
+ }
+ }
+ return ("", "none") unless @parts;
+
+ my $content;
+ if ($fhparts) {
+ $boundary = boundary(10) # hopefully enough randomness
+ unless $boundary;
+
+ # add the boundaries to the @parts array
+ for (1..@parts-1) {
+ splice(@parts, $_*2-1, 0, "$CRLF--$boundary$CRLF");
+ }
+ unshift(@parts, "--$boundary$CRLF");
+ push(@parts, "$CRLF--$boundary--$CRLF");
+
+ # See if we can generate Content-Length header
+ my $length = 0;
+ for (@parts) {
+ if (ref $_) {
+ my ($head, $f) = @$_;
+ my $file_size;
+ unless ( -f $f && ($file_size = -s _) ) {
+ # The file is either a dynamic file like /dev/audio
+ # or perhaps a file in the /proc file system where
+ # stat may return a 0 size even though reading it
+ # will produce data. So we cannot make
+ # a Content-Length header.
+ undef $length;
+ last;
+ }
+ $length += $file_size + length $head;
+ }
+ else {
+ $length += length;
+ }
+ }
+ $length && $req->header('Content-Length' => $length);
+
+ # set up a closure that will return content piecemeal
+ $content = sub {
+ for (;;) {
+ unless (@parts) {
+ defined $length && $length != 0 &&
+ Carp::croak "length of data sent did not match calculated Content-Length header. Probably because uploaded file changed in size during transfer.";
+ return;
+ }
+ my $p = shift @parts;
+ unless (ref $p) {
+ $p .= shift @parts while @parts && !ref($parts[0]);
+ defined $length && ($length -= length $p);
+ return $p;
+ }
+ my($buf, $fh) = @$p;
+ unless (ref($fh)) {
+ my $file = $fh;
+ undef($fh);
+ open($fh, "<", $file) || Carp::croak("Can't open file $file: $!");
+ binmode($fh);
+ }
+ my $buflength = length $buf;
+ my $n = read($fh, $buf, 2048, $buflength);
+ if ($n) {
+ $buflength += $n;
+ unshift(@parts, ["", $fh]);
+ }
+ else {
+ close($fh);
+ }
+ if ($buflength) {
+ defined $length && ($length -= $buflength);
+ return $buf
+ }
+ }
+ };
+
+ }
+ else {
+ $boundary = boundary() unless $boundary;
+
+ my $bno = 0;
+ CHECK_BOUNDARY:
+ {
+ for (@parts) {
+ if (index($_, $boundary) >= 0) {
+ # must have a better boundary
+ $boundary = boundary(++$bno);
+ redo CHECK_BOUNDARY;
+ }
+ }
+ last;
+ }
+ $content = "--$boundary$CRLF" .
+ join("$CRLF--$boundary$CRLF", @parts) .
+ "$CRLF--$boundary--$CRLF";
+ }
+
+ wantarray ? ($content, $boundary) : $content;
+}
+
+
+sub boundary
+{
+ my $size = shift || return "xYzZY";
+ require MIME::Base64;
+ my $b = MIME::Base64::encode(join("", map chr(rand(256)), 1..$size*3), "");
+ $b =~ s/[\W]/X/g; # ensure alnum only
+ $b;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+HTTP::Request::Common - Construct common HTTP::Request objects
+
+=head1 SYNOPSIS
+
+ use HTTP::Request::Common;
+ $ua = LWP::UserAgent->new;
+ $ua->request(GET 'http://www.sn.no/');
+ $ua->request(POST 'http://somewhere/foo', [foo => bar, bar => foo]);
+
+=head1 DESCRIPTION
+
+This module provide functions that return newly created C<HTTP::Request>
+objects. These functions are usually more convenient to use than the
+standard C<HTTP::Request> constructor for the most common requests. The
+following functions are provided:
+
+=over 4
+
+=item GET $url
+
+=item GET $url, Header => Value,...
+
+The GET() function returns an C<HTTP::Request> object initialized with
+the "GET" method and the specified URL. It is roughly equivalent to the
+following call
+
+ HTTP::Request->new(
+ GET => $url,
+ HTTP::Headers->new(Header => Value,...),
+ )
+
+but is less cluttered. What is different is that a header named
+C<Content> will initialize the content part of the request instead of
+setting a header field. Note that GET requests should normally not
+have a content, so this hack makes more sense for the PUT() and POST()
+functions described below.
+
+The get(...) method of C<LWP::UserAgent> exists as a shortcut for
+$ua->request(GET ...).
+
+=item HEAD $url
+
+=item HEAD $url, Header => Value,...
+
+Like GET() but the method in the request is "HEAD".
+
+The head(...) method of "LWP::UserAgent" exists as a shortcut for
+$ua->request(HEAD ...).
+
+=item PUT $url
+
+=item PUT $url, Header => Value,...
+
+=item PUT $url, Header => Value,..., Content => $content
+
+Like GET() but the method in the request is "PUT".
+
+The content of the request can be specified using the "Content"
+pseudo-header. This steals a bit of the header field namespace as
+there is no way to directly specify a header that is actually called
+"Content". If you really need this you must update the request
+returned in a separate statement.
+
+=item DELETE $url
+
+=item DELETE $url, Header => Value,...
+
+Like GET() but the method in the request is "DELETE". This function
+is not exported by default.
+
+=item POST $url
+
+=item POST $url, Header => Value,...
+
+=item POST $url, $form_ref, Header => Value,...
+
+=item POST $url, Header => Value,..., Content => $form_ref
+
+=item POST $url, Header => Value,..., Content => $content
+
+This works mostly like PUT() with "POST" as the method, but this
+function also takes a second optional array or hash reference
+parameter $form_ref. As for PUT() the content can also be specified
+directly using the "Content" pseudo-header, and you may also provide
+the $form_ref this way.
+
+The $form_ref argument can be used to pass key/value pairs for the
+form content. By default we will initialize a request using the
+C<application/x-www-form-urlencoded> content type. This means that
+you can emulate an HTML E<lt>form> POSTing like this:
+
+ POST 'http://www.perl.org/survey.cgi',
+ [ name => 'Gisle Aas',
+ email => 'gisle@aas.no',
+ gender => 'M',
+ born => '1964',
+ perc => '3%',
+ ];
+
+This will create an HTTP::Request object that looks like this:
+
+ POST http://www.perl.org/survey.cgi
+ Content-Length: 66
+ Content-Type: application/x-www-form-urlencoded
+
+ name=Gisle%20Aas&email=gisle%40aas.no&gender=M&born=1964&perc=3%25
+
+Multivalued form fields can be specified by either repeating the field
+name or by passing the value as an array reference.
+
+The POST method also supports the C<multipart/form-data> content used
+for I<Form-based File Upload> as specified in RFC 1867. You trigger
+this content format by specifying a content type of C<'form-data'> as
+one of the request headers. If one of the values in the $form_ref is
+an array reference, then it is treated as a file part specification
+with the following interpretation:
+
+ [ $file, $filename, Header => Value... ]
+ [ undef, $filename, Header => Value,..., Content => $content ]
+
+The first value in the array ($file) is the name of a file to open.
+This file will be read and its content placed in the request. The
+routine will croak if the file can't be opened. Use an C<undef> as
+$file value if you want to specify the content directly with a
+C<Content> header. The $filename is the filename to report in the
+request. If this value is undefined, then the basename of the $file
+will be used. You can specify an empty string as $filename if you
+want to suppress sending the filename when you provide a $file value.
+
+If a $file is provided by no C<Content-Type> header, then C<Content-Type>
+and C<Content-Encoding> will be filled in automatically with the values
+returned by LWP::MediaTypes::guess_media_type()
+
+Sending my F<~/.profile> to the survey used as example above can be
+achieved by this:
+
+ POST 'http://www.perl.org/survey.cgi',
+ Content_Type => 'form-data',
+ Content => [ name => 'Gisle Aas',
+ email => 'gisle@aas.no',
+ gender => 'M',
+ born => '1964',
+ init => ["$ENV{HOME}/.profile"],
+ ]
+
+This will create an HTTP::Request object that almost looks this (the
+boundary and the content of your F<~/.profile> is likely to be
+different):
+
+ POST http://www.perl.org/survey.cgi
+ Content-Length: 388
+ Content-Type: multipart/form-data; boundary="6G+f"
+
+ --6G+f
+ Content-Disposition: form-data; name="name"
+
+ Gisle Aas
+ --6G+f
+ Content-Disposition: form-data; name="email"
+
+ gisle@aas.no
+ --6G+f
+ Content-Disposition: form-data; name="gender"
+
+ M
+ --6G+f
+ Content-Disposition: form-data; name="born"
+
+ 1964
+ --6G+f
+ Content-Disposition: form-data; name="init"; filename=".profile"
+ Content-Type: text/plain
+
+ PATH=/local/perl/bin:$PATH
+ export PATH
+
+ --6G+f--
+
+If you set the $DYNAMIC_FILE_UPLOAD variable (exportable) to some TRUE
+value, then you get back a request object with a subroutine closure as
+the content attribute. This subroutine will read the content of any
+files on demand and return it in suitable chunks. This allow you to
+upload arbitrary big files without using lots of memory. You can even
+upload infinite files like F</dev/audio> if you wish; however, if
+the file is not a plain file, there will be no Content-Length header
+defined for the request. Not all servers (or server
+applications) like this. Also, if the file(s) change in size between
+the time the Content-Length is calculated and the time that the last
+chunk is delivered, the subroutine will C<Croak>.
+
+The post(...) method of "LWP::UserAgent" exists as a shortcut for
+$ua->request(POST ...).
+
+=back
+
+=head1 SEE ALSO
+
+L<HTTP::Request>, L<LWP::UserAgent>
+
+
+=head1 COPYRIGHT
+
+Copyright 1997-2004, Gisle Aas
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
+=cut
+
--- /dev/null
+package HTTP::Response;
+
+require HTTP::Message;
+@ISA = qw(HTTP::Message);
+$VERSION = "6.03";
+
+use strict;
+use HTTP::Status ();
+
+
+
+sub new
+{
+ my($class, $rc, $msg, $header, $content) = @_;
+ my $self = $class->SUPER::new($header, $content);
+ $self->code($rc);
+ $self->message($msg);
+ $self;
+}
+
+
+sub parse
+{
+ my($class, $str) = @_;
+ my $status_line;
+ if ($str =~ s/^(.*)\n//) {
+ $status_line = $1;
+ }
+ else {
+ $status_line = $str;
+ $str = "";
+ }
+
+ my $self = $class->SUPER::parse($str);
+ my($protocol, $code, $message);
+ if ($status_line =~ /^\d{3} /) {
+ # Looks like a response created by HTTP::Response->new
+ ($code, $message) = split(' ', $status_line, 2);
+ } else {
+ ($protocol, $code, $message) = split(' ', $status_line, 3);
+ }
+ $self->protocol($protocol) if $protocol;
+ $self->code($code) if defined($code);
+ $self->message($message) if defined($message);
+ $self;
+}
+
+
+sub clone
+{
+ my $self = shift;
+ my $clone = bless $self->SUPER::clone, ref($self);
+ $clone->code($self->code);
+ $clone->message($self->message);
+ $clone->request($self->request->clone) if $self->request;
+ # we don't clone previous
+ $clone;
+}
+
+
+sub code { shift->_elem('_rc', @_); }
+sub message { shift->_elem('_msg', @_); }
+sub previous { shift->_elem('_previous',@_); }
+sub request { shift->_elem('_request', @_); }
+
+
+sub status_line
+{
+ my $self = shift;
+ my $code = $self->{'_rc'} || "000";
+ my $mess = $self->{'_msg'} || HTTP::Status::status_message($code) || "Unknown code";
+ return "$code $mess";
+}
+
+
+sub base
+{
+ my $self = shift;
+ my $base = (
+ $self->header('Content-Base'), # used to be HTTP/1.1
+ $self->header('Content-Location'), # HTTP/1.1
+ $self->header('Base'), # HTTP/1.0
+ )[0];
+ if ($base && $base =~ /^$URI::scheme_re:/o) {
+ # already absolute
+ return $HTTP::URI_CLASS->new($base);
+ }
+
+ my $req = $self->request;
+ if ($req) {
+ # if $base is undef here, the return value is effectively
+ # just a copy of $self->request->uri.
+ return $HTTP::URI_CLASS->new_abs($base, $req->uri);
+ }
+
+ # can't find an absolute base
+ return undef;
+}
+
+
+sub redirects {
+ my $self = shift;
+ my @r;
+ my $r = $self;
+ while (my $p = $r->previous) {
+ push(@r, $p);
+ $r = $p;
+ }
+ return @r unless wantarray;
+ return reverse @r;
+}
+
+
+sub filename
+{
+ my $self = shift;
+ my $file;
+
+ my $cd = $self->header('Content-Disposition');
+ if ($cd) {
+ require HTTP::Headers::Util;
+ if (my @cd = HTTP::Headers::Util::split_header_words($cd)) {
+ my ($disposition, undef, %cd_param) = @{$cd[-1]};
+ $file = $cd_param{filename};
+
+ # RFC 2047 encoded?
+ if ($file && $file =~ /^=\?(.+?)\?(.+?)\?(.+)\?=$/) {
+ my $charset = $1;
+ my $encoding = uc($2);
+ my $encfile = $3;
+
+ if ($encoding eq 'Q' || $encoding eq 'B') {
+ local($SIG{__DIE__});
+ eval {
+ if ($encoding eq 'Q') {
+ $encfile =~ s/_/ /g;
+ require MIME::QuotedPrint;
+ $encfile = MIME::QuotedPrint::decode($encfile);
+ }
+ else { # $encoding eq 'B'
+ require MIME::Base64;
+ $encfile = MIME::Base64::decode($encfile);
+ }
+
+ require Encode;
+ require Encode::Locale;
+ Encode::from_to($encfile, $charset, "locale_fs");
+ };
+
+ $file = $encfile unless $@;
+ }
+ }
+ }
+ }
+
+ unless (defined($file) && length($file)) {
+ my $uri;
+ if (my $cl = $self->header('Content-Location')) {
+ $uri = URI->new($cl);
+ }
+ elsif (my $request = $self->request) {
+ $uri = $request->uri;
+ }
+
+ if ($uri) {
+ $file = ($uri->path_segments)[-1];
+ }
+ }
+
+ if ($file) {
+ $file =~ s,.*[\\/],,; # basename
+ }
+
+ if ($file && !length($file)) {
+ $file = undef;
+ }
+
+ $file;
+}
+
+
+sub as_string
+{
+ require HTTP::Status;
+ my $self = shift;
+ my($eol) = @_;
+ $eol = "\n" unless defined $eol;
+
+ my $status_line = $self->status_line;
+ my $proto = $self->protocol;
+ $status_line = "$proto $status_line" if $proto;
+
+ return join($eol, $status_line, $self->SUPER::as_string(@_));
+}
+
+
+sub dump
+{
+ my $self = shift;
+
+ my $status_line = $self->status_line;
+ my $proto = $self->protocol;
+ $status_line = "$proto $status_line" if $proto;
+
+ return $self->SUPER::dump(
+ preheader => $status_line,
+ @_,
+ );
+}
+
+
+sub is_info { HTTP::Status::is_info (shift->{'_rc'}); }
+sub is_success { HTTP::Status::is_success (shift->{'_rc'}); }
+sub is_redirect { HTTP::Status::is_redirect (shift->{'_rc'}); }
+sub is_error { HTTP::Status::is_error (shift->{'_rc'}); }
+
+
+sub error_as_HTML
+{
+ my $self = shift;
+ my $title = 'An Error Occurred';
+ my $body = $self->status_line;
+ $body =~ s/&/&/g;
+ $body =~ s/</</g;
+ return <<EOM;
+<html>
+<head><title>$title</title></head>
+<body>
+<h1>$title</h1>
+<p>$body</p>
+</body>
+</html>
+EOM
+}
+
+
+sub current_age
+{
+ my $self = shift;
+ my $time = shift;
+
+ # Implementation of RFC 2616 section 13.2.3
+ # (age calculations)
+ my $response_time = $self->client_date;
+ my $date = $self->date;
+
+ my $age = 0;
+ if ($response_time && $date) {
+ $age = $response_time - $date; # apparent_age
+ $age = 0 if $age < 0;
+ }
+
+ my $age_v = $self->header('Age');
+ if ($age_v && $age_v > $age) {
+ $age = $age_v; # corrected_received_age
+ }
+
+ if ($response_time) {
+ my $request = $self->request;
+ if ($request) {
+ my $request_time = $request->date;
+ if ($request_time && $request_time < $response_time) {
+ # Add response_delay to age to get 'corrected_initial_age'
+ $age += $response_time - $request_time;
+ }
+ }
+ $age += ($time || time) - $response_time;
+ }
+ return $age;
+}
+
+
+sub freshness_lifetime
+{
+ my($self, %opt) = @_;
+
+ # First look for the Cache-Control: max-age=n header
+ for my $cc ($self->header('Cache-Control')) {
+ for my $cc_dir (split(/\s*,\s*/, $cc)) {
+ return $1 if $cc_dir =~ /^max-age\s*=\s*(\d+)/i;
+ }
+ }
+
+ # Next possibility is to look at the "Expires" header
+ my $date = $self->date || $self->client_date || $opt{time} || time;
+ if (my $expires = $self->expires) {
+ return $expires - $date;
+ }
+
+ # Must apply heuristic expiration
+ return undef if exists $opt{heuristic_expiry} && !$opt{heuristic_expiry};
+
+ # Default heuristic expiration parameters
+ $opt{h_min} ||= 60;
+ $opt{h_max} ||= 24 * 3600;
+ $opt{h_lastmod_fraction} ||= 0.10; # 10% since last-mod suggested by RFC2616
+ $opt{h_default} ||= 3600;
+
+ # Should give a warning if more than 24 hours according to
+ # RFC 2616 section 13.2.4. Here we just make this the default
+ # maximum value.
+
+ if (my $last_modified = $self->last_modified) {
+ my $h_exp = ($date - $last_modified) * $opt{h_lastmod_fraction};
+ return $opt{h_min} if $h_exp < $opt{h_min};
+ return $opt{h_max} if $h_exp > $opt{h_max};
+ return $h_exp;
+ }
+
+ # default when all else fails
+ return $opt{h_min} if $opt{h_min} > $opt{h_default};
+ return $opt{h_default};
+}
+
+
+sub is_fresh
+{
+ my($self, %opt) = @_;
+ $opt{time} ||= time;
+ my $f = $self->freshness_lifetime(%opt);
+ return undef unless defined($f);
+ return $f > $self->current_age($opt{time});
+}
+
+
+sub fresh_until
+{
+ my($self, %opt) = @_;
+ $opt{time} ||= time;
+ my $f = $self->freshness_lifetime(%opt);
+ return undef unless defined($f);
+ return $f - $self->current_age($opt{time}) + $opt{time};
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+HTTP::Response - HTTP style response message
+
+=head1 SYNOPSIS
+
+Response objects are returned by the request() method of the C<LWP::UserAgent>:
+
+ # ...
+ $response = $ua->request($request)
+ if ($response->is_success) {
+ print $response->decoded_content;
+ }
+ else {
+ print STDERR $response->status_line, "\n";
+ }
+
+=head1 DESCRIPTION
+
+The C<HTTP::Response> class encapsulates HTTP style responses. A
+response consists of a response line, some headers, and a content
+body. Note that the LWP library uses HTTP style responses even for
+non-HTTP protocol schemes. Instances of this class are usually
+created and returned by the request() method of an C<LWP::UserAgent>
+object.
+
+C<HTTP::Response> is a subclass of C<HTTP::Message> and therefore
+inherits its methods. The following additional methods are available:
+
+=over 4
+
+=item $r = HTTP::Response->new( $code )
+
+=item $r = HTTP::Response->new( $code, $msg )
+
+=item $r = HTTP::Response->new( $code, $msg, $header )
+
+=item $r = HTTP::Response->new( $code, $msg, $header, $content )
+
+Constructs a new C<HTTP::Response> object describing a response with
+response code $code and optional message $msg. The optional $header
+argument should be a reference to an C<HTTP::Headers> object or a
+plain array reference of key/value pairs. The optional $content
+argument should be a string of bytes. The meaning these arguments are
+described below.
+
+=item $r = HTTP::Response->parse( $str )
+
+This constructs a new response object by parsing the given string.
+
+=item $r->code
+
+=item $r->code( $code )
+
+This is used to get/set the code attribute. The code is a 3 digit
+number that encode the overall outcome of an HTTP response. The
+C<HTTP::Status> module provide constants that provide mnemonic names
+for the code attribute.
+
+=item $r->message
+
+=item $r->message( $message )
+
+This is used to get/set the message attribute. The message is a short
+human readable single line string that explains the response code.
+
+=item $r->header( $field )
+
+=item $r->header( $field => $value )
+
+This is used to get/set header values and it is inherited from
+C<HTTP::Headers> via C<HTTP::Message>. See L<HTTP::Headers> for
+details and other similar methods that can be used to access the
+headers.
+
+=item $r->content
+
+=item $r->content( $bytes )
+
+This is used to get/set the raw content and it is inherited from the
+C<HTTP::Message> base class. See L<HTTP::Message> for details and
+other methods that can be used to access the content.
+
+=item $r->decoded_content( %options )
+
+This will return the content after any C<Content-Encoding> and
+charsets have been decoded. See L<HTTP::Message> for details.
+
+=item $r->request
+
+=item $r->request( $request )
+
+This is used to get/set the request attribute. The request attribute
+is a reference to the the request that caused this response. It does
+not have to be the same request passed to the $ua->request() method,
+because there might have been redirects and authorization retries in
+between.
+
+=item $r->previous
+
+=item $r->previous( $response )
+
+This is used to get/set the previous attribute. The previous
+attribute is used to link together chains of responses. You get
+chains of responses if the first response is redirect or unauthorized.
+The value is C<undef> if this is the first response in a chain.
+
+Note that the method $r->redirects is provided as a more convenient
+way to access the response chain.
+
+=item $r->status_line
+
+Returns the string "E<lt>code> E<lt>message>". If the message attribute
+is not set then the official name of E<lt>code> (see L<HTTP::Status>)
+is substituted.
+
+=item $r->base
+
+Returns the base URI for this response. The return value will be a
+reference to a URI object.
+
+The base URI is obtained from one the following sources (in priority
+order):
+
+=over 4
+
+=item 1.
+
+Embedded in the document content, for instance <BASE HREF="...">
+in HTML documents.
+
+=item 2.
+
+A "Content-Base:" or a "Content-Location:" header in the response.
+
+For backwards compatibility with older HTTP implementations we will
+also look for the "Base:" header.
+
+=item 3.
+
+The URI used to request this response. This might not be the original
+URI that was passed to $ua->request() method, because we might have
+received some redirect responses first.
+
+=back
+
+If none of these sources provide an absolute URI, undef is returned.
+
+When the LWP protocol modules produce the HTTP::Response object, then
+any base URI embedded in the document (step 1) will already have
+initialized the "Content-Base:" header. This means that this method
+only performs the last 2 steps (the content is not always available
+either).
+
+=item $r->filename
+
+Returns a filename for this response. Note that doing sanity checks
+on the returned filename (eg. removing characters that cannot be used
+on the target filesystem where the filename would be used, and
+laundering it for security purposes) are the caller's responsibility;
+the only related thing done by this method is that it makes a simple
+attempt to return a plain filename with no preceding path segments.
+
+The filename is obtained from one the following sources (in priority
+order):
+
+=over 4
+
+=item 1.
+
+A "Content-Disposition:" header in the response. Proper decoding of
+RFC 2047 encoded filenames requires the C<MIME::QuotedPrint> (for "Q"
+encoding), C<MIME::Base64> (for "B" encoding), and C<Encode> modules.
+
+=item 2.
+
+A "Content-Location:" header in the response.
+
+=item 3.
+
+The URI used to request this response. This might not be the original
+URI that was passed to $ua->request() method, because we might have
+received some redirect responses first.
+
+=back
+
+If a filename cannot be derived from any of these sources, undef is
+returned.
+
+=item $r->as_string
+
+=item $r->as_string( $eol )
+
+Returns a textual representation of the response.
+
+=item $r->is_info
+
+=item $r->is_success
+
+=item $r->is_redirect
+
+=item $r->is_error
+
+These methods indicate if the response was informational, successful, a
+redirection, or an error. See L<HTTP::Status> for the meaning of these.
+
+=item $r->error_as_HTML
+
+Returns a string containing a complete HTML document indicating what
+error occurred. This method should only be called when $r->is_error
+is TRUE.
+
+=item $r->redirects
+
+Returns the list of redirect responses that lead up to this response
+by following the $r->previous chain. The list order is oldest first.
+
+In scalar context return the number of redirect responses leading up
+to this one.
+
+=item $r->current_age
+
+Calculates the "current age" of the response as specified by RFC 2616
+section 13.2.3. The age of a response is the time since it was sent
+by the origin server. The returned value is a number representing the
+age in seconds.
+
+=item $r->freshness_lifetime( %opt )
+
+Calculates the "freshness lifetime" of the response as specified by
+RFC 2616 section 13.2.4. The "freshness lifetime" is the length of
+time between the generation of a response and its expiration time.
+The returned value is the number of seconds until expiry.
+
+If the response does not contain an "Expires" or a "Cache-Control"
+header, then this function will apply some simple heuristic based on
+the "Last-Modified" header to determine a suitable lifetime. The
+following options might be passed to control the heuristics:
+
+=over
+
+=item heuristic_expiry => $bool
+
+If passed as a FALSE value, don't apply heuristics and just return
+C<undef> when "Expires" or "Cache-Control" is lacking.
+
+=item h_lastmod_fraction => $num
+
+This number represent the fraction of the difference since the
+"Last-Modified" timestamp to make the expiry time. The default is
+C<0.10>, the suggested typical setting of 10% in RFC 2616.
+
+=item h_min => $sec
+
+This is the lower limit of the heuristic expiry age to use. The
+default is C<60> (1 minute).
+
+=item h_max => $sec
+
+This is the upper limit of the heuristic expiry age to use. The
+default is C<86400> (24 hours).
+
+=item h_default => $sec
+
+This is the expiry age to use when nothing else applies. The default
+is C<3600> (1 hour) or "h_min" if greater.
+
+=back
+
+=item $r->is_fresh( %opt )
+
+Returns TRUE if the response is fresh, based on the values of
+freshness_lifetime() and current_age(). If the response is no longer
+fresh, then it has to be re-fetched or re-validated by the origin
+server.
+
+Options might be passed to control expiry heuristics, see the
+description of freshness_lifetime().
+
+=item $r->fresh_until( %opt )
+
+Returns the time (seconds since epoch) when this entity is no longer fresh.
+
+Options might be passed to control expiry heuristics, see the
+description of freshness_lifetime().
+
+=back
+
+=head1 SEE ALSO
+
+L<HTTP::Headers>, L<HTTP::Message>, L<HTTP::Status>, L<HTTP::Request>
+
+=head1 COPYRIGHT
+
+Copyright 1995-2004 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
+
--- /dev/null
+package HTTP::Status;
+
+use strict;
+require 5.002; # because we use prototypes
+
+use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION);
+
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT = qw(is_info is_success is_redirect is_error status_message);
+@EXPORT_OK = qw(is_client_error is_server_error);
+$VERSION = "6.03";
+
+# Note also addition of mnemonics to @EXPORT below
+
+# Unmarked codes are from RFC 2616
+# See also: http://en.wikipedia.org/wiki/List_of_HTTP_status_codes
+
+my %StatusCode = (
+ 100 => 'Continue',
+ 101 => 'Switching Protocols',
+ 102 => 'Processing', # RFC 2518 (WebDAV)
+ 200 => 'OK',
+ 201 => 'Created',
+ 202 => 'Accepted',
+ 203 => 'Non-Authoritative Information',
+ 204 => 'No Content',
+ 205 => 'Reset Content',
+ 206 => 'Partial Content',
+ 207 => 'Multi-Status', # RFC 2518 (WebDAV)
+ 208 => 'Already Reported', # RFC 5842
+ 300 => 'Multiple Choices',
+ 301 => 'Moved Permanently',
+ 302 => 'Found',
+ 303 => 'See Other',
+ 304 => 'Not Modified',
+ 305 => 'Use Proxy',
+ 307 => 'Temporary Redirect',
+ 400 => 'Bad Request',
+ 401 => 'Unauthorized',
+ 402 => 'Payment Required',
+ 403 => 'Forbidden',
+ 404 => 'Not Found',
+ 405 => 'Method Not Allowed',
+ 406 => 'Not Acceptable',
+ 407 => 'Proxy Authentication Required',
+ 408 => 'Request Timeout',
+ 409 => 'Conflict',
+ 410 => 'Gone',
+ 411 => 'Length Required',
+ 412 => 'Precondition Failed',
+ 413 => 'Request Entity Too Large',
+ 414 => 'Request-URI Too Large',
+ 415 => 'Unsupported Media Type',
+ 416 => 'Request Range Not Satisfiable',
+ 417 => 'Expectation Failed',
+ 418 => 'I\'m a teapot', # RFC 2324
+ 422 => 'Unprocessable Entity', # RFC 2518 (WebDAV)
+ 423 => 'Locked', # RFC 2518 (WebDAV)
+ 424 => 'Failed Dependency', # RFC 2518 (WebDAV)
+ 425 => 'No code', # WebDAV Advanced Collections
+ 426 => 'Upgrade Required', # RFC 2817
+ 428 => 'Precondition Required',
+ 429 => 'Too Many Requests',
+ 431 => 'Request Header Fields Too Large',
+ 449 => 'Retry with', # unofficial Microsoft
+ 500 => 'Internal Server Error',
+ 501 => 'Not Implemented',
+ 502 => 'Bad Gateway',
+ 503 => 'Service Unavailable',
+ 504 => 'Gateway Timeout',
+ 505 => 'HTTP Version Not Supported',
+ 506 => 'Variant Also Negotiates', # RFC 2295
+ 507 => 'Insufficient Storage', # RFC 2518 (WebDAV)
+ 509 => 'Bandwidth Limit Exceeded', # unofficial
+ 510 => 'Not Extended', # RFC 2774
+ 511 => 'Network Authentication Required',
+);
+
+my $mnemonicCode = '';
+my ($code, $message);
+while (($code, $message) = each %StatusCode) {
+ # create mnemonic subroutines
+ $message =~ s/I'm/I am/;
+ $message =~ tr/a-z \-/A-Z__/;
+ $mnemonicCode .= "sub HTTP_$message () { $code }\n";
+ $mnemonicCode .= "*RC_$message = \\&HTTP_$message;\n"; # legacy
+ $mnemonicCode .= "push(\@EXPORT_OK, 'HTTP_$message');\n";
+ $mnemonicCode .= "push(\@EXPORT, 'RC_$message');\n";
+}
+eval $mnemonicCode; # only one eval for speed
+die if $@;
+
+# backwards compatibility
+*RC_MOVED_TEMPORARILY = \&RC_FOUND; # 302 was renamed in the standard
+push(@EXPORT, "RC_MOVED_TEMPORARILY");
+
+%EXPORT_TAGS = (
+ constants => [grep /^HTTP_/, @EXPORT_OK],
+ is => [grep /^is_/, @EXPORT, @EXPORT_OK],
+);
+
+
+sub status_message ($) { $StatusCode{$_[0]}; }
+
+sub is_info ($) { $_[0] >= 100 && $_[0] < 200; }
+sub is_success ($) { $_[0] >= 200 && $_[0] < 300; }
+sub is_redirect ($) { $_[0] >= 300 && $_[0] < 400; }
+sub is_error ($) { $_[0] >= 400 && $_[0] < 600; }
+sub is_client_error ($) { $_[0] >= 400 && $_[0] < 500; }
+sub is_server_error ($) { $_[0] >= 500 && $_[0] < 600; }
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+HTTP::Status - HTTP Status code processing
+
+=head1 SYNOPSIS
+
+ use HTTP::Status qw(:constants :is status_message);
+
+ if ($rc != HTTP_OK) {
+ print status_message($rc), "\n";
+ }
+
+ if (is_success($rc)) { ... }
+ if (is_error($rc)) { ... }
+ if (is_redirect($rc)) { ... }
+
+=head1 DESCRIPTION
+
+I<HTTP::Status> is a library of routines for defining and
+classifying HTTP status codes for libwww-perl. Status codes are
+used to encode the overall outcome of an HTTP response message. Codes
+correspond to those defined in RFC 2616 and RFC 2518.
+
+=head1 CONSTANTS
+
+The following constant functions can be used as mnemonic status code
+names. None of these are exported by default. Use the C<:constants>
+tag to import them all.
+
+ HTTP_CONTINUE (100)
+ HTTP_SWITCHING_PROTOCOLS (101)
+ HTTP_PROCESSING (102)
+
+ HTTP_OK (200)
+ HTTP_CREATED (201)
+ HTTP_ACCEPTED (202)
+ HTTP_NON_AUTHORITATIVE_INFORMATION (203)
+ HTTP_NO_CONTENT (204)
+ HTTP_RESET_CONTENT (205)
+ HTTP_PARTIAL_CONTENT (206)
+ HTTP_MULTI_STATUS (207)
+ HTTP_ALREADY_REPORTED (208)
+
+ HTTP_MULTIPLE_CHOICES (300)
+ HTTP_MOVED_PERMANENTLY (301)
+ HTTP_FOUND (302)
+ HTTP_SEE_OTHER (303)
+ HTTP_NOT_MODIFIED (304)
+ HTTP_USE_PROXY (305)
+ HTTP_TEMPORARY_REDIRECT (307)
+
+ HTTP_BAD_REQUEST (400)
+ HTTP_UNAUTHORIZED (401)
+ HTTP_PAYMENT_REQUIRED (402)
+ HTTP_FORBIDDEN (403)
+ HTTP_NOT_FOUND (404)
+ HTTP_METHOD_NOT_ALLOWED (405)
+ HTTP_NOT_ACCEPTABLE (406)
+ HTTP_PROXY_AUTHENTICATION_REQUIRED (407)
+ HTTP_REQUEST_TIMEOUT (408)
+ HTTP_CONFLICT (409)
+ HTTP_GONE (410)
+ HTTP_LENGTH_REQUIRED (411)
+ HTTP_PRECONDITION_FAILED (412)
+ HTTP_REQUEST_ENTITY_TOO_LARGE (413)
+ HTTP_REQUEST_URI_TOO_LARGE (414)
+ HTTP_UNSUPPORTED_MEDIA_TYPE (415)
+ HTTP_REQUEST_RANGE_NOT_SATISFIABLE (416)
+ HTTP_EXPECTATION_FAILED (417)
+ HTTP_I_AM_A_TEAPOT (418)
+ HTTP_UNPROCESSABLE_ENTITY (422)
+ HTTP_LOCKED (423)
+ HTTP_FAILED_DEPENDENCY (424)
+ HTTP_NO_CODE (425)
+ HTTP_UPGRADE_REQUIRED (426)
+ HTTP_PRECONDITION_REQUIRED (428)
+ HTTP_TOO_MANY_REQUESTS (429)
+ HTTP_REQUEST_HEADER_FIELDS_TOO_LARGE (431)
+ HTTP_RETRY_WITH (449)
+
+ HTTP_INTERNAL_SERVER_ERROR (500)
+ HTTP_NOT_IMPLEMENTED (501)
+ HTTP_BAD_GATEWAY (502)
+ HTTP_SERVICE_UNAVAILABLE (503)
+ HTTP_GATEWAY_TIMEOUT (504)
+ HTTP_HTTP_VERSION_NOT_SUPPORTED (505)
+ HTTP_VARIANT_ALSO_NEGOTIATES (506)
+ HTTP_INSUFFICIENT_STORAGE (507)
+ HTTP_BANDWIDTH_LIMIT_EXCEEDED (509)
+ HTTP_NOT_EXTENDED (510)
+ HTTP_NETWORK_AUTHENTICATION_REQUIRED (511)
+
+=head1 FUNCTIONS
+
+The following additional functions are provided. Most of them are
+exported by default. The C<:is> import tag can be used to import all
+the classification functions.
+
+=over 4
+
+=item status_message( $code )
+
+The status_message() function will translate status codes to human
+readable strings. The string is the same as found in the constant
+names above. If the $code is unknown, then C<undef> is returned.
+
+=item is_info( $code )
+
+Return TRUE if C<$code> is an I<Informational> status code (1xx). This
+class of status code indicates a provisional response which can't have
+any content.
+
+=item is_success( $code )
+
+Return TRUE if C<$code> is a I<Successful> status code (2xx).
+
+=item is_redirect( $code )
+
+Return TRUE if C<$code> is a I<Redirection> status code (3xx). This class of
+status code indicates that further action needs to be taken by the
+user agent in order to fulfill the request.
+
+=item is_error( $code )
+
+Return TRUE if C<$code> is an I<Error> status code (4xx or 5xx). The function
+returns TRUE for both client and server error status codes.
+
+=item is_client_error( $code )
+
+Return TRUE if C<$code> is a I<Client Error> status code (4xx). This class
+of status code is intended for cases in which the client seems to have
+erred.
+
+This function is B<not> exported by default.
+
+=item is_server_error( $code )
+
+Return TRUE if C<$code> is a I<Server Error> status code (5xx). This class
+of status codes is intended for cases in which the server is aware
+that it has erred or is incapable of performing the request.
+
+This function is B<not> exported by default.
+
+=back
+
+=head1 BUGS
+
+For legacy reasons all the C<HTTP_> constants are exported by default
+with the prefix C<RC_>. It's recommended to use explicit imports and
+the C<:constants> tag instead of relying on this.
--- /dev/null
+package LWP::Authen::Basic;
+use strict;
+
+require MIME::Base64;
+
+sub auth_header {
+ my($class, $user, $pass) = @_;
+ return "Basic " . MIME::Base64::encode("$user:$pass", "");
+}
+
+sub authenticate
+{
+ my($class, $ua, $proxy, $auth_param, $response,
+ $request, $arg, $size) = @_;
+
+ my $realm = $auth_param->{realm} || "";
+ my $url = $proxy ? $request->{proxy} : $request->uri_canonical;
+ return $response unless $url;
+ my $host_port = $url->host_port;
+ my $auth_header = $proxy ? "Proxy-Authorization" : "Authorization";
+
+ my @m = $proxy ? (m_proxy => $url) : (m_host_port => $host_port);
+ push(@m, realm => $realm);
+
+ my $h = $ua->get_my_handler("request_prepare", @m, sub {
+ $_[0]{callback} = sub {
+ my($req, $ua, $h) = @_;
+ my($user, $pass) = $ua->credentials($host_port, $h->{realm});
+ if (defined $user) {
+ my $auth_value = $class->auth_header($user, $pass, $req, $ua, $h);
+ $req->header($auth_header => $auth_value);
+ }
+ };
+ });
+ $h->{auth_param} = $auth_param;
+
+ if (!$proxy && !$request->header($auth_header) && $ua->credentials($host_port, $realm)) {
+ # we can make sure this handler applies and retry
+ add_path($h, $url->path);
+ return $ua->request($request->clone, $arg, $size, $response);
+ }
+
+ my($user, $pass) = $ua->get_basic_credentials($realm, $url, $proxy);
+ unless (defined $user and defined $pass) {
+ $ua->set_my_handler("request_prepare", undef, @m); # delete handler
+ return $response;
+ }
+
+ # check that the password has changed
+ my ($olduser, $oldpass) = $ua->credentials($host_port, $realm);
+ return $response if (defined $olduser and defined $oldpass and
+ $user eq $olduser and $pass eq $oldpass);
+
+ $ua->credentials($host_port, $realm, $user, $pass);
+ add_path($h, $url->path) unless $proxy;
+ return $ua->request($request->clone, $arg, $size, $response);
+}
+
+sub add_path {
+ my($h, $path) = @_;
+ $path =~ s,[^/]+\z,,;
+ push(@{$h->{m_path_prefix}}, $path);
+}
+
+1;
--- /dev/null
+package LWP::Authen::Digest;
+
+use strict;
+use base 'LWP::Authen::Basic';
+
+require Digest::MD5;
+
+sub auth_header {
+ my($class, $user, $pass, $request, $ua, $h) = @_;
+
+ my $auth_param = $h->{auth_param};
+
+ my $nc = sprintf "%08X", ++$ua->{authen_md5_nonce_count}{$auth_param->{nonce}};
+ my $cnonce = sprintf "%8x", time;
+
+ my $uri = $request->uri->path_query;
+ $uri = "/" unless length $uri;
+
+ my $md5 = Digest::MD5->new;
+
+ my(@digest);
+ $md5->add(join(":", $user, $auth_param->{realm}, $pass));
+ push(@digest, $md5->hexdigest);
+ $md5->reset;
+
+ push(@digest, $auth_param->{nonce});
+
+ if ($auth_param->{qop}) {
+ push(@digest, $nc, $cnonce, ($auth_param->{qop} =~ m|^auth[,;]auth-int$|) ? 'auth' : $auth_param->{qop});
+ }
+
+ $md5->add(join(":", $request->method, $uri));
+ push(@digest, $md5->hexdigest);
+ $md5->reset;
+
+ $md5->add(join(":", @digest));
+ my($digest) = $md5->hexdigest;
+ $md5->reset;
+
+ my %resp = map { $_ => $auth_param->{$_} } qw(realm nonce opaque);
+ @resp{qw(username uri response algorithm)} = ($user, $uri, $digest, "MD5");
+
+ if (($auth_param->{qop} || "") =~ m|^auth([,;]auth-int)?$|) {
+ @resp{qw(qop cnonce nc)} = ("auth", $cnonce, $nc);
+ }
+
+ my(@order) = qw(username realm qop algorithm uri nonce nc cnonce response);
+ if($request->method =~ /^(?:POST|PUT)$/) {
+ $md5->add($request->content);
+ my $content = $md5->hexdigest;
+ $md5->reset;
+ $md5->add(join(":", @digest[0..1], $content));
+ $md5->reset;
+ $resp{"message-digest"} = $md5->hexdigest;
+ push(@order, "message-digest");
+ }
+ push(@order, "opaque");
+ my @pairs;
+ for (@order) {
+ next unless defined $resp{$_};
+ push(@pairs, "$_=" . qq("$resp{$_}"));
+ }
+
+ my $auth_value = "Digest " . join(", ", @pairs);
+ return $auth_value;
+}
+
+1;
--- /dev/null
+package LWP::Authen::Ntlm;
+
+use strict;
+use vars qw/$VERSION/;
+
+$VERSION = "6.00";
+
+use Authen::NTLM "1.02";
+use MIME::Base64 "2.12";
+
+sub authenticate {
+ my($class, $ua, $proxy, $auth_param, $response,
+ $request, $arg, $size) = @_;
+
+ my($user, $pass) = $ua->get_basic_credentials($auth_param->{realm},
+ $request->uri, $proxy);
+
+ unless(defined $user and defined $pass) {
+ return $response;
+ }
+
+ if (!$ua->conn_cache()) {
+ warn "The keep_alive option must be enabled for NTLM authentication to work. NTLM authentication aborted.\n";
+ return $response;
+ }
+
+ my($domain, $username) = split(/\\/, $user);
+
+ ntlm_domain($domain);
+ ntlm_user($username);
+ ntlm_password($pass);
+
+ my $auth_header = $proxy ? "Proxy-Authorization" : "Authorization";
+
+ # my ($challenge) = $response->header('WWW-Authenticate');
+ my $challenge;
+ foreach ($response->header('WWW-Authenticate')) {
+ last if /^NTLM/ && ($challenge=$_);
+ }
+
+ if ($challenge eq 'NTLM') {
+ # First phase, send handshake
+ my $auth_value = "NTLM " . ntlm();
+ ntlm_reset();
+
+ # Need to check this isn't a repeated fail!
+ my $r = $response;
+ my $retry_count = 0;
+ while ($r) {
+ my $auth = $r->request->header($auth_header);
+ ++$retry_count if ($auth && $auth eq $auth_value);
+ if ($retry_count > 2) {
+ # here we know this failed before
+ $response->header("Client-Warning" =>
+ "Credentials for '$user' failed before");
+ return $response;
+ }
+ $r = $r->previous;
+ }
+
+ my $referral = $request->clone;
+ $referral->header($auth_header => $auth_value);
+ return $ua->request($referral, $arg, $size, $response);
+ }
+
+ else {
+ # Second phase, use the response challenge (unless non-401 code
+ # was returned, in which case, we just send back the response
+ # object, as is
+ my $auth_value;
+ if ($response->code ne '401') {
+ return $response;
+ }
+ else {
+ my $challenge;
+ foreach ($response->header('WWW-Authenticate')) {
+ last if /^NTLM/ && ($challenge=$_);
+ }
+ $challenge =~ s/^NTLM //;
+ ntlm();
+ $auth_value = "NTLM " . ntlm($challenge);
+ ntlm_reset();
+ }
+
+ my $referral = $request->clone;
+ $referral->header($auth_header => $auth_value);
+ my $response2 = $ua->request($referral, $arg, $size, $response);
+ return $response2;
+ }
+}
+
+1;
+
+
+=head1 NAME
+
+LWP::Authen::Ntlm - Library for enabling NTLM authentication (Microsoft) in LWP
+
+=head1 SYNOPSIS
+
+ use LWP::UserAgent;
+ use HTTP::Request::Common;
+ my $url = 'http://www.company.com/protected_page.html';
+
+ # Set up the ntlm client and then the base64 encoded ntlm handshake message
+ my $ua = LWP::UserAgent->new(keep_alive=>1);
+ $ua->credentials('www.company.com:80', '', "MyDomain\\MyUserCode", 'MyPassword');
+
+ $request = GET $url;
+ print "--Performing request now...-----------\n";
+ $response = $ua->request($request);
+ print "--Done with request-------------------\n";
+
+ if ($response->is_success) {print "It worked!->" . $response->code . "\n"}
+ else {print "It didn't work!->" . $response->code . "\n"}
+
+=head1 DESCRIPTION
+
+C<LWP::Authen::Ntlm> allows LWP to authenticate against servers that are using the
+NTLM authentication scheme popularized by Microsoft. This type of authentication is
+common on intranets of Microsoft-centric organizations.
+
+The module takes advantage of the Authen::NTLM module by Mark Bush. Since there
+is also another Authen::NTLM module available from CPAN by Yee Man Chan with an
+entirely different interface, it is necessary to ensure that you have the correct
+NTLM module.
+
+In addition, there have been problems with incompatibilities between different
+versions of Mime::Base64, which Bush's Authen::NTLM makes use of. Therefore, it is
+necessary to ensure that your Mime::Base64 module supports exporting of the
+encode_base64 and decode_base64 functions.
+
+=head1 USAGE
+
+The module is used indirectly through LWP, rather than including it directly in your
+code. The LWP system will invoke the NTLM authentication when it encounters the
+authentication scheme while attempting to retrieve a URL from a server. In order
+for the NTLM authentication to work, you must have a few things set up in your
+code prior to attempting to retrieve the URL:
+
+=over 4
+
+=item *
+
+Enable persistent HTTP connections
+
+To do this, pass the "keep_alive=>1" option to the LWP::UserAgent when creating it, like this:
+
+ my $ua = LWP::UserAgent->new(keep_alive=>1);
+
+=item *
+
+Set the credentials on the UserAgent object
+
+The credentials must be set like this:
+
+ $ua->credentials('www.company.com:80', '', "MyDomain\\MyUserCode", 'MyPassword');
+
+Note that you cannot use the HTTP::Request object's authorization_basic() method to set
+the credentials. Note, too, that the 'www.company.com:80' portion only sets credentials
+on the specified port AND it is case-sensitive (this is due to the way LWP is coded, and
+has nothing to do with LWP::Authen::Ntlm)
+
+=back
+
+=head1 AVAILABILITY
+
+General queries regarding LWP should be made to the LWP Mailing List.
+
+Questions specific to LWP::Authen::Ntlm can be forwarded to jtillman@bigfoot.com
+
+=head1 COPYRIGHT
+
+Copyright (c) 2002 James Tillman. All rights reserved. This
+program is free software; you can redistribute it and/or modify it
+under the same terms as Perl itself.
+
+=head1 SEE ALSO
+
+L<LWP>, L<LWP::UserAgent>, L<lwpcook>.
--- /dev/null
+package LWP::ConnCache;
+
+use strict;
+use vars qw($VERSION $DEBUG);
+
+$VERSION = "6.02";
+
+
+sub new {
+ my($class, %cnf) = @_;
+
+ my $total_capacity = 1;
+ if (exists $cnf{total_capacity}) {
+ $total_capacity = delete $cnf{total_capacity};
+ }
+ if (%cnf && $^W) {
+ require Carp;
+ Carp::carp("Unrecognised options: @{[sort keys %cnf]}")
+ }
+ my $self = bless { cc_conns => [] }, $class;
+ $self->total_capacity($total_capacity);
+ $self;
+}
+
+
+sub deposit {
+ my($self, $type, $key, $conn) = @_;
+ push(@{$self->{cc_conns}}, [$conn, $type, $key, time]);
+ $self->enforce_limits($type);
+ return;
+}
+
+
+sub withdraw {
+ my($self, $type, $key) = @_;
+ my $conns = $self->{cc_conns};
+ for my $i (0 .. @$conns - 1) {
+ my $c = $conns->[$i];
+ next unless $c->[1] eq $type && $c->[2] eq $key;
+ splice(@$conns, $i, 1); # remove it
+ return $c->[0];
+ }
+ return undef;
+}
+
+
+sub total_capacity {
+ my $self = shift;
+ my $old = $self->{cc_limit_total};
+ if (@_) {
+ $self->{cc_limit_total} = shift;
+ $self->enforce_limits;
+ }
+ $old;
+}
+
+
+sub capacity {
+ my $self = shift;
+ my $type = shift;
+ my $old = $self->{cc_limit}{$type};
+ if (@_) {
+ $self->{cc_limit}{$type} = shift;
+ $self->enforce_limits($type);
+ }
+ $old;
+}
+
+
+sub enforce_limits {
+ my($self, $type) = @_;
+ my $conns = $self->{cc_conns};
+
+ my @types = $type ? ($type) : ($self->get_types);
+ for $type (@types) {
+ next unless $self->{cc_limit};
+ my $limit = $self->{cc_limit}{$type};
+ next unless defined $limit;
+ for my $i (reverse 0 .. @$conns - 1) {
+ next unless $conns->[$i][1] eq $type;
+ if (--$limit < 0) {
+ $self->dropping(splice(@$conns, $i, 1), "$type capacity exceeded");
+ }
+ }
+ }
+
+ if (defined(my $total = $self->{cc_limit_total})) {
+ while (@$conns > $total) {
+ $self->dropping(shift(@$conns), "Total capacity exceeded");
+ }
+ }
+}
+
+
+sub dropping {
+ my($self, $c, $reason) = @_;
+ print "DROPPING @$c [$reason]\n" if $DEBUG;
+}
+
+
+sub drop {
+ my($self, $checker, $reason) = @_;
+ if (ref($checker) ne "CODE") {
+ # make it so
+ if (!defined $checker) {
+ $checker = sub { 1 }; # drop all of them
+ }
+ elsif (_looks_like_number($checker)) {
+ my $age_limit = $checker;
+ my $time_limit = time - $age_limit;
+ $reason ||= "older than $age_limit";
+ $checker = sub { $_[3] < $time_limit };
+ }
+ else {
+ my $type = $checker;
+ $reason ||= "drop $type";
+ $checker = sub { $_[1] eq $type }; # match on type
+ }
+ }
+ $reason ||= "drop";
+
+ local $SIG{__DIE__}; # don't interfere with eval below
+ local $@;
+ my @c;
+ for (@{$self->{cc_conns}}) {
+ my $drop;
+ eval {
+ if (&$checker(@$_)) {
+ $self->dropping($_, $reason);
+ $drop++;
+ }
+ };
+ push(@c, $_) unless $drop;
+ }
+ @{$self->{cc_conns}} = @c;
+}
+
+
+sub prune {
+ my $self = shift;
+ $self->drop(sub { !shift->ping }, "ping");
+}
+
+
+sub get_types {
+ my $self = shift;
+ my %t;
+ $t{$_->[1]}++ for @{$self->{cc_conns}};
+ return keys %t;
+}
+
+
+sub get_connections {
+ my($self, $type) = @_;
+ my @c;
+ for (@{$self->{cc_conns}}) {
+ push(@c, $_->[0]) if !$type || ($type && $type eq $_->[1]);
+ }
+ @c;
+}
+
+
+sub _looks_like_number {
+ $_[0] =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/;
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+LWP::ConnCache - Connection cache manager
+
+=head1 NOTE
+
+This module is experimental. Details of its interface is likely to
+change in the future.
+
+=head1 SYNOPSIS
+
+ use LWP::ConnCache;
+ my $cache = LWP::ConnCache->new;
+ $cache->deposit($type, $key, $sock);
+ $sock = $cache->withdraw($type, $key);
+
+=head1 DESCRIPTION
+
+The C<LWP::ConnCache> class is the standard connection cache manager
+for LWP::UserAgent.
+
+The following basic methods are provided:
+
+=over
+
+=item $cache = LWP::ConnCache->new( %options )
+
+This method constructs a new C<LWP::ConnCache> object. The only
+option currently accepted is 'total_capacity'. If specified it
+initialize the total_capacity option. It defaults to the value 1.
+
+=item $cache->total_capacity( [$num_connections] )
+
+Get/sets the number of connection that will be cached. Connections
+will start to be dropped when this limit is reached. If set to C<0>,
+then all connections are immediately dropped. If set to C<undef>,
+then there is no limit.
+
+=item $cache->capacity($type, [$num_connections] )
+
+Get/set a limit for the number of connections of the specified type
+that can be cached. The $type will typically be a short string like
+"http" or "ftp".
+
+=item $cache->drop( [$checker, [$reason]] )
+
+Drop connections by some criteria. The $checker argument is a
+subroutine that is called for each connection. If the routine returns
+a TRUE value then the connection is dropped. The routine is called
+with ($conn, $type, $key, $deposit_time) as arguments.
+
+Shortcuts: If the $checker argument is absent (or C<undef>) all cached
+connections are dropped. If the $checker is a number then all
+connections untouched that the given number of seconds or more are
+dropped. If $checker is a string then all connections of the given
+type are dropped.
+
+The $reason argument is passed on to the dropped() method.
+
+=item $cache->prune
+
+Calling this method will drop all connections that are dead. This is
+tested by calling the ping() method on the connections. If the ping()
+method exists and returns a FALSE value, then the connection is
+dropped.
+
+=item $cache->get_types
+
+This returns all the 'type' fields used for the currently cached
+connections.
+
+=item $cache->get_connections( [$type] )
+
+This returns all connection objects of the specified type. If no type
+is specified then all connections are returned. In scalar context the
+number of cached connections of the specified type is returned.
+
+=back
+
+
+The following methods are called by low-level protocol modules to
+try to save away connections and to get them back.
+
+=over
+
+=item $cache->deposit($type, $key, $conn)
+
+This method adds a new connection to the cache. As a result other
+already cached connections might be dropped. Multiple connections with
+the same $type/$key might added.
+
+=item $conn = $cache->withdraw($type, $key)
+
+This method tries to fetch back a connection that was previously
+deposited. If no cached connection with the specified $type/$key is
+found, then C<undef> is returned. There is not guarantee that a
+deposited connection can be withdrawn, as the cache manger is free to
+drop connections at any time.
+
+=back
+
+The following methods are called internally. Subclasses might want to
+override them.
+
+=over
+
+=item $conn->enforce_limits([$type])
+
+This method is called with after a new connection is added (deposited)
+in the cache or capacity limits are adjusted. The default
+implementation drops connections until the specified capacity limits
+are not exceeded.
+
+=item $conn->dropping($conn_record, $reason)
+
+This method is called when a connection is dropped. The record
+belonging to the dropped connection is passed as the first argument
+and a string describing the reason for the drop is passed as the
+second argument. The default implementation makes some noise if the
+$LWP::ConnCache::DEBUG variable is set and nothing more.
+
+=back
+
+=head1 SUBCLASSING
+
+For specialized cache policy it makes sense to subclass
+C<LWP::ConnCache> and perhaps override the deposit(), enforce_limits()
+and dropping() methods.
+
+The object itself is a hash. Keys prefixed with C<cc_> are reserved
+for the base class.
+
+=head1 SEE ALSO
+
+L<LWP::UserAgent>
+
+=head1 COPYRIGHT
+
+Copyright 2001 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
--- /dev/null
+package LWP::Debug; # legacy
+
+require Exporter;
+@ISA = qw(Exporter);
+@EXPORT_OK = qw(level trace debug conns);
+
+use Carp ();
+
+my @levels = qw(trace debug conns);
+%current_level = ();
+
+
+sub import
+{
+ my $pack = shift;
+ my $callpkg = caller(0);
+ my @symbols = ();
+ my @levels = ();
+ for (@_) {
+ if (/^[-+]/) {
+ push(@levels, $_);
+ }
+ else {
+ push(@symbols, $_);
+ }
+ }
+ Exporter::export($pack, $callpkg, @symbols);
+ level(@levels);
+}
+
+
+sub level
+{
+ for (@_) {
+ if ($_ eq '+') { # all on
+ # switch on all levels
+ %current_level = map { $_ => 1 } @levels;
+ }
+ elsif ($_ eq '-') { # all off
+ %current_level = ();
+ }
+ elsif (/^([-+])(\w+)$/) {
+ $current_level{$2} = $1 eq '+';
+ }
+ else {
+ Carp::croak("Illegal level format $_");
+ }
+ }
+}
+
+
+sub trace { _log(@_) if $current_level{'trace'}; }
+sub debug { _log(@_) if $current_level{'debug'}; }
+sub conns { _log(@_) if $current_level{'conns'}; }
+
+
+sub _log
+{
+ my $msg = shift;
+ $msg .= "\n" unless $msg =~ /\n$/; # ensure trailing "\n"
+
+ my($package,$filename,$line,$sub) = caller(2);
+ print STDERR "$sub: $msg";
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+LWP::Debug - deprecated
+
+=head1 DESCRIPTION
+
+LWP::Debug used to provide tracing facilities, but these are not used
+by LWP any more. The code in this module is kept around
+(undocumented) so that 3rd party code that happen to use the old
+interfaces continue to run.
+
+One useful feature that LWP::Debug provided (in an imprecise and
+troublesome way) was network traffic monitoring. The following
+section provide some hints about recommened replacements.
+
+=head2 Network traffic monitoring
+
+The best way to monitor the network traffic that LWP generates is to
+use an external TCP monitoring program. The Wireshark program
+(L<http://www.wireshark.org/>) is higly recommended for this.
+
+Another approach it to use a debugging HTTP proxy server and make
+LWP direct all its traffic via this one. Call C<< $ua->proxy >> to
+set it up and then just use LWP as before.
+
+For less precise monitoring needs just setting up a few simple
+handlers might do. The following example sets up handlers to dump the
+request and response objects that pass through LWP:
+
+ use LWP::UserAgent;
+ $ua = LWP::UserAgent->new;
+ $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable());
+
+ $ua->add_handler("request_send", sub { shift->dump; return });
+ $ua->add_handler("response_done", sub { shift->dump; return });
+
+ $ua->get("http://www.example.com");
+
+=head1 SEE ALSO
+
+L<LWP::UserAgent>
--- /dev/null
+package LWP::DebugFile;
+
+# legacy stub
+
+1;
--- /dev/null
+package LWP::MemberMixin;
+
+sub _elem
+{
+ my $self = shift;
+ my $elem = shift;
+ my $old = $self->{$elem};
+ $self->{$elem} = shift if @_;
+ return $old;
+}
+
+1;
+
+__END__
+
+=head1 NAME
+
+LWP::MemberMixin - Member access mixin class
+
+=head1 SYNOPSIS
+
+ package Foo;
+ require LWP::MemberMixin;
+ @ISA=qw(LWP::MemberMixin);
+
+=head1 DESCRIPTION
+
+A mixin class to get methods that provide easy access to member
+variables in the %$self.
+Ideally there should be better Perl language support for this.
+
+There is only one method provided:
+
+=over 4
+
+=item _elem($elem [, $val])
+
+Internal method to get/set the value of member variable
+C<$elem>. If C<$val> is present it is used as the new value
+for the member variable. If it is not present the current
+value is not touched. In both cases the previous value of
+the member variable is returned.
+
+=back
--- /dev/null
+package LWP::Protocol;
+
+require LWP::MemberMixin;
+@ISA = qw(LWP::MemberMixin);
+$VERSION = "6.00";
+
+use strict;
+use Carp ();
+use HTTP::Status ();
+use HTTP::Response;
+
+my %ImplementedBy = (); # scheme => classname
+
+
+
+sub new
+{
+ my($class, $scheme, $ua) = @_;
+
+ my $self = bless {
+ scheme => $scheme,
+ ua => $ua,
+
+ # historical/redundant
+ max_size => $ua->{max_size},
+ }, $class;
+
+ $self;
+}
+
+
+sub create
+{
+ my($scheme, $ua) = @_;
+ my $impclass = LWP::Protocol::implementor($scheme) or
+ Carp::croak("Protocol scheme '$scheme' is not supported");
+
+ # hand-off to scheme specific implementation sub-class
+ my $protocol = $impclass->new($scheme, $ua);
+
+ return $protocol;
+}
+
+
+sub implementor
+{
+ my($scheme, $impclass) = @_;
+
+ if ($impclass) {
+ $ImplementedBy{$scheme} = $impclass;
+ }
+ my $ic = $ImplementedBy{$scheme};
+ return $ic if $ic;
+
+ return '' unless $scheme =~ /^([.+\-\w]+)$/; # check valid URL schemes
+ $scheme = $1; # untaint
+ $scheme =~ s/[.+\-]/_/g; # make it a legal module name
+
+ # scheme not yet known, look for a 'use'd implementation
+ $ic = "LWP::Protocol::$scheme"; # default location
+ $ic = "LWP::Protocol::nntp" if $scheme eq 'news'; #XXX ugly hack
+ no strict 'refs';
+ # check we actually have one for the scheme:
+ unless (@{"${ic}::ISA"}) {
+ # try to autoload it
+ eval "require $ic";
+ if ($@) {
+ if ($@ =~ /Can't locate/) { #' #emacs get confused by '
+ $ic = '';
+ }
+ else {
+ die "$@\n";
+ }
+ }
+ }
+ $ImplementedBy{$scheme} = $ic if $ic;
+ $ic;
+}
+
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+ Carp::croak('LWP::Protocol::request() needs to be overridden in subclasses');
+}
+
+
+# legacy
+sub timeout { shift->_elem('timeout', @_); }
+sub max_size { shift->_elem('max_size', @_); }
+
+
+sub collect
+{
+ my ($self, $arg, $response, $collector) = @_;
+ my $content;
+ my($ua, $max_size) = @{$self}{qw(ua max_size)};
+
+ eval {
+ local $\; # protect the print below from surprises
+ if (!defined($arg) || !$response->is_success) {
+ $response->{default_add_content} = 1;
+ }
+ elsif (!ref($arg) && length($arg)) {
+ open(my $fh, ">", $arg) or die "Can't write to '$arg': $!";
+ binmode($fh);
+ push(@{$response->{handlers}{response_data}}, {
+ callback => sub {
+ print $fh $_[3] or die "Can't write to '$arg': $!";
+ 1;
+ },
+ });
+ push(@{$response->{handlers}{response_done}}, {
+ callback => sub {
+ close($fh) or die "Can't write to '$arg': $!";
+ undef($fh);
+ },
+ });
+ }
+ elsif (ref($arg) eq 'CODE') {
+ push(@{$response->{handlers}{response_data}}, {
+ callback => sub {
+ &$arg($_[3], $_[0], $self);
+ 1;
+ },
+ });
+ }
+ else {
+ die "Unexpected collect argument '$arg'";
+ }
+
+ $ua->run_handlers("response_header", $response);
+
+ if (delete $response->{default_add_content}) {
+ push(@{$response->{handlers}{response_data}}, {
+ callback => sub {
+ $_[0]->add_content($_[3]);
+ 1;
+ },
+ });
+ }
+
+
+ my $content_size = 0;
+ my $length = $response->content_length;
+ my %skip_h;
+
+ while ($content = &$collector, length $$content) {
+ for my $h ($ua->handlers("response_data", $response)) {
+ next if $skip_h{$h};
+ unless ($h->{callback}->($response, $ua, $h, $$content)) {
+ # XXX remove from $response->{handlers}{response_data} if present
+ $skip_h{$h}++;
+ }
+ }
+ $content_size += length($$content);
+ $ua->progress(($length ? ($content_size / $length) : "tick"), $response);
+ if (defined($max_size) && $content_size > $max_size) {
+ $response->push_header("Client-Aborted", "max_size");
+ last;
+ }
+ }
+ };
+ my $err = $@;
+ delete $response->{handlers}{response_data};
+ delete $response->{handlers} unless %{$response->{handlers}};
+ if ($err) {
+ chomp($err);
+ $response->push_header('X-Died' => $err);
+ $response->push_header("Client-Aborted", "die");
+ return $response;
+ }
+
+ return $response;
+}
+
+
+sub collect_once
+{
+ my($self, $arg, $response) = @_;
+ my $content = \ $_[3];
+ my $first = 1;
+ $self->collect($arg, $response, sub {
+ return $content if $first--;
+ return \ "";
+ });
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+LWP::Protocol - Base class for LWP protocols
+
+=head1 SYNOPSIS
+
+ package LWP::Protocol::foo;
+ require LWP::Protocol;
+ @ISA=qw(LWP::Protocol);
+
+=head1 DESCRIPTION
+
+This class is used a the base class for all protocol implementations
+supported by the LWP library.
+
+When creating an instance of this class using
+C<LWP::Protocol::create($url)>, and you get an initialised subclass
+appropriate for that access method. In other words, the
+LWP::Protocol::create() function calls the constructor for one of its
+subclasses.
+
+All derived LWP::Protocol classes need to override the request()
+method which is used to service a request. The overridden method can
+make use of the collect() function to collect together chunks of data
+as it is received.
+
+The following methods and functions are provided:
+
+=over 4
+
+=item $prot = LWP::Protocol->new()
+
+The LWP::Protocol constructor is inherited by subclasses. As this is a
+virtual base class this method should B<not> be called directly.
+
+=item $prot = LWP::Protocol::create($scheme)
+
+Create an object of the class implementing the protocol to handle the
+given scheme. This is a function, not a method. It is more an object
+factory than a constructor. This is the function user agents should
+use to access protocols.
+
+=item $class = LWP::Protocol::implementor($scheme, [$class])
+
+Get and/or set implementor class for a scheme. Returns '' if the
+specified scheme is not supported.
+
+=item $prot->request(...)
+
+ $response = $protocol->request($request, $proxy, undef);
+ $response = $protocol->request($request, $proxy, '/tmp/sss');
+ $response = $protocol->request($request, $proxy, \&callback, 1024);
+
+Dispatches a request over the protocol, and returns a response
+object. This method needs to be overridden in subclasses. Refer to
+L<LWP::UserAgent> for description of the arguments.
+
+=item $prot->collect($arg, $response, $collector)
+
+Called to collect the content of a request, and process it
+appropriately into a scalar, file, or by calling a callback. If $arg
+is undefined, then the content is stored within the $response. If
+$arg is a simple scalar, then $arg is interpreted as a file name and
+the content is written to this file. If $arg is a reference to a
+routine, then content is passed to this routine.
+
+The $collector is a routine that will be called and which is
+responsible for returning pieces (as ref to scalar) of the content to
+process. The $collector signals EOF by returning a reference to an
+empty sting.
+
+The return value from collect() is the $response object reference.
+
+B<Note:> We will only use the callback or file argument if
+$response->is_success(). This avoids sending content data for
+redirects and authentication responses to the callback which would be
+confusing.
+
+=item $prot->collect_once($arg, $response, $content)
+
+Can be called when the whole response content is available as
+$content. This will invoke collect() with a collector callback that
+returns a reference to $content the first time and an empty string the
+next.
+
+=back
+
+=head1 SEE ALSO
+
+Inspect the F<LWP/Protocol/file.pm> and F<LWP/Protocol/http.pm> files
+for examples of usage.
+
+=head1 COPYRIGHT
+
+Copyright 1995-2001 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
--- /dev/null
+package LWP::Protocol::GHTTP;
+
+# You can tell LWP to use this module for 'http' requests by running
+# code like this before you make requests:
+#
+# require LWP::Protocol::GHTTP;
+# LWP::Protocol::implementor('http', 'LWP::Protocol::GHTTP');
+#
+
+use strict;
+use vars qw(@ISA);
+
+require LWP::Protocol;
+@ISA=qw(LWP::Protocol);
+
+require HTTP::Response;
+require HTTP::Status;
+
+use HTTP::GHTTP qw(METHOD_GET METHOD_HEAD METHOD_POST);
+
+my %METHOD =
+(
+ GET => METHOD_GET,
+ HEAD => METHOD_HEAD,
+ POST => METHOD_POST,
+);
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ my $method = $request->method;
+ unless (exists $METHOD{$method}) {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ "Bad method '$method'");
+ }
+
+ my $r = HTTP::GHTTP->new($request->uri);
+
+ # XXX what headers for repeated headers here?
+ $request->headers->scan(sub { $r->set_header(@_)});
+
+ $r->set_type($METHOD{$method});
+
+ # XXX should also deal with subroutine content.
+ my $cref = $request->content_ref;
+ $r->set_body($$cref) if length($$cref);
+
+ # XXX is this right
+ $r->set_proxy($proxy->as_string) if $proxy;
+
+ $r->process_request;
+
+ my $response = HTTP::Response->new($r->get_status);
+
+ # XXX How can get the headers out of $r?? This way is too stupid.
+ my @headers;
+ eval {
+ # Wrapped in eval because this method is not always available
+ @headers = $r->get_headers;
+ };
+ @headers = qw(Date Connection Server Content-type
+ Accept-Ranges Server
+ Content-Length Last-Modified ETag) if $@;
+ for (@headers) {
+ my $v = $r->get_header($_);
+ $response->header($_ => $v) if defined $v;
+ }
+
+ return $self->collect_once($arg, $response, $r->get_body);
+}
+
+1;
--- /dev/null
+package LWP::Protocol::cpan;
+
+use strict;
+use vars qw(@ISA);
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+require URI;
+require HTTP::Status;
+require HTTP::Response;
+
+our $CPAN;
+
+unless ($CPAN) {
+ # Try to find local CPAN mirror via $CPAN::Config
+ eval {
+ require CPAN::Config;
+ if($CPAN::Config) {
+ my $urls = $CPAN::Config->{urllist};
+ if (ref($urls) eq "ARRAY") {
+ my $file;
+ for (@$urls) {
+ if (/^file:/) {
+ $file = $_;
+ last;
+ }
+ }
+
+ if ($file) {
+ $CPAN = $file;
+ }
+ else {
+ $CPAN = $urls->[0];
+ }
+ }
+ }
+ };
+
+ $CPAN ||= "http://cpan.org/"; # last resort
+}
+
+# ensure that we don't chop of last part
+$CPAN .= "/" unless $CPAN =~ m,/$,;
+
+
+sub request {
+ my($self, $request, $proxy, $arg, $size) = @_;
+ # check proxy
+ if (defined $proxy)
+ {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy with cpan');
+ }
+
+ # check method
+ my $method = $request->method;
+ unless ($method eq 'GET' || $method eq 'HEAD') {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'cpan:' URLs");
+ }
+
+ my $path = $request->uri->path;
+ $path =~ s,^/,,;
+
+ my $response = HTTP::Response->new(&HTTP::Status::RC_FOUND);
+ $response->header("Location" => URI->new_abs($path, $CPAN));
+ $response;
+}
+
+1;
--- /dev/null
+package LWP::Protocol::data;
+
+# Implements access to data:-URLs as specified in RFC 2397
+
+use strict;
+use vars qw(@ISA);
+
+require HTTP::Response;
+require HTTP::Status;
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+use HTTP::Date qw(time2str);
+require LWP; # needs version number
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size) = @_;
+
+ # check proxy
+ if (defined $proxy)
+ {
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy with data');
+ }
+
+ # check method
+ my $method = $request->method;
+ unless ($method eq 'GET' || $method eq 'HEAD') {
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'data:' URLs");
+ }
+
+ my $url = $request->uri;
+ my $response = HTTP::Response->new( &HTTP::Status::RC_OK, "Document follows");
+
+ my $media_type = $url->media_type;
+
+ my $data = $url->data;
+ $response->header('Content-Type' => $media_type,
+ 'Content-Length' => length($data),
+ 'Date' => time2str(time),
+ 'Server' => "libwww-perl-internal/$LWP::VERSION"
+ );
+
+ $data = "" if $method eq "HEAD";
+ return $self->collect_once($arg, $response, $data);
+}
+
+1;
--- /dev/null
+package LWP::Protocol::file;
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+use strict;
+
+require LWP::MediaTypes;
+require HTTP::Request;
+require HTTP::Response;
+require HTTP::Status;
+require HTTP::Date;
+
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size) = @_;
+
+ $size = 4096 unless defined $size and $size > 0;
+
+ # check proxy
+ if (defined $proxy)
+ {
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy through the filesystem');
+ }
+
+ # check method
+ my $method = $request->method;
+ unless ($method eq 'GET' || $method eq 'HEAD') {
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'file:' URLs");
+ }
+
+ # check url
+ my $url = $request->uri;
+
+ my $scheme = $url->scheme;
+ if ($scheme ne 'file') {
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "LWP::Protocol::file::request called for '$scheme'");
+ }
+
+ # URL OK, look at file
+ my $path = $url->file;
+
+ # test file exists and is readable
+ unless (-e $path) {
+ return HTTP::Response->new( &HTTP::Status::RC_NOT_FOUND,
+ "File `$path' does not exist");
+ }
+ unless (-r _) {
+ return HTTP::Response->new( &HTTP::Status::RC_FORBIDDEN,
+ 'User does not have read permission');
+ }
+
+ # looks like file exists
+ my($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$filesize,
+ $atime,$mtime,$ctime,$blksize,$blocks)
+ = stat(_);
+
+ # XXX should check Accept headers?
+
+ # check if-modified-since
+ my $ims = $request->header('If-Modified-Since');
+ if (defined $ims) {
+ my $time = HTTP::Date::str2time($ims);
+ if (defined $time and $time >= $mtime) {
+ return HTTP::Response->new( &HTTP::Status::RC_NOT_MODIFIED,
+ "$method $path");
+ }
+ }
+
+ # Ok, should be an OK response by now...
+ my $response = HTTP::Response->new( &HTTP::Status::RC_OK );
+
+ # fill in response headers
+ $response->header('Last-Modified', HTTP::Date::time2str($mtime));
+
+ if (-d _) { # If the path is a directory, process it
+ # generate the HTML for directory
+ opendir(D, $path) or
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "Cannot read directory '$path': $!");
+ my(@files) = sort readdir(D);
+ closedir(D);
+
+ # Make directory listing
+ require URI::Escape;
+ require HTML::Entities;
+ my $pathe = $path . ( $^O eq 'MacOS' ? ':' : '/');
+ for (@files) {
+ my $furl = URI::Escape::uri_escape($_);
+ if ( -d "$pathe$_" ) {
+ $furl .= '/';
+ $_ .= '/';
+ }
+ my $desc = HTML::Entities::encode($_);
+ $_ = qq{<LI><A HREF="$furl">$desc</A>};
+ }
+ # Ensure that the base URL is "/" terminated
+ my $base = $url->clone;
+ unless ($base->path =~ m|/$|) {
+ $base->path($base->path . "/");
+ }
+ my $html = join("\n",
+ "<HTML>\n<HEAD>",
+ "<TITLE>Directory $path</TITLE>",
+ "<BASE HREF=\"$base\">",
+ "</HEAD>\n<BODY>",
+ "<H1>Directory listing of $path</H1>",
+ "<UL>", @files, "</UL>",
+ "</BODY>\n</HTML>\n");
+
+ $response->header('Content-Type', 'text/html');
+ $response->header('Content-Length', length $html);
+ $html = "" if $method eq "HEAD";
+
+ return $self->collect_once($arg, $response, $html);
+
+ }
+
+ # path is a regular file
+ $response->header('Content-Length', $filesize);
+ LWP::MediaTypes::guess_media_type($path, $response);
+
+ # read the file
+ if ($method ne "HEAD") {
+ open(F, $path) or return new
+ HTTP::Response(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "Cannot read file '$path': $!");
+ binmode(F);
+ $response = $self->collect($arg, $response, sub {
+ my $content = "";
+ my $bytes = sysread(F, $content, $size);
+ return \$content if $bytes > 0;
+ return \ "";
+ });
+ close(F);
+ }
+
+ $response;
+}
+
+1;
--- /dev/null
+package LWP::Protocol::ftp;
+
+# Implementation of the ftp protocol (RFC 959). We let the Net::FTP
+# package do all the dirty work.
+
+use Carp ();
+
+use HTTP::Status ();
+use HTTP::Negotiate ();
+use HTTP::Response ();
+use LWP::MediaTypes ();
+use File::Listing ();
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+use strict;
+eval {
+ package LWP::Protocol::MyFTP;
+
+ require Net::FTP;
+ Net::FTP->require_version(2.00);
+
+ use vars qw(@ISA);
+ @ISA=qw(Net::FTP);
+
+ sub new {
+ my $class = shift;
+
+ my $self = $class->SUPER::new(@_) || return undef;
+
+ my $mess = $self->message; # welcome message
+ $mess =~ s|\n.*||s; # only first line left
+ $mess =~ s|\s*ready\.?$||;
+ # Make the version number more HTTP like
+ $mess =~ s|\s*\(Version\s*|/| and $mess =~ s|\)$||;
+ ${*$self}{myftp_server} = $mess;
+ #$response->header("Server", $mess);
+
+ $self;
+ }
+
+ sub http_server {
+ my $self = shift;
+ ${*$self}{myftp_server};
+ }
+
+ sub home {
+ my $self = shift;
+ my $old = ${*$self}{myftp_home};
+ if (@_) {
+ ${*$self}{myftp_home} = shift;
+ }
+ $old;
+ }
+
+ sub go_home {
+ my $self = shift;
+ $self->cwd(${*$self}{myftp_home});
+ }
+
+ sub request_count {
+ my $self = shift;
+ ++${*$self}{myftp_reqcount};
+ }
+
+ sub ping {
+ my $self = shift;
+ return $self->go_home;
+ }
+
+};
+my $init_failed = $@;
+
+
+sub _connect {
+ my($self, $host, $port, $user, $account, $password, $timeout) = @_;
+
+ my $key;
+ my $conn_cache = $self->{ua}{conn_cache};
+ if ($conn_cache) {
+ $key = "$host:$port:$user";
+ $key .= ":$account" if defined($account);
+ if (my $ftp = $conn_cache->withdraw("ftp", $key)) {
+ if ($ftp->ping) {
+ # save it again
+ $conn_cache->deposit("ftp", $key, $ftp);
+ return $ftp;
+ }
+ }
+ }
+
+ # try to make a connection
+ my $ftp = LWP::Protocol::MyFTP->new($host,
+ Port => $port,
+ Timeout => $timeout,
+ LocalAddr => $self->{ua}{local_address},
+ );
+ # XXX Should be some what to pass on 'Passive' (header??)
+ unless ($ftp) {
+ $@ =~ s/^Net::FTP: //;
+ return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, $@);
+ }
+
+ unless ($ftp->login($user, $password, $account)) {
+ # Unauthorized. Let's fake a RC_UNAUTHORIZED response
+ my $mess = scalar($ftp->message);
+ $mess =~ s/\n$//;
+ my $res = HTTP::Response->new(&HTTP::Status::RC_UNAUTHORIZED, $mess);
+ $res->header("Server", $ftp->http_server);
+ $res->header("WWW-Authenticate", qq(Basic Realm="FTP login"));
+ return $res;
+ }
+
+ my $home = $ftp->pwd;
+ $ftp->home($home);
+
+ $conn_cache->deposit("ftp", $key, $ftp) if $conn_cache;
+
+ return $ftp;
+}
+
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ $size = 4096 unless $size;
+
+ # check proxy
+ if (defined $proxy)
+ {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy through the ftp');
+ }
+
+ my $url = $request->uri;
+ if ($url->scheme ne 'ftp') {
+ my $scheme = $url->scheme;
+ return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "LWP::Protocol::ftp::request called for '$scheme'");
+ }
+
+ # check method
+ my $method = $request->method;
+
+ unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'PUT') {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'ftp:' URLs");
+ }
+
+ if ($init_failed) {
+ return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ $init_failed);
+ }
+
+ my $host = $url->host;
+ my $port = $url->port;
+ my $user = $url->user;
+ my $password = $url->password;
+
+ # If a basic autorization header is present than we prefer these over
+ # the username/password specified in the URL.
+ {
+ my($u,$p) = $request->authorization_basic;
+ if (defined $u) {
+ $user = $u;
+ $password = $p;
+ }
+ }
+
+ # We allow the account to be specified in the "Account" header
+ my $account = $request->header('Account');
+
+ my $ftp = $self->_connect($host, $port, $user, $account, $password, $timeout);
+ return $ftp if ref($ftp) eq "HTTP::Response"; # ugh!
+
+ # Create an initial response object
+ my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK");
+ $response->header(Server => $ftp->http_server);
+ $response->header('Client-Request-Num' => $ftp->request_count);
+ $response->request($request);
+
+ # Get & fix the path
+ my @path = grep { length } $url->path_segments;
+ my $remote_file = pop(@path);
+ $remote_file = '' unless defined $remote_file;
+
+ my $type;
+ if (ref $remote_file) {
+ my @params;
+ ($remote_file, @params) = @$remote_file;
+ for (@params) {
+ $type = $_ if s/^type=//;
+ }
+ }
+
+ if ($type && $type eq 'a') {
+ $ftp->ascii;
+ }
+ else {
+ $ftp->binary;
+ }
+
+ for (@path) {
+ unless ($ftp->cwd($_)) {
+ return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND,
+ "Can't chdir to $_");
+ }
+ }
+
+ if ($method eq 'GET' || $method eq 'HEAD') {
+ if (my $mod_time = $ftp->mdtm($remote_file)) {
+ $response->last_modified($mod_time);
+ if (my $ims = $request->if_modified_since) {
+ if ($mod_time <= $ims) {
+ $response->code(&HTTP::Status::RC_NOT_MODIFIED);
+ $response->message("Not modified");
+ return $response;
+ }
+ }
+ }
+
+ # We'll use this later to abort the transfer if necessary.
+ # if $max_size is defined, we need to abort early. Otherwise, it's
+ # a normal transfer
+ my $max_size = undef;
+
+ # Set resume location, if the client requested it
+ if ($request->header('Range') && $ftp->supported('REST'))
+ {
+ my $range_info = $request->header('Range');
+
+ # Change bytes=2772992-6781209 to just 2772992
+ my ($start_byte,$end_byte) = $range_info =~ /.*=\s*(\d+)-(\d+)?/;
+ if ( defined $start_byte && !defined $end_byte ) {
+
+ # open range -- only the start is specified
+
+ $ftp->restart( $start_byte );
+ # don't define $max_size, we don't want to abort early
+ }
+ elsif ( defined $start_byte && defined $end_byte &&
+ $start_byte >= 0 && $end_byte >= $start_byte ) {
+
+ $ftp->restart( $start_byte );
+ $max_size = $end_byte - $start_byte;
+ }
+ else {
+
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'Incorrect syntax for Range request');
+ }
+ }
+ elsif ($request->header('Range') && !$ftp->supported('REST'))
+ {
+ return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED,
+ "Server does not support resume.");
+ }
+
+ my $data; # the data handle
+ if (length($remote_file) and $data = $ftp->retr($remote_file)) {
+ my($type, @enc) = LWP::MediaTypes::guess_media_type($remote_file);
+ $response->header('Content-Type', $type) if $type;
+ for (@enc) {
+ $response->push_header('Content-Encoding', $_);
+ }
+ my $mess = $ftp->message;
+ if ($mess =~ /\((\d+)\s+bytes\)/) {
+ $response->header('Content-Length', "$1");
+ }
+
+ if ($method ne 'HEAD') {
+ # Read data from server
+ $response = $self->collect($arg, $response, sub {
+ my $content = '';
+ my $result = $data->read($content, $size);
+
+ # Stop early if we need to.
+ if (defined $max_size)
+ {
+ # We need an interface to Net::FTP::dataconn for getting
+ # the number of bytes already read
+ my $bytes_received = $data->bytes_read();
+
+ # We were already over the limit. (Should only happen
+ # once at the end.)
+ if ($bytes_received - length($content) > $max_size)
+ {
+ $content = '';
+ }
+ # We just went over the limit
+ elsif ($bytes_received > $max_size)
+ {
+ # Trim content
+ $content = substr($content, 0,
+ $max_size - ($bytes_received - length($content)) );
+ }
+ # We're under the limit
+ else
+ {
+ }
+ }
+
+ return \$content;
+ } );
+ }
+ # abort is needed for HEAD, it's == close if the transfer has
+ # already completed.
+ unless ($data->abort) {
+ # Something did not work too well. Note that we treat
+ # responses to abort() with code 0 in case of HEAD as ok
+ # (at least wu-ftpd 2.6.1(1) does that).
+ if ($method ne 'HEAD' || $ftp->code != 0) {
+ $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR);
+ $response->message("FTP close response: " . $ftp->code .
+ " " . $ftp->message);
+ }
+ }
+ }
+ elsif (!length($remote_file) || ( $ftp->code >= 400 && $ftp->code < 600 )) {
+ # not a plain file, try to list instead
+ if (length($remote_file) && !$ftp->cwd($remote_file)) {
+ return HTTP::Response->new(&HTTP::Status::RC_NOT_FOUND,
+ "File '$remote_file' not found");
+ }
+
+ # It should now be safe to try to list the directory
+ my @lsl = $ftp->dir;
+
+ # Try to figure out if the user want us to convert the
+ # directory listing to HTML.
+ my @variants =
+ (
+ ['html', 0.60, 'text/html' ],
+ ['dir', 1.00, 'text/ftp-dir-listing' ]
+ );
+ #$HTTP::Negotiate::DEBUG=1;
+ my $prefer = HTTP::Negotiate::choose(\@variants, $request);
+
+ my $content = '';
+
+ if (!defined($prefer)) {
+ return HTTP::Response->new(&HTTP::Status::RC_NOT_ACCEPTABLE,
+ "Neither HTML nor directory listing wanted");
+ }
+ elsif ($prefer eq 'html') {
+ $response->header('Content-Type' => 'text/html');
+ $content = "<HEAD><TITLE>File Listing</TITLE>\n";
+ my $base = $request->uri->clone;
+ my $path = $base->path;
+ $base->path("$path/") unless $path =~ m|/$|;
+ $content .= qq(<BASE HREF="$base">\n</HEAD>\n);
+ $content .= "<BODY>\n<UL>\n";
+ for (File::Listing::parse_dir(\@lsl, 'GMT')) {
+ my($name, $type, $size, $mtime, $mode) = @$_;
+ $content .= qq( <LI> <a href="$name">$name</a>);
+ $content .= " $size bytes" if $type eq 'f';
+ $content .= "\n";
+ }
+ $content .= "</UL></body>\n";
+ }
+ else {
+ $response->header('Content-Type', 'text/ftp-dir-listing');
+ $content = join("\n", @lsl, '');
+ }
+
+ $response->header('Content-Length', length($content));
+
+ if ($method ne 'HEAD') {
+ $response = $self->collect_once($arg, $response, $content);
+ }
+ }
+ else {
+ my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ "FTP return code " . $ftp->code);
+ $res->content_type("text/plain");
+ $res->content($ftp->message);
+ return $res;
+ }
+ }
+ elsif ($method eq 'PUT') {
+ # method must be PUT
+ unless (length($remote_file)) {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ "Must have a file name to PUT to");
+ }
+ my $data;
+ if ($data = $ftp->stor($remote_file)) {
+ my $content = $request->content;
+ my $bytes = 0;
+ if (defined $content) {
+ if (ref($content) eq 'SCALAR') {
+ $bytes = $data->write($$content, length($$content));
+ }
+ elsif (ref($content) eq 'CODE') {
+ my($buf, $n);
+ while (length($buf = &$content)) {
+ $n = $data->write($buf, length($buf));
+ last unless $n;
+ $bytes += $n;
+ }
+ }
+ elsif (!ref($content)) {
+ if (defined $content && length($content)) {
+ $bytes = $data->write($content, length($content));
+ }
+ }
+ else {
+ die "Bad content";
+ }
+ }
+ $data->close;
+
+ $response->code(&HTTP::Status::RC_CREATED);
+ $response->header('Content-Type', 'text/plain');
+ $response->content("$bytes bytes stored as $remote_file on $host\n")
+
+ }
+ else {
+ my $res = HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ "FTP return code " . $ftp->code);
+ $res->content_type("text/plain");
+ $res->content($ftp->message);
+ return $res;
+ }
+ }
+ else {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ "Illegal method $method");
+ }
+
+ $response;
+}
+
+1;
+
+__END__
+
+# This is what RFC 1738 has to say about FTP access:
+# --------------------------------------------------
+#
+# 3.2. FTP
+#
+# The FTP URL scheme is used to designate files and directories on
+# Internet hosts accessible using the FTP protocol (RFC959).
+#
+# A FTP URL follow the syntax described in Section 3.1. If :<port> is
+# omitted, the port defaults to 21.
+#
+# 3.2.1. FTP Name and Password
+#
+# A user name and password may be supplied; they are used in the ftp
+# "USER" and "PASS" commands after first making the connection to the
+# FTP server. If no user name or password is supplied and one is
+# requested by the FTP server, the conventions for "anonymous" FTP are
+# to be used, as follows:
+#
+# The user name "anonymous" is supplied.
+#
+# The password is supplied as the Internet e-mail address
+# of the end user accessing the resource.
+#
+# If the URL supplies a user name but no password, and the remote
+# server requests a password, the program interpreting the FTP URL
+# should request one from the user.
+#
+# 3.2.2. FTP url-path
+#
+# The url-path of a FTP URL has the following syntax:
+#
+# <cwd1>/<cwd2>/.../<cwdN>/<name>;type=<typecode>
+#
+# Where <cwd1> through <cwdN> and <name> are (possibly encoded) strings
+# and <typecode> is one of the characters "a", "i", or "d". The part
+# ";type=<typecode>" may be omitted. The <cwdx> and <name> parts may be
+# empty. The whole url-path may be omitted, including the "/"
+# delimiting it from the prefix containing user, password, host, and
+# port.
+#
+# The url-path is interpreted as a series of FTP commands as follows:
+#
+# Each of the <cwd> elements is to be supplied, sequentially, as the
+# argument to a CWD (change working directory) command.
+#
+# If the typecode is "d", perform a NLST (name list) command with
+# <name> as the argument, and interpret the results as a file
+# directory listing.
+#
+# Otherwise, perform a TYPE command with <typecode> as the argument,
+# and then access the file whose name is <name> (for example, using
+# the RETR command.)
+#
+# Within a name or CWD component, the characters "/" and ";" are
+# reserved and must be encoded. The components are decoded prior to
+# their use in the FTP protocol. In particular, if the appropriate FTP
+# sequence to access a particular file requires supplying a string
+# containing a "/" as an argument to a CWD or RETR command, it is
+# necessary to encode each "/".
+#
+# For example, the URL <URL:ftp://myname@host.dom/%2Fetc/motd> is
+# interpreted by FTP-ing to "host.dom", logging in as "myname"
+# (prompting for a password if it is asked for), and then executing
+# "CWD /etc" and then "RETR motd". This has a different meaning from
+# <URL:ftp://myname@host.dom/etc/motd> which would "CWD etc" and then
+# "RETR motd"; the initial "CWD" might be executed relative to the
+# default directory for "myname". On the other hand,
+# <URL:ftp://myname@host.dom//etc/motd>, would "CWD " with a null
+# argument, then "CWD etc", and then "RETR motd".
+#
+# FTP URLs may also be used for other operations; for example, it is
+# possible to update a file on a remote file server, or infer
+# information about it from the directory listings. The mechanism for
+# doing so is not spelled out here.
+#
+# 3.2.3. FTP Typecode is Optional
+#
+# The entire ;type=<typecode> part of a FTP URL is optional. If it is
+# omitted, the client program interpreting the URL must guess the
+# appropriate mode to use. In general, the data content type of a file
+# can only be guessed from the name, e.g., from the suffix of the name;
+# the appropriate type code to be used for transfer of the file can
+# then be deduced from the data content of the file.
+#
+# 3.2.4 Hierarchy
+#
+# For some file systems, the "/" used to denote the hierarchical
+# structure of the URL corresponds to the delimiter used to construct a
+# file name hierarchy, and thus, the filename will look similar to the
+# URL path. This does NOT mean that the URL is a Unix filename.
+#
+# 3.2.5. Optimization
+#
+# Clients accessing resources via FTP may employ additional heuristics
+# to optimize the interaction. For some FTP servers, for example, it
+# may be reasonable to keep the control connection open while accessing
+# multiple URLs from the same server. However, there is no common
+# hierarchical model to the FTP protocol, so if a directory change
+# command has been given, it is impossible in general to deduce what
+# sequence should be given to navigate to another directory for a
+# second retrieval, if the paths are different. The only reliable
+# algorithm is to disconnect and reestablish the control connection.
--- /dev/null
+package LWP::Protocol::gopher;
+
+# Implementation of the gopher protocol (RFC 1436)
+#
+# This code is based on 'wwwgopher.pl,v 0.10 1994/10/17 18:12:34 shelden'
+# which in turn is a vastly modified version of Oscar's http'get()
+# dated 28/3/94 in <ftp://cui.unige.ch/PUBLIC/oscar/scripts/http.pl>
+# including contributions from Marc van Heyningen and Martijn Koster.
+
+use strict;
+use vars qw(@ISA);
+
+require HTTP::Response;
+require HTTP::Status;
+require IO::Socket;
+require IO::Select;
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+
+my %gopher2mimetype = (
+ '0' => 'text/plain', # 0 file
+ '1' => 'text/html', # 1 menu
+ # 2 CSO phone-book server
+ # 3 Error
+ '4' => 'application/mac-binhex40', # 4 BinHexed Macintosh file
+ '5' => 'application/zip', # 5 DOS binary archive of some sort
+ '6' => 'application/octet-stream', # 6 UNIX uuencoded file.
+ '7' => 'text/html', # 7 Index-Search server
+ # 8 telnet session
+ '9' => 'application/octet-stream', # 9 binary file
+ 'h' => 'text/html', # html
+ 'g' => 'image/gif', # gif
+ 'I' => 'image/*', # some kind of image
+);
+
+my %gopher2encoding = (
+ '6' => 'x_uuencode', # 6 UNIX uuencoded file.
+);
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ $size = 4096 unless $size;
+
+ # check proxy
+ if (defined $proxy) {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy through the gopher');
+ }
+
+ my $url = $request->uri;
+ die "bad scheme" if $url->scheme ne 'gopher';
+
+
+ my $method = $request->method;
+ unless ($method eq 'GET' || $method eq 'HEAD') {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'gopher:' URLs");
+ }
+
+ my $gophertype = $url->gopher_type;
+ unless (exists $gopher2mimetype{$gophertype}) {
+ return HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED,
+ 'Library does not support gophertype ' .
+ $gophertype);
+ }
+
+ my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK");
+ $response->header('Content-type' => $gopher2mimetype{$gophertype}
+ || 'text/plain');
+ $response->header('Content-Encoding' => $gopher2encoding{$gophertype})
+ if exists $gopher2encoding{$gophertype};
+
+ if ($method eq 'HEAD') {
+ # XXX: don't even try it so we set this header
+ $response->header('Client-Warning' => 'Client answer only');
+ return $response;
+ }
+
+ if ($gophertype eq '7' && ! $url->search) {
+ # the url is the prompt for a gopher search; supply boiler-plate
+ return $self->collect_once($arg, $response, <<"EOT");
+<HEAD>
+<TITLE>Gopher Index</TITLE>
+<ISINDEX>
+</HEAD>
+<BODY>
+<H1>$url<BR>Gopher Search</H1>
+This is a searchable Gopher index.
+Use the search function of your browser to enter search terms.
+</BODY>
+EOT
+ }
+
+ my $host = $url->host;
+ my $port = $url->port;
+
+ my $requestLine = "";
+
+ my $selector = $url->selector;
+ if (defined $selector) {
+ $requestLine .= $selector;
+ my $search = $url->search;
+ if (defined $search) {
+ $requestLine .= "\t$search";
+ my $string = $url->string;
+ if (defined $string) {
+ $requestLine .= "\t$string";
+ }
+ }
+ }
+ $requestLine .= "\015\012";
+
+ # potential request headers are just ignored
+
+ # Ok, lets make the request
+ my $socket = IO::Socket::INET->new(PeerAddr => $host,
+ PeerPort => $port,
+ LocalAddr => $self->{ua}{local_address},
+ Proto => 'tcp',
+ Timeout => $timeout);
+ die "Can't connect to $host:$port" unless $socket;
+ my $sel = IO::Select->new($socket);
+
+ {
+ die "write timeout" if $timeout && !$sel->can_write($timeout);
+ my $n = syswrite($socket, $requestLine, length($requestLine));
+ die $! unless defined($n);
+ die "short write" if $n != length($requestLine);
+ }
+
+ my $user_arg = $arg;
+
+ # must handle menus in a special way since they are to be
+ # converted to HTML. Undefing $arg ensures that the user does
+ # not see the data before we get a change to convert it.
+ $arg = undef if $gophertype eq '1' || $gophertype eq '7';
+
+ # collect response
+ my $buf = '';
+ $response = $self->collect($arg, $response, sub {
+ die "read timeout" if $timeout && !$sel->can_read($timeout);
+ my $n = sysread($socket, $buf, $size);
+ die $! unless defined($n);
+ return \$buf;
+ } );
+
+ # Convert menu to HTML and return data to user.
+ if ($gophertype eq '1' || $gophertype eq '7') {
+ my $content = menu2html($response->content);
+ if (defined $user_arg) {
+ $response = $self->collect_once($user_arg, $response, $content);
+ }
+ else {
+ $response->content($content);
+ }
+ }
+
+ $response;
+}
+
+
+sub gopher2url
+{
+ my($gophertype, $path, $host, $port) = @_;
+
+ my $url;
+
+ if ($gophertype eq '8' || $gophertype eq 'T') {
+ # telnet session
+ $url = $HTTP::URI_CLASS->new($gophertype eq '8' ? 'telnet:':'tn3270:');
+ $url->user($path) if defined $path;
+ }
+ else {
+ $path = URI::Escape::uri_escape($path);
+ $url = $HTTP::URI_CLASS->new("gopher:/$gophertype$path");
+ }
+ $url->host($host);
+ $url->port($port);
+ $url;
+}
+
+sub menu2html {
+ my($menu) = @_;
+
+ $menu =~ s/\015//g; # remove carriage return
+ my $tmp = <<"EOT";
+<HTML>
+<HEAD>
+ <TITLE>Gopher menu</TITLE>
+</HEAD>
+<BODY>
+<H1>Gopher menu</H1>
+EOT
+ for (split("\n", $menu)) {
+ last if /^\./;
+ my($pretty, $path, $host, $port) = split("\t");
+
+ $pretty =~ s/^(.)//;
+ my $type = $1;
+
+ my $url = gopher2url($type, $path, $host, $port)->as_string;
+ $tmp .= qq{<A HREF="$url">$pretty</A><BR>\n};
+ }
+ $tmp .= "</BODY>\n</HTML>\n";
+ $tmp;
+}
+
+1;
--- /dev/null
+package LWP::Protocol::http;
+
+use strict;
+
+require HTTP::Response;
+require HTTP::Status;
+require Net::HTTP;
+
+use vars qw(@ISA @EXTRA_SOCK_OPTS);
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+my $CRLF = "\015\012";
+
+sub _new_socket
+{
+ my($self, $host, $port, $timeout) = @_;
+ my $conn_cache = $self->{ua}{conn_cache};
+ if ($conn_cache) {
+ if (my $sock = $conn_cache->withdraw($self->socket_type, "$host:$port")) {
+ return $sock if $sock && !$sock->can_read(0);
+ # if the socket is readable, then either the peer has closed the
+ # connection or there are some garbage bytes on it. In either
+ # case we abandon it.
+ $sock->close;
+ }
+ }
+
+ local($^W) = 0; # IO::Socket::INET can be noisy
+ my $sock = $self->socket_class->new(PeerAddr => $host,
+ PeerPort => $port,
+ LocalAddr => $self->{ua}{local_address},
+ Proto => 'tcp',
+ Timeout => $timeout,
+ KeepAlive => !!$conn_cache,
+ SendTE => 1,
+ $self->_extra_sock_opts($host, $port),
+ );
+
+ unless ($sock) {
+ # IO::Socket::INET leaves additional error messages in $@
+ my $status = "Can't connect to $host:$port";
+ if ($@ =~ /\bconnect: (.*)/ ||
+ $@ =~ /\b(Bad hostname)\b/ ||
+ $@ =~ /\b(certificate verify failed)\b/ ||
+ $@ =~ /\b(Crypt-SSLeay can't verify hostnames)\b/
+ ) {
+ $status .= " ($1)";
+ }
+ die "$status\n\n$@";
+ }
+
+ # perl 5.005's IO::Socket does not have the blocking method.
+ eval { $sock->blocking(0); };
+
+ $sock;
+}
+
+sub socket_type
+{
+ return "http";
+}
+
+sub socket_class
+{
+ my $self = shift;
+ (ref($self) || $self) . "::Socket";
+}
+
+sub _extra_sock_opts # to be overridden by subclass
+{
+ return @EXTRA_SOCK_OPTS;
+}
+
+sub _check_sock
+{
+ #my($self, $req, $sock) = @_;
+}
+
+sub _get_sock_info
+{
+ my($self, $res, $sock) = @_;
+ if (defined(my $peerhost = $sock->peerhost)) {
+ $res->header("Client-Peer" => "$peerhost:" . $sock->peerport);
+ }
+}
+
+sub _fixup_header
+{
+ my($self, $h, $url, $proxy) = @_;
+
+ # Extract 'Host' header
+ my $hhost = $url->authority;
+ if ($hhost =~ s/^([^\@]*)\@//) { # get rid of potential "user:pass@"
+ # add authorization header if we need them. HTTP URLs do
+ # not really support specification of user and password, but
+ # we allow it.
+ if (defined($1) && not $h->header('Authorization')) {
+ require URI::Escape;
+ $h->authorization_basic(map URI::Escape::uri_unescape($_),
+ split(":", $1, 2));
+ }
+ }
+ $h->init_header('Host' => $hhost);
+
+ if ($proxy) {
+ # Check the proxy URI's userinfo() for proxy credentials
+ # export http_proxy="http://proxyuser:proxypass@proxyhost:port"
+ my $p_auth = $proxy->userinfo();
+ if(defined $p_auth) {
+ require URI::Escape;
+ $h->proxy_authorization_basic(map URI::Escape::uri_unescape($_),
+ split(":", $p_auth, 2))
+ }
+ }
+}
+
+sub hlist_remove {
+ my($hlist, $k) = @_;
+ $k = lc $k;
+ for (my $i = @$hlist - 2; $i >= 0; $i -= 2) {
+ next unless lc($hlist->[$i]) eq $k;
+ splice(@$hlist, $i, 2);
+ }
+}
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ $size ||= 4096;
+
+ # check method
+ my $method = $request->method;
+ unless ($method =~ /^[A-Za-z0-9_!\#\$%&\'*+\-.^\`|~]+$/) { # HTTP token
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'http:' URLs");
+ }
+
+ my $url = $request->uri;
+ my($host, $port, $fullpath);
+
+ # Check if we're proxy'ing
+ if (defined $proxy) {
+ # $proxy is an URL to an HTTP server which will proxy this request
+ $host = $proxy->host;
+ $port = $proxy->port;
+ $fullpath = $method eq "CONNECT" ?
+ ($url->host . ":" . $url->port) :
+ $url->as_string;
+ }
+ else {
+ $host = $url->host;
+ $port = $url->port;
+ $fullpath = $url->path_query;
+ $fullpath = "/$fullpath" unless $fullpath =~ m,^/,;
+ }
+
+ # connect to remote site
+ my $socket = $self->_new_socket($host, $port, $timeout);
+
+ my $http_version = "";
+ if (my $proto = $request->protocol) {
+ if ($proto =~ /^(?:HTTP\/)?(1.\d+)$/) {
+ $http_version = $1;
+ $socket->http_version($http_version);
+ $socket->send_te(0) if $http_version eq "1.0";
+ }
+ }
+
+ $self->_check_sock($request, $socket);
+
+ my @h;
+ my $request_headers = $request->headers->clone;
+ $self->_fixup_header($request_headers, $url, $proxy);
+
+ $request_headers->scan(sub {
+ my($k, $v) = @_;
+ $k =~ s/^://;
+ $v =~ s/\n/ /g;
+ push(@h, $k, $v);
+ });
+
+ my $content_ref = $request->content_ref;
+ $content_ref = $$content_ref if ref($$content_ref);
+ my $chunked;
+ my $has_content;
+
+ if (ref($content_ref) eq 'CODE') {
+ my $clen = $request_headers->header('Content-Length');
+ $has_content++ if $clen;
+ unless (defined $clen) {
+ push(@h, "Transfer-Encoding" => "chunked");
+ $has_content++;
+ $chunked++;
+ }
+ }
+ else {
+ # Set (or override) Content-Length header
+ my $clen = $request_headers->header('Content-Length');
+ if (defined($$content_ref) && length($$content_ref)) {
+ $has_content = length($$content_ref);
+ if (!defined($clen) || $clen ne $has_content) {
+ if (defined $clen) {
+ warn "Content-Length header value was wrong, fixed";
+ hlist_remove(\@h, 'Content-Length');
+ }
+ push(@h, 'Content-Length' => $has_content);
+ }
+ }
+ elsif ($clen) {
+ warn "Content-Length set when there is no content, fixed";
+ hlist_remove(\@h, 'Content-Length');
+ }
+ }
+
+ my $write_wait = 0;
+ $write_wait = 2
+ if ($request_headers->header("Expect") || "") =~ /100-continue/;
+
+ my $req_buf = $socket->format_request($method, $fullpath, @h);
+ #print "------\n$req_buf\n------\n";
+
+ if (!$has_content || $write_wait || $has_content > 8*1024) {
+ WRITE:
+ {
+ # Since this just writes out the header block it should almost
+ # always succeed to send the whole buffer in a single write call.
+ my $n = $socket->syswrite($req_buf, length($req_buf));
+ unless (defined $n) {
+ redo WRITE if $!{EINTR};
+ if ($!{EAGAIN}) {
+ select(undef, undef, undef, 0.1);
+ redo WRITE;
+ }
+ die "write failed: $!";
+ }
+ if ($n) {
+ substr($req_buf, 0, $n, "");
+ }
+ else {
+ select(undef, undef, undef, 0.5);
+ }
+ redo WRITE if length $req_buf;
+ }
+ }
+
+ my($code, $mess, @junk);
+ my $drop_connection;
+
+ if ($has_content) {
+ my $eof;
+ my $wbuf;
+ my $woffset = 0;
+ INITIAL_READ:
+ if ($write_wait) {
+ # skip filling $wbuf when waiting for 100-continue
+ # because if the response is a redirect or auth required
+ # the request will be cloned and there is no way
+ # to reset the input stream
+ # return here via the label after the 100-continue is read
+ }
+ elsif (ref($content_ref) eq 'CODE') {
+ my $buf = &$content_ref();
+ $buf = "" unless defined($buf);
+ $buf = sprintf "%x%s%s%s", length($buf), $CRLF, $buf, $CRLF
+ if $chunked;
+ substr($buf, 0, 0) = $req_buf if $req_buf;
+ $wbuf = \$buf;
+ }
+ else {
+ if ($req_buf) {
+ my $buf = $req_buf . $$content_ref;
+ $wbuf = \$buf;
+ }
+ else {
+ $wbuf = $content_ref;
+ }
+ $eof = 1;
+ }
+
+ my $fbits = '';
+ vec($fbits, fileno($socket), 1) = 1;
+
+ WRITE:
+ while ($write_wait || $woffset < length($$wbuf)) {
+
+ my $sel_timeout = $timeout;
+ if ($write_wait) {
+ $sel_timeout = $write_wait if $write_wait < $sel_timeout;
+ }
+ my $time_before;
+ $time_before = time if $sel_timeout;
+
+ my $rbits = $fbits;
+ my $wbits = $write_wait ? undef : $fbits;
+ my $sel_timeout_before = $sel_timeout;
+ SELECT:
+ {
+ my $nfound = select($rbits, $wbits, undef, $sel_timeout);
+ if ($nfound < 0) {
+ if ($!{EINTR} || $!{EAGAIN}) {
+ if ($time_before) {
+ $sel_timeout = $sel_timeout_before - (time - $time_before);
+ $sel_timeout = 0 if $sel_timeout < 0;
+ }
+ redo SELECT;
+ }
+ die "select failed: $!";
+ }
+ }
+
+ if ($write_wait) {
+ $write_wait -= time - $time_before;
+ $write_wait = 0 if $write_wait < 0;
+ }
+
+ if (defined($rbits) && $rbits =~ /[^\0]/) {
+ # readable
+ my $buf = $socket->_rbuf;
+ my $n = $socket->sysread($buf, 1024, length($buf));
+ unless (defined $n) {
+ die "read failed: $!" unless $!{EINTR} || $!{EAGAIN};
+ # if we get here the rest of the block will do nothing
+ # and we will retry the read on the next round
+ }
+ elsif ($n == 0) {
+ # the server closed the connection before we finished
+ # writing all the request content. No need to write any more.
+ $drop_connection++;
+ last WRITE;
+ }
+ $socket->_rbuf($buf);
+ if (!$code && $buf =~ /\015?\012\015?\012/) {
+ # a whole response header is present, so we can read it without blocking
+ ($code, $mess, @h) = $socket->read_response_headers(laxed => 1,
+ junk_out => \@junk,
+ );
+ if ($code eq "100") {
+ $write_wait = 0;
+ undef($code);
+ goto INITIAL_READ;
+ }
+ else {
+ $drop_connection++;
+ last WRITE;
+ # XXX should perhaps try to abort write in a nice way too
+ }
+ }
+ }
+ if (defined($wbits) && $wbits =~ /[^\0]/) {
+ my $n = $socket->syswrite($$wbuf, length($$wbuf), $woffset);
+ unless (defined $n) {
+ die "write failed: $!" unless $!{EINTR} || $!{EAGAIN};
+ $n = 0; # will retry write on the next round
+ }
+ elsif ($n == 0) {
+ die "write failed: no bytes written";
+ }
+ $woffset += $n;
+
+ if (!$eof && $woffset >= length($$wbuf)) {
+ # need to refill buffer from $content_ref code
+ my $buf = &$content_ref();
+ $buf = "" unless defined($buf);
+ $eof++ unless length($buf);
+ $buf = sprintf "%x%s%s%s", length($buf), $CRLF, $buf, $CRLF
+ if $chunked;
+ $wbuf = \$buf;
+ $woffset = 0;
+ }
+ }
+ } # WRITE
+ }
+
+ ($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk)
+ unless $code;
+ ($code, $mess, @h) = $socket->read_response_headers(laxed => 1, junk_out => \@junk)
+ if $code eq "100";
+
+ my $response = HTTP::Response->new($code, $mess);
+ my $peer_http_version = $socket->peer_http_version;
+ $response->protocol("HTTP/$peer_http_version");
+ {
+ local $HTTP::Headers::TRANSLATE_UNDERSCORE;
+ $response->push_header(@h);
+ }
+ $response->push_header("Client-Junk" => \@junk) if @junk;
+
+ $response->request($request);
+ $self->_get_sock_info($response, $socket);
+
+ if ($method eq "CONNECT") {
+ $response->{client_socket} = $socket; # so it can be picked up
+ return $response;
+ }
+
+ if (my @te = $response->remove_header('Transfer-Encoding')) {
+ $response->push_header('Client-Transfer-Encoding', \@te);
+ }
+ $response->push_header('Client-Response-Num', scalar $socket->increment_response_count);
+
+ my $complete;
+ $response = $self->collect($arg, $response, sub {
+ my $buf = ""; #prevent use of uninitialized value in SSLeay.xs
+ my $n;
+ READ:
+ {
+ $n = $socket->read_entity_body($buf, $size);
+ unless (defined $n) {
+ redo READ if $!{EINTR} || $!{EAGAIN};
+ die "read failed: $!";
+ }
+ redo READ if $n == -1;
+ }
+ $complete++ if !$n;
+ return \$buf;
+ } );
+ $drop_connection++ unless $complete;
+
+ @h = $socket->get_trailers;
+ if (@h) {
+ local $HTTP::Headers::TRANSLATE_UNDERSCORE;
+ $response->push_header(@h);
+ }
+
+ # keep-alive support
+ unless ($drop_connection) {
+ if (my $conn_cache = $self->{ua}{conn_cache}) {
+ my %connection = map { (lc($_) => 1) }
+ split(/\s*,\s*/, ($response->header("Connection") || ""));
+ if (($peer_http_version eq "1.1" && !$connection{close}) ||
+ $connection{"keep-alive"})
+ {
+ $conn_cache->deposit($self->socket_type, "$host:$port", $socket);
+ }
+ }
+ }
+
+ $response;
+}
+
+
+#-----------------------------------------------------------
+package LWP::Protocol::http::SocketMethods;
+
+sub sysread {
+ my $self = shift;
+ if (my $timeout = ${*$self}{io_socket_timeout}) {
+ die "read timeout" unless $self->can_read($timeout);
+ }
+ else {
+ # since we have made the socket non-blocking we
+ # use select to wait for some data to arrive
+ $self->can_read(undef) || die "Assert";
+ }
+ sysread($self, $_[0], $_[1], $_[2] || 0);
+}
+
+sub can_read {
+ my($self, $timeout) = @_;
+ my $fbits = '';
+ vec($fbits, fileno($self), 1) = 1;
+ SELECT:
+ {
+ my $before;
+ $before = time if $timeout;
+ my $nfound = select($fbits, undef, undef, $timeout);
+ if ($nfound < 0) {
+ if ($!{EINTR} || $!{EAGAIN}) {
+ # don't really think EAGAIN can happen here
+ if ($timeout) {
+ $timeout -= time - $before;
+ $timeout = 0 if $timeout < 0;
+ }
+ redo SELECT;
+ }
+ die "select failed: $!";
+ }
+ return $nfound > 0;
+ }
+}
+
+sub ping {
+ my $self = shift;
+ !$self->can_read(0);
+}
+
+sub increment_response_count {
+ my $self = shift;
+ return ++${*$self}{'myhttp_response_count'};
+}
+
+#-----------------------------------------------------------
+package LWP::Protocol::http::Socket;
+use vars qw(@ISA);
+@ISA = qw(LWP::Protocol::http::SocketMethods Net::HTTP);
+
+1;
--- /dev/null
+package LWP::Protocol::loopback;
+
+use strict;
+use vars qw(@ISA);
+require HTTP::Response;
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+sub request {
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ my $response = HTTP::Response->new(200, "OK");
+ $response->content_type("message/http; msgtype=request");
+
+ $response->header("Via", "loopback/1.0 $proxy")
+ if $proxy;
+
+ $response->header("X-Arg", $arg);
+ $response->header("X-Read-Size", $size);
+ $response->header("X-Timeout", $timeout);
+
+ return $self->collect_once($arg, $response, $request->as_string);
+}
+
+1;
--- /dev/null
+package LWP::Protocol::mailto;
+
+# This module implements the mailto protocol. It is just a simple
+# frontend to the Unix sendmail program except on MacOS, where it uses
+# Mail::Internet.
+
+require LWP::Protocol;
+require HTTP::Request;
+require HTTP::Response;
+require HTTP::Status;
+
+use Carp;
+use strict;
+use vars qw(@ISA $SENDMAIL);
+
+@ISA = qw(LWP::Protocol);
+
+unless ($SENDMAIL = $ENV{SENDMAIL}) {
+ for my $sm (qw(/usr/sbin/sendmail
+ /usr/lib/sendmail
+ /usr/ucblib/sendmail
+ ))
+ {
+ if (-x $sm) {
+ $SENDMAIL = $sm;
+ last;
+ }
+ }
+ die "Can't find the 'sendmail' program" unless $SENDMAIL;
+}
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size) = @_;
+
+ my ($mail, $addr) if $^O eq "MacOS";
+ my @text = () if $^O eq "MacOS";
+
+ # check proxy
+ if (defined $proxy)
+ {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy with mail');
+ }
+
+ # check method
+ my $method = $request->method;
+
+ if ($method ne 'POST') {
+ return HTTP::Response->new( &HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for 'mailto:' URLs");
+ }
+
+ # check url
+ my $url = $request->uri;
+
+ my $scheme = $url->scheme;
+ if ($scheme ne 'mailto') {
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "LWP::Protocol::mailto::request called for '$scheme'");
+ }
+ if ($^O eq "MacOS") {
+ eval {
+ require Mail::Internet;
+ };
+ if($@) {
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "You don't have MailTools installed");
+ }
+ unless ($ENV{SMTPHOSTS}) {
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "You don't have SMTPHOSTS defined");
+ }
+ }
+ else {
+ unless (-x $SENDMAIL) {
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "You don't have $SENDMAIL");
+ }
+ }
+ if ($^O eq "MacOS") {
+ $mail = Mail::Internet->new or
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "Can't get a Mail::Internet object");
+ }
+ else {
+ open(SENDMAIL, "| $SENDMAIL -oi -t") or
+ return HTTP::Response->new( &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "Can't run $SENDMAIL: $!");
+ }
+ if ($^O eq "MacOS") {
+ $addr = $url->encoded822addr;
+ }
+ else {
+ $request = $request->clone; # we modify a copy
+ my @h = $url->headers; # URL headers override those in the request
+ while (@h) {
+ my $k = shift @h;
+ my $v = shift @h;
+ next unless defined $v;
+ if (lc($k) eq "body") {
+ $request->content($v);
+ }
+ else {
+ $request->push_header($k => $v);
+ }
+ }
+ }
+ if ($^O eq "MacOS") {
+ $mail->add(To => $addr);
+ $mail->add(split(/[:\n]/,$request->headers_as_string));
+ }
+ else {
+ print SENDMAIL $request->headers_as_string;
+ print SENDMAIL "\n";
+ }
+ my $content = $request->content;
+ if (defined $content) {
+ my $contRef = ref($content) ? $content : \$content;
+ if (ref($contRef) eq 'SCALAR') {
+ if ($^O eq "MacOS") {
+ @text = split("\n",$$contRef);
+ foreach (@text) {
+ $_ .= "\n";
+ }
+ }
+ else {
+ print SENDMAIL $$contRef;
+ }
+
+ }
+ elsif (ref($contRef) eq 'CODE') {
+ # Callback provides data
+ my $d;
+ if ($^O eq "MacOS") {
+ my $stuff = "";
+ while (length($d = &$contRef)) {
+ $stuff .= $d;
+ }
+ @text = split("\n",$stuff);
+ foreach (@text) {
+ $_ .= "\n";
+ }
+ }
+ else {
+ print SENDMAIL $d;
+ }
+ }
+ }
+ if ($^O eq "MacOS") {
+ $mail->body(\@text);
+ unless ($mail->smtpsend) {
+ return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "Mail::Internet->smtpsend unable to send message to <$addr>");
+ }
+ }
+ else {
+ unless (close(SENDMAIL)) {
+ my $err = $! ? "$!" : "Exit status $?";
+ return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "$SENDMAIL: $err");
+ }
+ }
+
+
+ my $response = HTTP::Response->new(&HTTP::Status::RC_ACCEPTED,
+ "Mail accepted");
+ $response->header('Content-Type', 'text/plain');
+ if ($^O eq "MacOS") {
+ $response->header('Server' => "Mail::Internet $Mail::Internet::VERSION");
+ $response->content("Message sent to <$addr>\n");
+ }
+ else {
+ $response->header('Server' => $SENDMAIL);
+ my $to = $request->header("To");
+ $response->content("Message sent to <$to>\n");
+ }
+
+ return $response;
+}
+
+1;
--- /dev/null
+package LWP::Protocol::nntp;
+
+# Implementation of the Network News Transfer Protocol (RFC 977)
+
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+require HTTP::Response;
+require HTTP::Status;
+require Net::NNTP;
+
+use strict;
+
+
+sub request
+{
+ my($self, $request, $proxy, $arg, $size, $timeout) = @_;
+
+ $size = 4096 unless $size;
+
+ # Check for proxy
+ if (defined $proxy) {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'You can not proxy through NNTP');
+ }
+
+ # Check that the scheme is as expected
+ my $url = $request->uri;
+ my $scheme = $url->scheme;
+ unless ($scheme eq 'news' || $scheme eq 'nntp') {
+ return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "LWP::Protocol::nntp::request called for '$scheme'");
+ }
+
+ # check for a valid method
+ my $method = $request->method;
+ unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'POST') {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ 'Library does not allow method ' .
+ "$method for '$scheme:' URLs");
+ }
+
+ # extract the identifier and check against posting to an article
+ my $groupart = $url->_group;
+ my $is_art = $groupart =~ /@/;
+
+ if ($is_art && $method eq 'POST') {
+ return HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
+ "Can't post to an article <$groupart>");
+ }
+
+ my $nntp = Net::NNTP->new($url->host,
+ #Port => 18574,
+ Timeout => $timeout,
+ #Debug => 1,
+ );
+ die "Can't connect to nntp server" unless $nntp;
+
+ # Check the initial welcome message from the NNTP server
+ if ($nntp->status != 2) {
+ return HTTP::Response->new(&HTTP::Status::RC_SERVICE_UNAVAILABLE,
+ $nntp->message);
+ }
+ my $response = HTTP::Response->new(&HTTP::Status::RC_OK, "OK");
+
+ my $mess = $nntp->message;
+
+ # Try to extract server name from greeting message.
+ # Don't know if this works well for a large class of servers, but
+ # this works for our server.
+ $mess =~ s/\s+ready\b.*//;
+ $mess =~ s/^\S+\s+//;
+ $response->header(Server => $mess);
+
+ # First we handle posting of articles
+ if ($method eq 'POST') {
+ $nntp->quit; $nntp = undef;
+ $response->code(&HTTP::Status::RC_NOT_IMPLEMENTED);
+ $response->message("POST not implemented yet");
+ return $response;
+ }
+
+ # The method must be "GET" or "HEAD" by now
+ if (!$is_art) {
+ if (!$nntp->group($groupart)) {
+ $response->code(&HTTP::Status::RC_NOT_FOUND);
+ $response->message($nntp->message);
+ }
+ $nntp->quit; $nntp = undef;
+ # HEAD: just check if the group exists
+ if ($method eq 'GET' && $response->is_success) {
+ $response->code(&HTTP::Status::RC_NOT_IMPLEMENTED);
+ $response->message("GET newsgroup not implemented yet");
+ }
+ return $response;
+ }
+
+ # Send command to server to retrieve an article (or just the headers)
+ my $get = $method eq 'HEAD' ? "head" : "article";
+ my $art = $nntp->$get("<$groupart>");
+ unless ($art) {
+ $nntp->quit; $nntp = undef;
+ $response->code(&HTTP::Status::RC_NOT_FOUND);
+ $response->message($nntp->message);
+ return $response;
+ }
+
+ # Parse headers
+ my($key, $val);
+ local $_;
+ while ($_ = shift @$art) {
+ if (/^\s+$/) {
+ last; # end of headers
+ }
+ elsif (/^(\S+):\s*(.*)/) {
+ $response->push_header($key, $val) if $key;
+ ($key, $val) = ($1, $2);
+ }
+ elsif (/^\s+(.*)/) {
+ next unless $key;
+ $val .= $1;
+ }
+ else {
+ unshift(@$art, $_);
+ last;
+ }
+ }
+ $response->push_header($key, $val) if $key;
+
+ # Ensure that there is a Content-Type header
+ $response->header("Content-Type", "text/plain")
+ unless $response->header("Content-Type");
+
+ # Collect the body
+ $response = $self->collect_once($arg, $response, join("", @$art))
+ if @$art;
+
+ # Say goodbye to the server
+ $nntp->quit;
+ $nntp = undef;
+
+ $response;
+}
+
+1;
--- /dev/null
+package LWP::Protocol::nogo;
+# If you want to disable access to a particular scheme, use this
+# class and then call
+# LWP::Protocol::implementor(that_scheme, 'LWP::Protocol::nogo');
+# For then on, attempts to access URLs with that scheme will generate
+# a 500 error.
+
+use strict;
+use vars qw(@ISA);
+require HTTP::Response;
+require HTTP::Status;
+require LWP::Protocol;
+@ISA = qw(LWP::Protocol);
+
+sub request {
+ my($self, $request) = @_;
+ my $scheme = $request->uri->scheme;
+
+ return HTTP::Response->new(
+ &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
+ "Access to \'$scheme\' URIs has been disabled"
+ );
+}
+1;
--- /dev/null
+package LWP::RobotUA;
+
+require LWP::UserAgent;
+@ISA = qw(LWP::UserAgent);
+$VERSION = "6.03";
+
+require WWW::RobotRules;
+require HTTP::Request;
+require HTTP::Response;
+
+use Carp ();
+use HTTP::Status ();
+use HTTP::Date qw(time2str);
+use strict;
+
+
+#
+# Additional attributes in addition to those found in LWP::UserAgent:
+#
+# $self->{'delay'} Required delay between request to the same
+# server in minutes.
+#
+# $self->{'rules'} A WWW::RobotRules object
+#
+
+sub new
+{
+ my $class = shift;
+ my %cnf;
+ if (@_ < 4) {
+ # legacy args
+ @cnf{qw(agent from rules)} = @_;
+ }
+ else {
+ %cnf = @_;
+ }
+
+ Carp::croak('LWP::RobotUA agent required') unless $cnf{agent};
+ Carp::croak('LWP::RobotUA from address required')
+ unless $cnf{from} && $cnf{from} =~ m/\@/;
+
+ my $delay = delete $cnf{delay} || 1;
+ my $use_sleep = delete $cnf{use_sleep};
+ $use_sleep = 1 unless defined($use_sleep);
+ my $rules = delete $cnf{rules};
+
+ my $self = LWP::UserAgent->new(%cnf);
+ $self = bless $self, $class;
+
+ $self->{'delay'} = $delay; # minutes
+ $self->{'use_sleep'} = $use_sleep;
+
+ if ($rules) {
+ $rules->agent($cnf{agent});
+ $self->{'rules'} = $rules;
+ }
+ else {
+ $self->{'rules'} = WWW::RobotRules->new($cnf{agent});
+ }
+
+ $self;
+}
+
+
+sub delay { shift->_elem('delay', @_); }
+sub use_sleep { shift->_elem('use_sleep', @_); }
+
+
+sub agent
+{
+ my $self = shift;
+ my $old = $self->SUPER::agent(@_);
+ if (@_) {
+ # Changing our name means to start fresh
+ $self->{'rules'}->agent($self->{'agent'});
+ }
+ $old;
+}
+
+
+sub rules {
+ my $self = shift;
+ my $old = $self->_elem('rules', @_);
+ $self->{'rules'}->agent($self->{'agent'}) if @_;
+ $old;
+}
+
+
+sub no_visits
+{
+ my($self, $netloc) = @_;
+ $self->{'rules'}->no_visits($netloc) || 0;
+}
+
+*host_count = \&no_visits; # backwards compatibility with LWP-5.02
+
+
+sub host_wait
+{
+ my($self, $netloc) = @_;
+ return undef unless defined $netloc;
+ my $last = $self->{'rules'}->last_visit($netloc);
+ if ($last) {
+ my $wait = int($self->{'delay'} * 60 - (time - $last));
+ $wait = 0 if $wait < 0;
+ return $wait;
+ }
+ return 0;
+}
+
+
+sub simple_request
+{
+ my($self, $request, $arg, $size) = @_;
+
+ # Do we try to access a new server?
+ my $allowed = $self->{'rules'}->allowed($request->uri);
+
+ if ($allowed < 0) {
+ # Host is not visited before, or robots.txt expired; fetch "robots.txt"
+ my $robot_url = $request->uri->clone;
+ $robot_url->path("robots.txt");
+ $robot_url->query(undef);
+
+ # make access to robot.txt legal since this will be a recursive call
+ $self->{'rules'}->parse($robot_url, "");
+
+ my $robot_req = HTTP::Request->new('GET', $robot_url);
+ my $parse_head = $self->parse_head(0);
+ my $robot_res = $self->request($robot_req);
+ $self->parse_head($parse_head);
+ my $fresh_until = $robot_res->fresh_until;
+ my $content = "";
+ if ($robot_res->is_success && $robot_res->content_is_text) {
+ $content = $robot_res->decoded_content;
+ $content = "" unless $content && $content =~ /^\s*Disallow\s*:/mi;
+ }
+ $self->{'rules'}->parse($robot_url, $content, $fresh_until);
+
+ # recalculate allowed...
+ $allowed = $self->{'rules'}->allowed($request->uri);
+ }
+
+ # Check rules
+ unless ($allowed) {
+ my $res = HTTP::Response->new(
+ &HTTP::Status::RC_FORBIDDEN, 'Forbidden by robots.txt');
+ $res->request( $request ); # bind it to that request
+ return $res;
+ }
+
+ my $netloc = eval { local $SIG{__DIE__}; $request->uri->host_port; };
+ my $wait = $self->host_wait($netloc);
+
+ if ($wait) {
+ if ($self->{'use_sleep'}) {
+ sleep($wait)
+ }
+ else {
+ my $res = HTTP::Response->new(
+ &HTTP::Status::RC_SERVICE_UNAVAILABLE, 'Please, slow down');
+ $res->header('Retry-After', time2str(time + $wait));
+ $res->request( $request ); # bind it to that request
+ return $res;
+ }
+ }
+
+ # Perform the request
+ my $res = $self->SUPER::simple_request($request, $arg, $size);
+
+ $self->{'rules'}->visit($netloc);
+
+ $res;
+}
+
+
+sub as_string
+{
+ my $self = shift;
+ my @s;
+ push(@s, "Robot: $self->{'agent'} operated by $self->{'from'} [$self]");
+ push(@s, " Minimum delay: " . int($self->{'delay'}*60) . "s");
+ push(@s, " Will sleep if too early") if $self->{'use_sleep'};
+ push(@s, " Rules = $self->{'rules'}");
+ join("\n", @s, '');
+}
+
+1;
+
+
+__END__
+
+=head1 NAME
+
+LWP::RobotUA - a class for well-behaved Web robots
+
+=head1 SYNOPSIS
+
+ use LWP::RobotUA;
+ my $ua = LWP::RobotUA->new('my-robot/0.1', 'me@foo.com');
+ $ua->delay(10); # be very nice -- max one hit every ten minutes!
+ ...
+
+ # Then just use it just like a normal LWP::UserAgent:
+ my $response = $ua->get('http://whatever.int/...');
+ ...
+
+=head1 DESCRIPTION
+
+This class implements a user agent that is suitable for robot
+applications. Robots should be nice to the servers they visit. They
+should consult the F</robots.txt> file to ensure that they are welcomed
+and they should not make requests too frequently.
+
+But before you consider writing a robot, take a look at
+<URL:http://www.robotstxt.org/>.
+
+When you use a I<LWP::RobotUA> object as your user agent, then you do not
+really have to think about these things yourself; C<robots.txt> files
+are automatically consulted and obeyed, the server isn't queried
+too rapidly, and so on. Just send requests
+as you do when you are using a normal I<LWP::UserAgent>
+object (using C<< $ua->get(...) >>, C<< $ua->head(...) >>,
+C<< $ua->request(...) >>, etc.), and this
+special agent will make sure you are nice.
+
+=head1 METHODS
+
+The LWP::RobotUA is a sub-class of LWP::UserAgent and implements the
+same methods. In addition the following methods are provided:
+
+=over 4
+
+=item $ua = LWP::RobotUA->new( %options )
+
+=item $ua = LWP::RobotUA->new( $agent, $from )
+
+=item $ua = LWP::RobotUA->new( $agent, $from, $rules )
+
+The LWP::UserAgent options C<agent> and C<from> are mandatory. The
+options C<delay>, C<use_sleep> and C<rules> initialize attributes
+private to the RobotUA. If C<rules> are not provided, then
+C<WWW::RobotRules> is instantiated providing an internal database of
+F<robots.txt>.
+
+It is also possible to just pass the value of C<agent>, C<from> and
+optionally C<rules> as plain positional arguments.
+
+=item $ua->delay
+
+=item $ua->delay( $minutes )
+
+Get/set the minimum delay between requests to the same server, in
+I<minutes>. The default is 1 minute. Note that this number doesn't
+have to be an integer; for example, this sets the delay to 10 seconds:
+
+ $ua->delay(10/60);
+
+=item $ua->use_sleep
+
+=item $ua->use_sleep( $boolean )
+
+Get/set a value indicating whether the UA should sleep() if requests
+arrive too fast, defined as $ua->delay minutes not passed since
+last request to the given server. The default is TRUE. If this value is
+FALSE then an internal SERVICE_UNAVAILABLE response will be generated.
+It will have an Retry-After header that indicates when it is OK to
+send another request to this server.
+
+=item $ua->rules
+
+=item $ua->rules( $rules )
+
+Set/get which I<WWW::RobotRules> object to use.
+
+=item $ua->no_visits( $netloc )
+
+Returns the number of documents fetched from this server host. Yeah I
+know, this method should probably have been named num_visits() or
+something like that. :-(
+
+=item $ua->host_wait( $netloc )
+
+Returns the number of I<seconds> (from now) you must wait before you can
+make a new request to this host.
+
+=item $ua->as_string
+
+Returns a string that describes the state of the UA.
+Mainly useful for debugging.
+
+=back
+
+=head1 SEE ALSO
+
+L<LWP::UserAgent>, L<WWW::RobotRules>
+
+=head1 COPYRIGHT
+
+Copyright 1996-2004 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
--- /dev/null
+package LWP::Simple;
+
+use strict;
+use vars qw($ua %loop_check $FULL_LWP @EXPORT @EXPORT_OK $VERSION);
+
+require Exporter;
+
+@EXPORT = qw(get head getprint getstore mirror);
+@EXPORT_OK = qw($ua);
+
+# I really hate this. I was a bad idea to do it in the first place.
+# Wonder how to get rid of it??? (It even makes LWP::Simple 7% slower
+# for trivial tests)
+use HTTP::Status;
+push(@EXPORT, @HTTP::Status::EXPORT);
+
+$VERSION = "6.00";
+
+sub import
+{
+ my $pkg = shift;
+ my $callpkg = caller;
+ Exporter::export($pkg, $callpkg, @_);
+}
+
+use LWP::UserAgent ();
+use HTTP::Status ();
+use HTTP::Date ();
+$ua = LWP::UserAgent->new; # we create a global UserAgent object
+$ua->agent("LWP::Simple/$VERSION ");
+$ua->env_proxy;
+
+
+sub get ($)
+{
+ my $response = $ua->get(shift);
+ return $response->decoded_content if $response->is_success;
+ return undef;
+}
+
+
+sub head ($)
+{
+ my($url) = @_;
+ my $request = HTTP::Request->new(HEAD => $url);
+ my $response = $ua->request($request);
+
+ if ($response->is_success) {
+ return $response unless wantarray;
+ return (scalar $response->header('Content-Type'),
+ scalar $response->header('Content-Length'),
+ HTTP::Date::str2time($response->header('Last-Modified')),
+ HTTP::Date::str2time($response->header('Expires')),
+ scalar $response->header('Server'),
+ );
+ }
+ return;
+}
+
+
+sub getprint ($)
+{
+ my($url) = @_;
+ my $request = HTTP::Request->new(GET => $url);
+ local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR
+ my $callback = sub { print $_[0] };
+ if ($^O eq "MacOS") {
+ $callback = sub { $_[0] =~ s/\015?\012/\n/g; print $_[0] }
+ }
+ my $response = $ua->request($request, $callback);
+ unless ($response->is_success) {
+ print STDERR $response->status_line, " <URL:$url>\n";
+ }
+ $response->code;
+}
+
+
+sub getstore ($$)
+{
+ my($url, $file) = @_;
+ my $request = HTTP::Request->new(GET => $url);
+ my $response = $ua->request($request, $file);
+
+ $response->code;
+}
+
+
+sub mirror ($$)
+{
+ my($url, $file) = @_;
+ my $response = $ua->mirror($url, $file);
+ $response->code;
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+LWP::Simple - simple procedural interface to LWP
+
+=head1 SYNOPSIS
+
+ perl -MLWP::Simple -e 'getprint "http://www.sn.no"'
+
+ use LWP::Simple;
+ $content = get("http://www.sn.no/");
+ die "Couldn't get it!" unless defined $content;
+
+ if (mirror("http://www.sn.no/", "foo") == RC_NOT_MODIFIED) {
+ ...
+ }
+
+ if (is_success(getprint("http://www.sn.no/"))) {
+ ...
+ }
+
+=head1 DESCRIPTION
+
+This module is meant for people who want a simplified view of the
+libwww-perl library. It should also be suitable for one-liners. If
+you need more control or access to the header fields in the requests
+sent and responses received, then you should use the full object-oriented
+interface provided by the C<LWP::UserAgent> module.
+
+The following functions are provided (and exported) by this module:
+
+=over 3
+
+=item get($url)
+
+The get() function will fetch the document identified by the given URL
+and return it. It returns C<undef> if it fails. The $url argument can
+be either a string or a reference to a URI object.
+
+You will not be able to examine the response code or response headers
+(like 'Content-Type') when you are accessing the web using this
+function. If you need that information you should use the full OO
+interface (see L<LWP::UserAgent>).
+
+=item head($url)
+
+Get document headers. Returns the following 5 values if successful:
+($content_type, $document_length, $modified_time, $expires, $server)
+
+Returns an empty list if it fails. In scalar context returns TRUE if
+successful.
+
+=item getprint($url)
+
+Get and print a document identified by a URL. The document is printed
+to the selected default filehandle for output (normally STDOUT) as
+data is received from the network. If the request fails, then the
+status code and message are printed on STDERR. The return value is
+the HTTP response code.
+
+=item getstore($url, $file)
+
+Gets a document identified by a URL and stores it in the file. The
+return value is the HTTP response code.
+
+=item mirror($url, $file)
+
+Get and store a document identified by a URL, using
+I<If-modified-since>, and checking the I<Content-Length>. Returns
+the HTTP response code.
+
+=back
+
+This module also exports the HTTP::Status constants and procedures.
+You can use them when you check the response code from getprint(),
+getstore() or mirror(). The constants are:
+
+ RC_CONTINUE
+ RC_SWITCHING_PROTOCOLS
+ RC_OK
+ RC_CREATED
+ RC_ACCEPTED
+ RC_NON_AUTHORITATIVE_INFORMATION
+ RC_NO_CONTENT
+ RC_RESET_CONTENT
+ RC_PARTIAL_CONTENT
+ RC_MULTIPLE_CHOICES
+ RC_MOVED_PERMANENTLY
+ RC_MOVED_TEMPORARILY
+ RC_SEE_OTHER
+ RC_NOT_MODIFIED
+ RC_USE_PROXY
+ RC_BAD_REQUEST
+ RC_UNAUTHORIZED
+ RC_PAYMENT_REQUIRED
+ RC_FORBIDDEN
+ RC_NOT_FOUND
+ RC_METHOD_NOT_ALLOWED
+ RC_NOT_ACCEPTABLE
+ RC_PROXY_AUTHENTICATION_REQUIRED
+ RC_REQUEST_TIMEOUT
+ RC_CONFLICT
+ RC_GONE
+ RC_LENGTH_REQUIRED
+ RC_PRECONDITION_FAILED
+ RC_REQUEST_ENTITY_TOO_LARGE
+ RC_REQUEST_URI_TOO_LARGE
+ RC_UNSUPPORTED_MEDIA_TYPE
+ RC_INTERNAL_SERVER_ERROR
+ RC_NOT_IMPLEMENTED
+ RC_BAD_GATEWAY
+ RC_SERVICE_UNAVAILABLE
+ RC_GATEWAY_TIMEOUT
+ RC_HTTP_VERSION_NOT_SUPPORTED
+
+The HTTP::Status classification functions are:
+
+=over 3
+
+=item is_success($rc)
+
+True if response code indicated a successful request.
+
+=item is_error($rc)
+
+True if response code indicated that an error occurred.
+
+=back
+
+The module will also export the LWP::UserAgent object as C<$ua> if you
+ask for it explicitly.
+
+The user agent created by this module will identify itself as
+"LWP::Simple/#.##"
+and will initialize its proxy defaults from the environment (by
+calling $ua->env_proxy).
+
+=head1 CAVEAT
+
+Note that if you are using both LWP::Simple and the very popular CGI.pm
+module, you may be importing a C<head> function from each module,
+producing a warning like "Prototype mismatch: sub main::head ($) vs
+none". Get around this problem by just not importing LWP::Simple's
+C<head> function, like so:
+
+ use LWP::Simple qw(!head);
+ use CGI qw(:standard); # then only CGI.pm defines a head()
+
+Then if you do need LWP::Simple's C<head> function, you can just call
+it as C<LWP::Simple::head($url)>.
+
+=head1 SEE ALSO
+
+L<LWP>, L<lwpcook>, L<LWP::UserAgent>, L<HTTP::Status>, L<lwp-request>,
+L<lwp-mirror>
--- /dev/null
+package LWP::UserAgent;
+
+use strict;
+use vars qw(@ISA $VERSION);
+
+require LWP::MemberMixin;
+@ISA = qw(LWP::MemberMixin);
+$VERSION = "6.04";
+
+use HTTP::Request ();
+use HTTP::Response ();
+use HTTP::Date ();
+
+use LWP ();
+use LWP::Protocol ();
+
+use Carp ();
+
+
+sub new
+{
+ # Check for common user mistake
+ Carp::croak("Options to LWP::UserAgent should be key/value pairs, not hash reference")
+ if ref($_[1]) eq 'HASH';
+
+ my($class, %cnf) = @_;
+
+ my $agent = delete $cnf{agent};
+ my $from = delete $cnf{from};
+ my $def_headers = delete $cnf{default_headers};
+ my $timeout = delete $cnf{timeout};
+ $timeout = 3*60 unless defined $timeout;
+ my $local_address = delete $cnf{local_address};
+ my $ssl_opts = delete $cnf{ssl_opts} || {};
+ unless (exists $ssl_opts->{verify_hostname}) {
+ # The processing of HTTPS_CA_* below is for compatiblity with Crypt::SSLeay
+ if (exists $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME}) {
+ $ssl_opts->{verify_hostname} = $ENV{PERL_LWP_SSL_VERIFY_HOSTNAME};
+ }
+ elsif ($ENV{HTTPS_CA_FILE} || $ENV{HTTPS_CA_DIR}) {
+ # Crypt-SSLeay compatiblity (verify peer certificate; but not the hostname)
+ $ssl_opts->{verify_hostname} = 0;
+ $ssl_opts->{SSL_verify_mode} = 1;
+ }
+ else {
+ $ssl_opts->{verify_hostname} = 1;
+ }
+ }
+ unless (exists $ssl_opts->{SSL_ca_file}) {
+ if (my $ca_file = $ENV{PERL_LWP_SSL_CA_FILE} || $ENV{HTTPS_CA_FILE}) {
+ $ssl_opts->{SSL_ca_file} = $ca_file;
+ }
+ }
+ unless (exists $ssl_opts->{SSL_ca_path}) {
+ if (my $ca_path = $ENV{PERL_LWP_SSL_CA_PATH} || $ENV{HTTPS_CA_DIR}) {
+ $ssl_opts->{SSL_ca_path} = $ca_path;
+ }
+ }
+ my $use_eval = delete $cnf{use_eval};
+ $use_eval = 1 unless defined $use_eval;
+ my $parse_head = delete $cnf{parse_head};
+ $parse_head = 1 unless defined $parse_head;
+ my $show_progress = delete $cnf{show_progress};
+ my $max_size = delete $cnf{max_size};
+ my $max_redirect = delete $cnf{max_redirect};
+ $max_redirect = 7 unless defined $max_redirect;
+ my $env_proxy = exists $cnf{env_proxy} ? delete $cnf{env_proxy} : $ENV{PERL_LWP_ENV_PROXY};
+
+ my $cookie_jar = delete $cnf{cookie_jar};
+ my $conn_cache = delete $cnf{conn_cache};
+ my $keep_alive = delete $cnf{keep_alive};
+
+ Carp::croak("Can't mix conn_cache and keep_alive")
+ if $conn_cache && $keep_alive;
+
+ my $protocols_allowed = delete $cnf{protocols_allowed};
+ my $protocols_forbidden = delete $cnf{protocols_forbidden};
+
+ my $requests_redirectable = delete $cnf{requests_redirectable};
+ $requests_redirectable = ['GET', 'HEAD']
+ unless defined $requests_redirectable;
+
+ # Actually ""s are just as good as 0's, but for concision we'll just say:
+ Carp::croak("protocols_allowed has to be an arrayref or 0, not \"$protocols_allowed\"!")
+ if $protocols_allowed and ref($protocols_allowed) ne 'ARRAY';
+ Carp::croak("protocols_forbidden has to be an arrayref or 0, not \"$protocols_forbidden\"!")
+ if $protocols_forbidden and ref($protocols_forbidden) ne 'ARRAY';
+ Carp::croak("requests_redirectable has to be an arrayref or 0, not \"$requests_redirectable\"!")
+ if $requests_redirectable and ref($requests_redirectable) ne 'ARRAY';
+
+
+ if (%cnf && $^W) {
+ Carp::carp("Unrecognized LWP::UserAgent options: @{[sort keys %cnf]}");
+ }
+
+ my $self = bless {
+ def_headers => $def_headers,
+ timeout => $timeout,
+ local_address => $local_address,
+ ssl_opts => $ssl_opts,
+ use_eval => $use_eval,
+ show_progress=> $show_progress,
+ max_size => $max_size,
+ max_redirect => $max_redirect,
+ proxy => {},
+ no_proxy => [],
+ protocols_allowed => $protocols_allowed,
+ protocols_forbidden => $protocols_forbidden,
+ requests_redirectable => $requests_redirectable,
+ }, $class;
+
+ $self->agent(defined($agent) ? $agent : $class->_agent)
+ if defined($agent) || !$def_headers || !$def_headers->header("User-Agent");
+ $self->from($from) if $from;
+ $self->cookie_jar($cookie_jar) if $cookie_jar;
+ $self->parse_head($parse_head);
+ $self->env_proxy if $env_proxy;
+
+ $self->protocols_allowed( $protocols_allowed ) if $protocols_allowed;
+ $self->protocols_forbidden($protocols_forbidden) if $protocols_forbidden;
+
+ if ($keep_alive) {
+ $conn_cache ||= { total_capacity => $keep_alive };
+ }
+ $self->conn_cache($conn_cache) if $conn_cache;
+
+ return $self;
+}
+
+
+sub send_request
+{
+ my($self, $request, $arg, $size) = @_;
+ my($method, $url) = ($request->method, $request->uri);
+ my $scheme = $url->scheme;
+
+ local($SIG{__DIE__}); # protect against user defined die handlers
+
+ $self->progress("begin", $request);
+
+ my $response = $self->run_handlers("request_send", $request);
+
+ unless ($response) {
+ my $protocol;
+
+ {
+ # Honor object-specific restrictions by forcing protocol objects
+ # into class LWP::Protocol::nogo.
+ my $x;
+ if($x = $self->protocols_allowed) {
+ if (grep lc($_) eq $scheme, @$x) {
+ }
+ else {
+ require LWP::Protocol::nogo;
+ $protocol = LWP::Protocol::nogo->new;
+ }
+ }
+ elsif ($x = $self->protocols_forbidden) {
+ if(grep lc($_) eq $scheme, @$x) {
+ require LWP::Protocol::nogo;
+ $protocol = LWP::Protocol::nogo->new;
+ }
+ }
+ # else fall thru and create the protocol object normally
+ }
+
+ # Locate protocol to use
+ my $proxy = $request->{proxy};
+ if ($proxy) {
+ $scheme = $proxy->scheme;
+ }
+
+ unless ($protocol) {
+ $protocol = eval { LWP::Protocol::create($scheme, $self) };
+ if ($@) {
+ $@ =~ s/ at .* line \d+.*//s; # remove file/line number
+ $response = _new_response($request, &HTTP::Status::RC_NOT_IMPLEMENTED, $@);
+ if ($scheme eq "https") {
+ $response->message($response->message . " (LWP::Protocol::https not installed)");
+ $response->content_type("text/plain");
+ $response->content(<<EOT);
+LWP will support https URLs if the LWP::Protocol::https module
+is installed.
+EOT
+ }
+ }
+ }
+
+ if (!$response && $self->{use_eval}) {
+ # we eval, and turn dies into responses below
+ eval {
+ $response = $protocol->request($request, $proxy, $arg, $size, $self->{timeout}) ||
+ die "No response returned by $protocol";
+ };
+ if ($@) {
+ if (UNIVERSAL::isa($@, "HTTP::Response")) {
+ $response = $@;
+ $response->request($request);
+ }
+ else {
+ my $full = $@;
+ (my $status = $@) =~ s/\n.*//s;
+ $status =~ s/ at .* line \d+.*//s; # remove file/line number
+ my $code = ($status =~ s/^(\d\d\d)\s+//) ? $1 : &HTTP::Status::RC_INTERNAL_SERVER_ERROR;
+ $response = _new_response($request, $code, $status, $full);
+ }
+ }
+ }
+ elsif (!$response) {
+ $response = $protocol->request($request, $proxy,
+ $arg, $size, $self->{timeout});
+ # XXX: Should we die unless $response->is_success ???
+ }
+ }
+
+ $response->request($request); # record request for reference
+ $response->header("Client-Date" => HTTP::Date::time2str(time));
+
+ $self->run_handlers("response_done", $response);
+
+ $self->progress("end", $response);
+ return $response;
+}
+
+
+sub prepare_request
+{
+ my($self, $request) = @_;
+ die "Method missing" unless $request->method;
+ my $url = $request->uri;
+ die "URL missing" unless $url;
+ die "URL must be absolute" unless $url->scheme;
+
+ $self->run_handlers("request_preprepare", $request);
+
+ if (my $def_headers = $self->{def_headers}) {
+ for my $h ($def_headers->header_field_names) {
+ $request->init_header($h => [$def_headers->header($h)]);
+ }
+ }
+
+ $self->run_handlers("request_prepare", $request);
+
+ return $request;
+}
+
+
+sub simple_request
+{
+ my($self, $request, $arg, $size) = @_;
+
+ # sanity check the request passed in
+ if (defined $request) {
+ if (ref $request) {
+ Carp::croak("You need a request object, not a " . ref($request) . " object")
+ if ref($request) eq 'ARRAY' or ref($request) eq 'HASH' or
+ !$request->can('method') or !$request->can('uri');
+ }
+ else {
+ Carp::croak("You need a request object, not '$request'");
+ }
+ }
+ else {
+ Carp::croak("No request object passed in");
+ }
+
+ eval {
+ $request = $self->prepare_request($request);
+ };
+ if ($@) {
+ $@ =~ s/ at .* line \d+.*//s; # remove file/line number
+ return _new_response($request, &HTTP::Status::RC_BAD_REQUEST, $@);
+ }
+ return $self->send_request($request, $arg, $size);
+}
+
+
+sub request
+{
+ my($self, $request, $arg, $size, $previous) = @_;
+
+ my $response = $self->simple_request($request, $arg, $size);
+ $response->previous($previous) if $previous;
+
+ if ($response->redirects >= $self->{max_redirect}) {
+ $response->header("Client-Warning" =>
+ "Redirect loop detected (max_redirect = $self->{max_redirect})");
+ return $response;
+ }
+
+ if (my $req = $self->run_handlers("response_redirect", $response)) {
+ return $self->request($req, $arg, $size, $response);
+ }
+
+ my $code = $response->code;
+
+ if ($code == &HTTP::Status::RC_MOVED_PERMANENTLY or
+ $code == &HTTP::Status::RC_FOUND or
+ $code == &HTTP::Status::RC_SEE_OTHER or
+ $code == &HTTP::Status::RC_TEMPORARY_REDIRECT)
+ {
+ my $referral = $request->clone;
+
+ # These headers should never be forwarded
+ $referral->remove_header('Host', 'Cookie');
+
+ if ($referral->header('Referer') &&
+ $request->uri->scheme eq 'https' &&
+ $referral->uri->scheme eq 'http')
+ {
+ # RFC 2616, section 15.1.3.
+ # https -> http redirect, suppressing Referer
+ $referral->remove_header('Referer');
+ }
+
+ if ($code == &HTTP::Status::RC_SEE_OTHER ||
+ $code == &HTTP::Status::RC_FOUND)
+ {
+ my $method = uc($referral->method);
+ unless ($method eq "GET" || $method eq "HEAD") {
+ $referral->method("GET");
+ $referral->content("");
+ $referral->remove_content_headers;
+ }
+ }
+
+ # And then we update the URL based on the Location:-header.
+ my $referral_uri = $response->header('Location');
+ {
+ # Some servers erroneously return a relative URL for redirects,
+ # so make it absolute if it not already is.
+ local $URI::ABS_ALLOW_RELATIVE_SCHEME = 1;
+ my $base = $response->base;
+ $referral_uri = "" unless defined $referral_uri;
+ $referral_uri = $HTTP::URI_CLASS->new($referral_uri, $base)
+ ->abs($base);
+ }
+ $referral->uri($referral_uri);
+
+ return $response unless $self->redirect_ok($referral, $response);
+ return $self->request($referral, $arg, $size, $response);
+
+ }
+ elsif ($code == &HTTP::Status::RC_UNAUTHORIZED ||
+ $code == &HTTP::Status::RC_PROXY_AUTHENTICATION_REQUIRED
+ )
+ {
+ my $proxy = ($code == &HTTP::Status::RC_PROXY_AUTHENTICATION_REQUIRED);
+ my $ch_header = $proxy ? "Proxy-Authenticate" : "WWW-Authenticate";
+ my @challenge = $response->header($ch_header);
+ unless (@challenge) {
+ $response->header("Client-Warning" =>
+ "Missing Authenticate header");
+ return $response;
+ }
+
+ require HTTP::Headers::Util;
+ CHALLENGE: for my $challenge (@challenge) {
+ $challenge =~ tr/,/;/; # "," is used to separate auth-params!!
+ ($challenge) = HTTP::Headers::Util::split_header_words($challenge);
+ my $scheme = shift(@$challenge);
+ shift(@$challenge); # no value
+ $challenge = { @$challenge }; # make rest into a hash
+
+ unless ($scheme =~ /^([a-z]+(?:-[a-z]+)*)$/) {
+ $response->header("Client-Warning" =>
+ "Bad authentication scheme '$scheme'");
+ return $response;
+ }
+ $scheme = $1; # untainted now
+ my $class = "LWP::Authen::\u$scheme";
+ $class =~ s/-/_/g;
+
+ no strict 'refs';
+ unless (%{"$class\::"}) {
+ # try to load it
+ eval "require $class";
+ if ($@) {
+ if ($@ =~ /^Can\'t locate/) {
+ $response->header("Client-Warning" =>
+ "Unsupported authentication scheme '$scheme'");
+ }
+ else {
+ $response->header("Client-Warning" => $@);
+ }
+ next CHALLENGE;
+ }
+ }
+ unless ($class->can("authenticate")) {
+ $response->header("Client-Warning" =>
+ "Unsupported authentication scheme '$scheme'");
+ next CHALLENGE;
+ }
+ return $class->authenticate($self, $proxy, $challenge, $response,
+ $request, $arg, $size);
+ }
+ return $response;
+ }
+ return $response;
+}
+
+
+#
+# Now the shortcuts...
+#
+sub get {
+ require HTTP::Request::Common;
+ my($self, @parameters) = @_;
+ my @suff = $self->_process_colonic_headers(\@parameters,1);
+ return $self->request( HTTP::Request::Common::GET( @parameters ), @suff );
+}
+
+
+sub post {
+ require HTTP::Request::Common;
+ my($self, @parameters) = @_;
+ my @suff = $self->_process_colonic_headers(\@parameters, (ref($parameters[1]) ? 2 : 1));
+ return $self->request( HTTP::Request::Common::POST( @parameters ), @suff );
+}
+
+
+sub head {
+ require HTTP::Request::Common;
+ my($self, @parameters) = @_;
+ my @suff = $self->_process_colonic_headers(\@parameters,1);
+ return $self->request( HTTP::Request::Common::HEAD( @parameters ), @suff );
+}
+
+
+sub put {
+ require HTTP::Request::Common;
+ my($self, @parameters) = @_;
+ my @suff = $self->_process_colonic_headers(\@parameters, (ref($parameters[1]) ? 2 : 1));
+ return $self->request( HTTP::Request::Common::PUT( @parameters ), @suff );
+}
+
+
+sub delete {
+ require HTTP::Request::Common;
+ my($self, @parameters) = @_;
+ my @suff = $self->_process_colonic_headers(\@parameters,1);
+ return $self->request( HTTP::Request::Common::DELETE( @parameters ), @suff );
+}
+
+
+sub _process_colonic_headers {
+ # Process :content_cb / :content_file / :read_size_hint headers.
+ my($self, $args, $start_index) = @_;
+
+ my($arg, $size);
+ for(my $i = $start_index; $i < @$args; $i += 2) {
+ next unless defined $args->[$i];
+
+ #printf "Considering %s => %s\n", $args->[$i], $args->[$i + 1];
+
+ if($args->[$i] eq ':content_cb') {
+ # Some sanity-checking...
+ $arg = $args->[$i + 1];
+ Carp::croak("A :content_cb value can't be undef") unless defined $arg;
+ Carp::croak("A :content_cb value must be a coderef")
+ unless ref $arg and UNIVERSAL::isa($arg, 'CODE');
+
+ }
+ elsif ($args->[$i] eq ':content_file') {
+ $arg = $args->[$i + 1];
+
+ # Some sanity-checking...
+ Carp::croak("A :content_file value can't be undef")
+ unless defined $arg;
+ Carp::croak("A :content_file value can't be a reference")
+ if ref $arg;
+ Carp::croak("A :content_file value can't be \"\"")
+ unless length $arg;
+
+ }
+ elsif ($args->[$i] eq ':read_size_hint') {
+ $size = $args->[$i + 1];
+ # Bother checking it?
+
+ }
+ else {
+ next;
+ }
+ splice @$args, $i, 2;
+ $i -= 2;
+ }
+
+ # And return a suitable suffix-list for request(REQ,...)
+
+ return unless defined $arg;
+ return $arg, $size if defined $size;
+ return $arg;
+}
+
+
+sub is_online {
+ my $self = shift;
+ return 1 if $self->get("http://www.msftncsi.com/ncsi.txt")->content eq "Microsoft NCSI";
+ return 1 if $self->get("http://www.apple.com")->content =~ m,<title>Apple</title>,;
+ return 0;
+}
+
+
+my @ANI = qw(- \ | /);
+
+sub progress {
+ my($self, $status, $m) = @_;
+ return unless $self->{show_progress};
+
+ local($,, $\);
+ if ($status eq "begin") {
+ print STDERR "** ", $m->method, " ", $m->uri, " ==> ";
+ $self->{progress_start} = time;
+ $self->{progress_lastp} = "";
+ $self->{progress_ani} = 0;
+ }
+ elsif ($status eq "end") {
+ delete $self->{progress_lastp};
+ delete $self->{progress_ani};
+ print STDERR $m->status_line;
+ my $t = time - delete $self->{progress_start};
+ print STDERR " (${t}s)" if $t;
+ print STDERR "\n";
+ }
+ elsif ($status eq "tick") {
+ print STDERR "$ANI[$self->{progress_ani}++]\b";
+ $self->{progress_ani} %= @ANI;
+ }
+ else {
+ my $p = sprintf "%3.0f%%", $status * 100;
+ return if $p eq $self->{progress_lastp};
+ print STDERR "$p\b\b\b\b";
+ $self->{progress_lastp} = $p;
+ }
+ STDERR->flush;
+}
+
+
+#
+# This whole allow/forbid thing is based on man 1 at's way of doing things.
+#
+sub is_protocol_supported
+{
+ my($self, $scheme) = @_;
+ if (ref $scheme) {
+ # assume we got a reference to an URI object
+ $scheme = $scheme->scheme;
+ }
+ else {
+ Carp::croak("Illegal scheme '$scheme' passed to is_protocol_supported")
+ if $scheme =~ /\W/;
+ $scheme = lc $scheme;
+ }
+
+ my $x;
+ if(ref($self) and $x = $self->protocols_allowed) {
+ return 0 unless grep lc($_) eq $scheme, @$x;
+ }
+ elsif (ref($self) and $x = $self->protocols_forbidden) {
+ return 0 if grep lc($_) eq $scheme, @$x;
+ }
+
+ local($SIG{__DIE__}); # protect against user defined die handlers
+ $x = LWP::Protocol::implementor($scheme);
+ return 1 if $x and $x ne 'LWP::Protocol::nogo';
+ return 0;
+}
+
+
+sub protocols_allowed { shift->_elem('protocols_allowed' , @_) }
+sub protocols_forbidden { shift->_elem('protocols_forbidden' , @_) }
+sub requests_redirectable { shift->_elem('requests_redirectable', @_) }
+
+
+sub redirect_ok
+{
+ # RFC 2616, section 10.3.2 and 10.3.3 say:
+ # If the 30[12] status code is received in response to a request other
+ # than GET or HEAD, the user agent MUST NOT automatically redirect the
+ # request unless it can be confirmed by the user, since this might
+ # change the conditions under which the request was issued.
+
+ # Note that this routine used to be just:
+ # return 0 if $_[1]->method eq "POST"; return 1;
+
+ my($self, $new_request, $response) = @_;
+ my $method = $response->request->method;
+ return 0 unless grep $_ eq $method,
+ @{ $self->requests_redirectable || [] };
+
+ if ($new_request->uri->scheme eq 'file') {
+ $response->header("Client-Warning" =>
+ "Can't redirect to a file:// URL!");
+ return 0;
+ }
+
+ # Otherwise it's apparently okay...
+ return 1;
+}
+
+
+sub credentials
+{
+ my $self = shift;
+ my $netloc = lc(shift);
+ my $realm = shift || "";
+ my $old = $self->{basic_authentication}{$netloc}{$realm};
+ if (@_) {
+ $self->{basic_authentication}{$netloc}{$realm} = [@_];
+ }
+ return unless $old;
+ return @$old if wantarray;
+ return join(":", @$old);
+}
+
+
+sub get_basic_credentials
+{
+ my($self, $realm, $uri, $proxy) = @_;
+ return if $proxy;
+ return $self->credentials($uri->host_port, $realm);
+}
+
+
+sub timeout { shift->_elem('timeout', @_); }
+sub local_address{ shift->_elem('local_address',@_); }
+sub max_size { shift->_elem('max_size', @_); }
+sub max_redirect { shift->_elem('max_redirect', @_); }
+sub show_progress{ shift->_elem('show_progress', @_); }
+
+sub ssl_opts {
+ my $self = shift;
+ if (@_ == 1) {
+ my $k = shift;
+ return $self->{ssl_opts}{$k};
+ }
+ if (@_) {
+ my $old;
+ while (@_) {
+ my($k, $v) = splice(@_, 0, 2);
+ $old = $self->{ssl_opts}{$k} unless @_;
+ if (defined $v) {
+ $self->{ssl_opts}{$k} = $v;
+ }
+ else {
+ delete $self->{ssl_opts}{$k};
+ }
+ }
+ %{$self->{ssl_opts}} = (%{$self->{ssl_opts}}, @_);
+ return $old;
+ }
+
+ return keys %{$self->{ssl_opts}};
+}
+
+sub parse_head {
+ my $self = shift;
+ if (@_) {
+ my $flag = shift;
+ my $parser;
+ my $old = $self->set_my_handler("response_header", $flag ? sub {
+ my($response, $ua) = @_;
+ require HTML::HeadParser;
+ $parser = HTML::HeadParser->new;
+ $parser->xml_mode(1) if $response->content_is_xhtml;
+ $parser->utf8_mode(1) if $] >= 5.008 && $HTML::Parser::VERSION >= 3.40;
+
+ push(@{$response->{handlers}{response_data}}, {
+ callback => sub {
+ return unless $parser;
+ unless ($parser->parse($_[3])) {
+ my $h = $parser->header;
+ my $r = $_[0];
+ for my $f ($h->header_field_names) {
+ $r->init_header($f, [$h->header($f)]);
+ }
+ undef($parser);
+ }
+ },
+ });
+
+ } : undef,
+ m_media_type => "html",
+ );
+ return !!$old;
+ }
+ else {
+ return !!$self->get_my_handler("response_header");
+ }
+}
+
+sub cookie_jar {
+ my $self = shift;
+ my $old = $self->{cookie_jar};
+ if (@_) {
+ my $jar = shift;
+ if (ref($jar) eq "HASH") {
+ require HTTP::Cookies;
+ $jar = HTTP::Cookies->new(%$jar);
+ }
+ $self->{cookie_jar} = $jar;
+ $self->set_my_handler("request_prepare",
+ $jar ? sub { $jar->add_cookie_header($_[0]); } : undef,
+ );
+ $self->set_my_handler("response_done",
+ $jar ? sub { $jar->extract_cookies($_[0]); } : undef,
+ );
+ }
+ $old;
+}
+
+sub default_headers {
+ my $self = shift;
+ my $old = $self->{def_headers} ||= HTTP::Headers->new;
+ if (@_) {
+ Carp::croak("default_headers not set to HTTP::Headers compatible object")
+ unless @_ == 1 && $_[0]->can("header_field_names");
+ $self->{def_headers} = shift;
+ }
+ return $old;
+}
+
+sub default_header {
+ my $self = shift;
+ return $self->default_headers->header(@_);
+}
+
+sub _agent { "libwww-perl/$LWP::VERSION" }
+
+sub agent {
+ my $self = shift;
+ if (@_) {
+ my $agent = shift;
+ if ($agent) {
+ $agent .= $self->_agent if $agent =~ /\s+$/;
+ }
+ else {
+ undef($agent)
+ }
+ return $self->default_header("User-Agent", $agent);
+ }
+ return $self->default_header("User-Agent");
+}
+
+sub from { # legacy
+ my $self = shift;
+ return $self->default_header("From", @_);
+}
+
+
+sub conn_cache {
+ my $self = shift;
+ my $old = $self->{conn_cache};
+ if (@_) {
+ my $cache = shift;
+ if (ref($cache) eq "HASH") {
+ require LWP::ConnCache;
+ $cache = LWP::ConnCache->new(%$cache);
+ }
+ $self->{conn_cache} = $cache;
+ }
+ $old;
+}
+
+
+sub add_handler {
+ my($self, $phase, $cb, %spec) = @_;
+ $spec{line} ||= join(":", (caller)[1,2]);
+ my $conf = $self->{handlers}{$phase} ||= do {
+ require HTTP::Config;
+ HTTP::Config->new;
+ };
+ $conf->add(%spec, callback => $cb);
+}
+
+sub set_my_handler {
+ my($self, $phase, $cb, %spec) = @_;
+ $spec{owner} = (caller(1))[3] unless exists $spec{owner};
+ $self->remove_handler($phase, %spec);
+ $spec{line} ||= join(":", (caller)[1,2]);
+ $self->add_handler($phase, $cb, %spec) if $cb;
+}
+
+sub get_my_handler {
+ my $self = shift;
+ my $phase = shift;
+ my $init = pop if @_ % 2;
+ my %spec = @_;
+ my $conf = $self->{handlers}{$phase};
+ unless ($conf) {
+ return unless $init;
+ require HTTP::Config;
+ $conf = $self->{handlers}{$phase} = HTTP::Config->new;
+ }
+ $spec{owner} = (caller(1))[3] unless exists $spec{owner};
+ my @h = $conf->find(%spec);
+ if (!@h && $init) {
+ if (ref($init) eq "CODE") {
+ $init->(\%spec);
+ }
+ elsif (ref($init) eq "HASH") {
+ while (my($k, $v) = each %$init) {
+ $spec{$k} = $v;
+ }
+ }
+ $spec{callback} ||= sub {};
+ $spec{line} ||= join(":", (caller)[1,2]);
+ $conf->add(\%spec);
+ return \%spec;
+ }
+ return wantarray ? @h : $h[0];
+}
+
+sub remove_handler {
+ my($self, $phase, %spec) = @_;
+ if ($phase) {
+ my $conf = $self->{handlers}{$phase} || return;
+ my @h = $conf->remove(%spec);
+ delete $self->{handlers}{$phase} if $conf->empty;
+ return @h;
+ }
+
+ return unless $self->{handlers};
+ return map $self->remove_handler($_), sort keys %{$self->{handlers}};
+}
+
+sub handlers {
+ my($self, $phase, $o) = @_;
+ my @h;
+ if ($o->{handlers} && $o->{handlers}{$phase}) {
+ push(@h, @{$o->{handlers}{$phase}});
+ }
+ if (my $conf = $self->{handlers}{$phase}) {
+ push(@h, $conf->matching($o));
+ }
+ return @h;
+}
+
+sub run_handlers {
+ my($self, $phase, $o) = @_;
+ if (defined(wantarray)) {
+ for my $h ($self->handlers($phase, $o)) {
+ my $ret = $h->{callback}->($o, $self, $h);
+ return $ret if $ret;
+ }
+ return undef;
+ }
+
+ for my $h ($self->handlers($phase, $o)) {
+ $h->{callback}->($o, $self, $h);
+ }
+}
+
+
+# depreciated
+sub use_eval { shift->_elem('use_eval', @_); }
+sub use_alarm
+{
+ Carp::carp("LWP::UserAgent->use_alarm(BOOL) is a no-op")
+ if @_ > 1 && $^W;
+ "";
+}
+
+
+sub clone
+{
+ my $self = shift;
+ my $copy = bless { %$self }, ref $self; # copy most fields
+
+ delete $copy->{handlers};
+ delete $copy->{conn_cache};
+
+ # copy any plain arrays and hashes; known not to need recursive copy
+ for my $k (qw(proxy no_proxy requests_redirectable ssl_opts)) {
+ next unless $copy->{$k};
+ if (ref($copy->{$k}) eq "ARRAY") {
+ $copy->{$k} = [ @{$copy->{$k}} ];
+ }
+ elsif (ref($copy->{$k}) eq "HASH") {
+ $copy->{$k} = { %{$copy->{$k}} };
+ }
+ }
+
+ if ($self->{def_headers}) {
+ $copy->{def_headers} = $self->{def_headers}->clone;
+ }
+
+ # re-enable standard handlers
+ $copy->parse_head($self->parse_head);
+
+ # no easy way to clone the cookie jar; so let's just remove it for now
+ $copy->cookie_jar(undef);
+
+ $copy;
+}
+
+
+sub mirror
+{
+ my($self, $url, $file) = @_;
+
+ my $request = HTTP::Request->new('GET', $url);
+
+ # If the file exists, add a cache-related header
+ if ( -e $file ) {
+ my ($mtime) = ( stat($file) )[9];
+ if ($mtime) {
+ $request->header( 'If-Modified-Since' => HTTP::Date::time2str($mtime) );
+ }
+ }
+ my $tmpfile = "$file-$$";
+
+ my $response = $self->request($request, $tmpfile);
+ if ( $response->header('X-Died') ) {
+ die $response->header('X-Died');
+ }
+
+ # Only fetching a fresh copy of the would be considered success.
+ # If the file was not modified, "304" would returned, which
+ # is considered by HTTP::Status to be a "redirect", /not/ "success"
+ if ( $response->is_success ) {
+ my @stat = stat($tmpfile) or die "Could not stat tmpfile '$tmpfile': $!";
+ my $file_length = $stat[7];
+ my ($content_length) = $response->header('Content-length');
+
+ if ( defined $content_length and $file_length < $content_length ) {
+ unlink($tmpfile);
+ die "Transfer truncated: " . "only $file_length out of $content_length bytes received\n";
+ }
+ elsif ( defined $content_length and $file_length > $content_length ) {
+ unlink($tmpfile);
+ die "Content-length mismatch: " . "expected $content_length bytes, got $file_length\n";
+ }
+ # The file was the expected length.
+ else {
+ # Replace the stale file with a fresh copy
+ if ( -e $file ) {
+ # Some dosish systems fail to rename if the target exists
+ chmod 0777, $file;
+ unlink $file;
+ }
+ rename( $tmpfile, $file )
+ or die "Cannot rename '$tmpfile' to '$file': $!\n";
+
+ # make sure the file has the same last modification time
+ if ( my $lm = $response->last_modified ) {
+ utime $lm, $lm, $file;
+ }
+ }
+ }
+ # The local copy is fresh enough, so just delete the temp file
+ else {
+ unlink($tmpfile);
+ }
+ return $response;
+}
+
+
+sub _need_proxy {
+ my($req, $ua) = @_;
+ return if exists $req->{proxy};
+ my $proxy = $ua->{proxy}{$req->uri->scheme} || return;
+ if ($ua->{no_proxy}) {
+ if (my $host = eval { $req->uri->host }) {
+ for my $domain (@{$ua->{no_proxy}}) {
+ if ($host =~ /\Q$domain\E$/) {
+ return;
+ }
+ }
+ }
+ }
+ $req->{proxy} = $HTTP::URI_CLASS->new($proxy);
+}
+
+
+sub proxy
+{
+ my $self = shift;
+ my $key = shift;
+ return map $self->proxy($_, @_), @$key if ref $key;
+
+ Carp::croak("'$key' is not a valid URI scheme") unless $key =~ /^$URI::scheme_re\z/;
+ my $old = $self->{'proxy'}{$key};
+ if (@_) {
+ my $url = shift;
+ if (defined($url) && length($url)) {
+ Carp::croak("Proxy must be specified as absolute URI; '$url' is not") unless $url =~ /^$URI::scheme_re:/;
+ Carp::croak("Bad http proxy specification '$url'") if $url =~ /^https?:/ && $url !~ m,^https?://\w,;
+ }
+ $self->{proxy}{$key} = $url;
+ $self->set_my_handler("request_preprepare", \&_need_proxy)
+ }
+ return $old;
+}
+
+
+sub env_proxy {
+ my ($self) = @_;
+ require Encode;
+ require Encode::Locale;
+ my($k,$v);
+ while(($k, $v) = each %ENV) {
+ if ($ENV{REQUEST_METHOD}) {
+ # Need to be careful when called in the CGI environment, as
+ # the HTTP_PROXY variable is under control of that other guy.
+ next if $k =~ /^HTTP_/;
+ $k = "HTTP_PROXY" if $k eq "CGI_HTTP_PROXY";
+ }
+ $k = lc($k);
+ next unless $k =~ /^(.*)_proxy$/;
+ $k = $1;
+ if ($k eq 'no') {
+ $self->no_proxy(split(/\s*,\s*/, $v));
+ }
+ else {
+ # Ignore random _proxy variables, allow only valid schemes
+ next unless $k =~ /^$URI::scheme_re\z/;
+ # Ignore xxx_proxy variables if xxx isn't a supported protocol
+ next unless LWP::Protocol::implementor($k);
+ $self->proxy($k, Encode::decode(locale => $v));
+ }
+ }
+}
+
+
+sub no_proxy {
+ my($self, @no) = @_;
+ if (@no) {
+ push(@{ $self->{'no_proxy'} }, @no);
+ }
+ else {
+ $self->{'no_proxy'} = [];
+ }
+}
+
+
+sub _new_response {
+ my($request, $code, $message, $content) = @_;
+ my $response = HTTP::Response->new($code, $message);
+ $response->request($request);
+ $response->header("Client-Date" => HTTP::Date::time2str(time));
+ $response->header("Client-Warning" => "Internal response");
+ $response->header("Content-Type" => "text/plain");
+ $response->content($content || "$code $message\n");
+ return $response;
+}
+
+
+1;
+
+__END__
+
+=head1 NAME
+
+LWP::UserAgent - Web user agent class
+
+=head1 SYNOPSIS
+
+ require LWP::UserAgent;
+
+ my $ua = LWP::UserAgent->new;
+ $ua->timeout(10);
+ $ua->env_proxy;
+
+ my $response = $ua->get('http://search.cpan.org/');
+
+ if ($response->is_success) {
+ print $response->decoded_content; # or whatever
+ }
+ else {
+ die $response->status_line;
+ }
+
+=head1 DESCRIPTION
+
+The C<LWP::UserAgent> is a class implementing a web user agent.
+C<LWP::UserAgent> objects can be used to dispatch web requests.
+
+In normal use the application creates an C<LWP::UserAgent> object, and
+then configures it with values for timeouts, proxies, name, etc. It
+then creates an instance of C<HTTP::Request> for the request that
+needs to be performed. This request is then passed to one of the
+request method the UserAgent, which dispatches it using the relevant
+protocol, and returns a C<HTTP::Response> object. There are
+convenience methods for sending the most common request types: get(),
+head(), post(), put() and delete(). When using these methods then the
+creation of the request object is hidden as shown in the synopsis above.
+
+The basic approach of the library is to use HTTP style communication
+for all protocol schemes. This means that you will construct
+C<HTTP::Request> objects and receive C<HTTP::Response> objects even
+for non-HTTP resources like I<gopher> and I<ftp>. In order to achieve
+even more similarity to HTTP style communications, gopher menus and
+file directories are converted to HTML documents.
+
+=head1 CONSTRUCTOR METHODS
+
+The following constructor methods are available:
+
+=over 4
+
+=item $ua = LWP::UserAgent->new( %options )
+
+This method constructs a new C<LWP::UserAgent> object and returns it.
+Key/value pair arguments may be provided to set up the initial state.
+The following options correspond to attribute methods described below:
+
+ KEY DEFAULT
+ ----------- --------------------
+ agent "libwww-perl/#.###"
+ from undef
+ conn_cache undef
+ cookie_jar undef
+ default_headers HTTP::Headers->new
+ local_address undef
+ ssl_opts { verify_hostname => 1 }
+ max_size undef
+ max_redirect 7
+ parse_head 1
+ protocols_allowed undef
+ protocols_forbidden undef
+ requests_redirectable ['GET', 'HEAD']
+ timeout 180
+
+The following additional options are also accepted: If the C<env_proxy> option
+is passed in with a TRUE value, then proxy settings are read from environment
+variables (see env_proxy() method below). If C<env_proxy> isn't provided the
+C<PERL_LWP_ENV_PROXY> environment variable controls if env_proxy() is called
+during initalization. If the C<keep_alive> option is passed in, then a
+C<LWP::ConnCache> is set up (see conn_cache() method below). The C<keep_alive>
+value is passed on as the C<total_capacity> for the connection cache.
+
+=item $ua->clone
+
+Returns a copy of the LWP::UserAgent object.
+
+=back
+
+=head1 ATTRIBUTES
+
+The settings of the configuration attributes modify the behaviour of the
+C<LWP::UserAgent> when it dispatches requests. Most of these can also
+be initialized by options passed to the constructor method.
+
+The following attribute methods are provided. The attribute value is
+left unchanged if no argument is given. The return value from each
+method is the old attribute value.
+
+=over
+
+=item $ua->agent
+
+=item $ua->agent( $product_id )
+
+Get/set the product token that is used to identify the user agent on
+the network. The agent value is sent as the "User-Agent" header in
+the requests. The default is the string returned by the _agent()
+method (see below).
+
+If the $product_id ends with space then the _agent() string is
+appended to it.
+
+The user agent string should be one or more simple product identifiers
+with an optional version number separated by the "/" character.
+Examples are:
+
+ $ua->agent('Checkbot/0.4 ' . $ua->_agent);
+ $ua->agent('Checkbot/0.4 '); # same as above
+ $ua->agent('Mozilla/5.0');
+ $ua->agent(""); # don't identify
+
+=item $ua->_agent
+
+Returns the default agent identifier. This is a string of the form
+"libwww-perl/#.###", where "#.###" is substituted with the version number
+of this library.
+
+=item $ua->from
+
+=item $ua->from( $email_address )
+
+Get/set the e-mail address for the human user who controls
+the requesting user agent. The address should be machine-usable, as
+defined in RFC 822. The C<from> value is send as the "From" header in
+the requests. Example:
+
+ $ua->from('gaas@cpan.org');
+
+The default is to not send a "From" header. See the default_headers()
+method for the more general interface that allow any header to be defaulted.
+
+=item $ua->cookie_jar
+
+=item $ua->cookie_jar( $cookie_jar_obj )
+
+Get/set the cookie jar object to use. The only requirement is that
+the cookie jar object must implement the extract_cookies($request) and
+add_cookie_header($response) methods. These methods will then be
+invoked by the user agent as requests are sent and responses are
+received. Normally this will be a C<HTTP::Cookies> object or some
+subclass.
+
+The default is to have no cookie_jar, i.e. never automatically add
+"Cookie" headers to the requests.
+
+Shortcut: If a reference to a plain hash is passed in as the
+$cookie_jar_object, then it is replaced with an instance of
+C<HTTP::Cookies> that is initialized based on the hash. This form also
+automatically loads the C<HTTP::Cookies> module. It means that:
+
+ $ua->cookie_jar({ file => "$ENV{HOME}/.cookies.txt" });
+
+is really just a shortcut for:
+
+ require HTTP::Cookies;
+ $ua->cookie_jar(HTTP::Cookies->new(file => "$ENV{HOME}/.cookies.txt"));
+
+=item $ua->default_headers
+
+=item $ua->default_headers( $headers_obj )
+
+Get/set the headers object that will provide default header values for
+any requests sent. By default this will be an empty C<HTTP::Headers>
+object.
+
+=item $ua->default_header( $field )
+
+=item $ua->default_header( $field => $value )
+
+This is just a short-cut for $ua->default_headers->header( $field =>
+$value ). Example:
+
+ $ua->default_header('Accept-Encoding' => scalar HTTP::Message::decodable());
+ $ua->default_header('Accept-Language' => "no, en");
+
+=item $ua->conn_cache
+
+=item $ua->conn_cache( $cache_obj )
+
+Get/set the C<LWP::ConnCache> object to use. See L<LWP::ConnCache>
+for details.
+
+=item $ua->credentials( $netloc, $realm )
+
+=item $ua->credentials( $netloc, $realm, $uname, $pass )
+
+Get/set the user name and password to be used for a realm.
+
+The $netloc is a string of the form "<host>:<port>". The username and
+password will only be passed to this server. Example:
+
+ $ua->credentials("www.example.com:80", "Some Realm", "foo", "secret");
+
+=item $ua->local_address
+
+=item $ua->local_address( $address )
+
+Get/set the local interface to bind to for network connections. The interface
+can be specified as a hostname or an IP address. This value is passed as the
+C<LocalAddr> argument to L<IO::Socket::INET>.
+
+=item $ua->max_size
+
+=item $ua->max_size( $bytes )
+
+Get/set the size limit for response content. The default is C<undef>,
+which means that there is no limit. If the returned response content
+is only partial, because the size limit was exceeded, then a
+"Client-Aborted" header will be added to the response. The content
+might end up longer than C<max_size> as we abort once appending a
+chunk of data makes the length exceed the limit. The "Content-Length"
+header, if present, will indicate the length of the full content and
+will normally not be the same as C<< length($res->content) >>.
+
+=item $ua->max_redirect
+
+=item $ua->max_redirect( $n )
+
+This reads or sets the object's limit of how many times it will obey
+redirection responses in a given request cycle.
+
+By default, the value is 7. This means that if you call request()
+method and the response is a redirect elsewhere which is in turn a
+redirect, and so on seven times, then LWP gives up after that seventh
+request.
+
+=item $ua->parse_head
+
+=item $ua->parse_head( $boolean )
+
+Get/set a value indicating whether we should initialize response
+headers from the E<lt>head> section of HTML documents. The default is
+TRUE. Do not turn this off, unless you know what you are doing.
+
+=item $ua->protocols_allowed
+
+=item $ua->protocols_allowed( \@protocols )
+
+This reads (or sets) this user agent's list of protocols that the
+request methods will exclusively allow. The protocol names are case
+insensitive.
+
+For example: C<$ua-E<gt>protocols_allowed( [ 'http', 'https'] );>
+means that this user agent will I<allow only> those protocols,
+and attempts to use this user agent to access URLs with any other
+schemes (like "ftp://...") will result in a 500 error.
+
+To delete the list, call: C<$ua-E<gt>protocols_allowed(undef)>
+
+By default, an object has neither a C<protocols_allowed> list, nor a
+C<protocols_forbidden> list.
+
+Note that having a C<protocols_allowed> list causes any
+C<protocols_forbidden> list to be ignored.
+
+=item $ua->protocols_forbidden
+
+=item $ua->protocols_forbidden( \@protocols )
+
+This reads (or sets) this user agent's list of protocols that the
+request method will I<not> allow. The protocol names are case
+insensitive.
+
+For example: C<$ua-E<gt>protocols_forbidden( [ 'file', 'mailto'] );>
+means that this user agent will I<not> allow those protocols, and
+attempts to use this user agent to access URLs with those schemes
+will result in a 500 error.
+
+To delete the list, call: C<$ua-E<gt>protocols_forbidden(undef)>
+
+=item $ua->requests_redirectable
+
+=item $ua->requests_redirectable( \@requests )
+
+This reads or sets the object's list of request names that
+C<$ua-E<gt>redirect_ok(...)> will allow redirection for. By
+default, this is C<['GET', 'HEAD']>, as per RFC 2616. To
+change to include 'POST', consider:
+
+ push @{ $ua->requests_redirectable }, 'POST';
+
+=item $ua->show_progress
+
+=item $ua->show_progress( $boolean )
+
+Get/set a value indicating whether a progress bar should be displayed
+on on the terminal as requests are processed. The default is FALSE.
+
+=item $ua->timeout
+
+=item $ua->timeout( $secs )
+
+Get/set the timeout value in seconds. The default timeout() value is
+180 seconds, i.e. 3 minutes.
+
+The requests is aborted if no activity on the connection to the server
+is observed for C<timeout> seconds. This means that the time it takes
+for the complete transaction and the request() method to actually
+return might be longer.
+
+=item $ua->ssl_opts
+
+=item $ua->ssl_opts( $key )
+
+=item $ua->ssl_opts( $key => $value )
+
+Get/set the options for SSL connections. Without argument return the list
+of options keys currently set. With a single argument return the current
+value for the given option. With 2 arguments set the option value and return
+the old. Setting an option to the value C<undef> removes this option.
+
+The options that LWP relates to are:
+
+=over
+
+=item C<verify_hostname> => $bool
+
+When TRUE LWP will for secure protocol schemes ensure it connects to servers
+that have a valid certificate matching the expected hostname. If FALSE no
+checks are made and you can't be sure that you communicate with the expected peer.
+The no checks behaviour was the default for libwww-perl-5.837 and earlier releases.
+
+This option is initialized from the L<PERL_LWP_SSL_VERIFY_HOSTNAME> environment
+variable. If this environment variable isn't set; then C<verify_hostname>
+defaults to 1.
+
+=item C<SSL_ca_file> => $path
+
+The path to a file containing Certificate Authority certificates.
+A default setting for this option is provided by checking the environment
+variables C<PERL_LWP_SSL_CA_FILE> and C<HTTPS_CA_FILE> in order.
+
+=item C<SSL_ca_path> => $path
+
+The path to a directory containing files containing Certificate Authority
+certificates.
+A default setting for this option is provided by checking the environment
+variables C<PERL_LWP_SSL_CA_PATH> and C<HTTPS_CA_DIR> in order.
+
+=back
+
+Other options can be set and are processed directly by the SSL Socket implementation
+in use. See L<IO::Socket::SSL> or L<Net::SSL> for details.
+
+The libwww-perl core no longer bundles protocol plugins for SSL. You will need
+to install L<LWP::Protocol::https> separately to enable support for processing
+https-URLs.
+
+=back
+
+=head2 Proxy attributes
+
+The following methods set up when requests should be passed via a
+proxy server.
+
+=over
+
+=item $ua->proxy(\@schemes, $proxy_url)
+
+=item $ua->proxy($scheme, $proxy_url)
+
+Set/retrieve proxy URL for a scheme:
+
+ $ua->proxy(['http', 'ftp'], 'http://proxy.sn.no:8001/');
+ $ua->proxy('gopher', 'http://proxy.sn.no:8001/');
+
+The first form specifies that the URL is to be used for proxying of
+access methods listed in the list in the first method argument,
+i.e. 'http' and 'ftp'.
+
+The second form shows a shorthand form for specifying
+proxy URL for a single access scheme.
+
+=item $ua->no_proxy( $domain, ... )
+
+Do not proxy requests to the given domains. Calling no_proxy without
+any domains clears the list of domains. Eg:
+
+ $ua->no_proxy('localhost', 'example.com');
+
+=item $ua->env_proxy
+
+Load proxy settings from *_proxy environment variables. You might
+specify proxies like this (sh-syntax):
+
+ gopher_proxy=http://proxy.my.place/
+ wais_proxy=http://proxy.my.place/
+ no_proxy="localhost,example.com"
+ export gopher_proxy wais_proxy no_proxy
+
+csh or tcsh users should use the C<setenv> command to define these
+environment variables.
+
+On systems with case insensitive environment variables there exists a
+name clash between the CGI environment variables and the C<HTTP_PROXY>
+environment variable normally picked up by env_proxy(). Because of
+this C<HTTP_PROXY> is not honored for CGI scripts. The
+C<CGI_HTTP_PROXY> environment variable can be used instead.
+
+=back
+
+=head2 Handlers
+
+Handlers are code that injected at various phases during the
+processing of requests. The following methods are provided to manage
+the active handlers:
+
+=over
+
+=item $ua->add_handler( $phase => \&cb, %matchspec )
+
+Add handler to be invoked in the given processing phase. For how to
+specify %matchspec see L<HTTP::Config/"Matching">.
+
+The possible values $phase and the corresponding callback signatures are:
+
+=over
+
+=item request_preprepare => sub { my($request, $ua, $h) = @_; ... }
+
+The handler is called before the C<request_prepare> and other standard
+initialization of of the request. This can be used to set up headers
+and attributes that the C<request_prepare> handler depends on. Proxy
+initialization should take place here; but in general don't register
+handlers for this phase.
+
+=item request_prepare => sub { my($request, $ua, $h) = @_; ... }
+
+The handler is called before the request is sent and can modify the
+request any way it see fit. This can for instance be used to add
+certain headers to specific requests.
+
+The method can assign a new request object to $_[0] to replace the
+request that is sent fully.
+
+The return value from the callback is ignored. If an exception is
+raised it will abort the request and make the request method return a
+"400 Bad request" response.
+
+=item request_send => sub { my($request, $ua, $h) = @_; ... }
+
+This handler gets a chance of handling requests before they're sent to the
+protocol handlers. It should return an HTTP::Response object if it
+wishes to terminate the processing; otherwise it should return nothing.
+
+The C<response_header> and C<response_data> handlers will not be
+invoked for this response, but the C<response_done> will be.
+
+=item response_header => sub { my($response, $ua, $h) = @_; ... }
+
+This handler is called right after the response headers have been
+received, but before any content data. The handler might set up
+handlers for data and might croak to abort the request.
+
+The handler might set the $response->{default_add_content} value to
+control if any received data should be added to the response object
+directly. This will initially be false if the $ua->request() method
+was called with a $content_file or $content_cb argument; otherwise true.
+
+=item response_data => sub { my($response, $ua, $h, $data) = @_; ... }
+
+This handler is called for each chunk of data received for the
+response. The handler might croak to abort the request.
+
+This handler needs to return a TRUE value to be called again for
+subsequent chunks for the same request.
+
+=item response_done => sub { my($response, $ua, $h) = @_; ... }
+
+The handler is called after the response has been fully received, but
+before any redirect handling is attempted. The handler can be used to
+extract information or modify the response.
+
+=item response_redirect => sub { my($response, $ua, $h) = @_; ... }
+
+The handler is called in $ua->request after C<response_done>. If the
+handler returns an HTTP::Request object we'll start over with processing
+this request instead.
+
+=back
+
+=item $ua->remove_handler( undef, %matchspec )
+
+=item $ua->remove_handler( $phase, %matchspec )
+
+Remove handlers that match the given %matchspec. If $phase is not
+provided remove handlers from all phases.
+
+Be careful as calling this function with %matchspec that is not not
+specific enough can remove handlers not owned by you. It's probably
+better to use the set_my_handler() method instead.
+
+The removed handlers are returned.
+
+=item $ua->set_my_handler( $phase, $cb, %matchspec )
+
+Set handlers private to the executing subroutine. Works by defaulting
+an C<owner> field to the %matchspec that holds the name of the called
+subroutine. You might pass an explicit C<owner> to override this.
+
+If $cb is passed as C<undef>, remove the handler.
+
+=item $ua->get_my_handler( $phase, %matchspec )
+
+=item $ua->get_my_handler( $phase, %matchspec, $init )
+
+Will retrieve the matching handler as hash ref.
+
+If C<$init> is passed passed as a TRUE value, create and add the
+handler if it's not found. If $init is a subroutine reference, then
+it's called with the created handler hash as argument. This sub might
+populate the hash with extra fields; especially the callback. If
+$init is a hash reference, merge the hashes.
+
+=item $ua->handlers( $phase, $request )
+
+=item $ua->handlers( $phase, $response )
+
+Returns the handlers that apply to the given request or response at
+the given processing phase.
+
+=back
+
+=head1 REQUEST METHODS
+
+The methods described in this section are used to dispatch requests
+via the user agent. The following request methods are provided:
+
+=over
+
+=item $ua->get( $url )
+
+=item $ua->get( $url , $field_name => $value, ... )
+
+This method will dispatch a C<GET> request on the given $url. Further
+arguments can be given to initialize the headers of the request. These
+are given as separate name/value pairs. The return value is a
+response object. See L<HTTP::Response> for a description of the
+interface it provides.
+
+There will still be a response object returned when LWP can't connect to the
+server specified in the URL or when other failures in protocol handlers occur.
+These internal responses use the standard HTTP status codes, so the responses
+can't be differentiated by testing the response status code alone. Error
+responses that LWP generates internally will have the "Client-Warning" header
+set to the value "Internal response". If you need to differentiate these
+internal responses from responses that a remote server actually generates, you
+need to test this header value.
+
+Fields names that start with ":" are special. These will not
+initialize headers of the request but will determine how the response
+content is treated. The following special field names are recognized:
+
+ :content_file => $filename
+ :content_cb => \&callback
+ :read_size_hint => $bytes
+
+If a $filename is provided with the C<:content_file> option, then the
+response content will be saved here instead of in the response
+object. If a callback is provided with the C<:content_cb> option then
+this function will be called for each chunk of the response content as
+it is received from the server. If neither of these options are
+given, then the response content will accumulate in the response
+object itself. This might not be suitable for very large response
+bodies. Only one of C<:content_file> or C<:content_cb> can be
+specified. The content of unsuccessful responses will always
+accumulate in the response object itself, regardless of the
+C<:content_file> or C<:content_cb> options passed in.
+
+The C<:read_size_hint> option is passed to the protocol module which
+will try to read data from the server in chunks of this size. A
+smaller value for the C<:read_size_hint> will result in a higher
+number of callback invocations.
+
+The callback function is called with 3 arguments: a chunk of data, a
+reference to the response object, and a reference to the protocol
+object. The callback can abort the request by invoking die(). The
+exception message will show up as the "X-Died" header field in the
+response returned by the get() function.
+
+=item $ua->head( $url )
+
+=item $ua->head( $url , $field_name => $value, ... )
+
+This method will dispatch a C<HEAD> request on the given $url.
+Otherwise it works like the get() method described above.
+
+=item $ua->post( $url, \%form )
+
+=item $ua->post( $url, \@form )
+
+=item $ua->post( $url, \%form, $field_name => $value, ... )
+
+=item $ua->post( $url, $field_name => $value,... Content => \%form )
+
+=item $ua->post( $url, $field_name => $value,... Content => \@form )
+
+=item $ua->post( $url, $field_name => $value,... Content => $content )
+
+This method will dispatch a C<POST> request on the given $url, with
+%form or @form providing the key/value pairs for the fill-in form
+content. Additional headers and content options are the same as for
+the get() method.
+
+This method will use the POST() function from C<HTTP::Request::Common>
+to build the request. See L<HTTP::Request::Common> for a details on
+how to pass form content and other advanced features.
+
+=item $ua->put( $url, \%form )
+
+=item $ua->put( $url, \@form )
+
+=item $ua->put( $url, \%form, $field_name => $value, ... )
+
+=item $ua->put( $url, $field_name => $value,... Content => \%form )
+
+=item $ua->put( $url, $field_name => $value,... Content => \@form )
+
+=item $ua->put( $url, $field_name => $value,... Content => $content )
+
+This method will dispatch a C<PUT> request on the given $url, with
+%form or @form providing the key/value pairs for the fill-in form
+content. Additional headers and content options are the same as for
+the get() method.
+
+This method will use the PUT() function from C<HTTP::Request::Common>
+to build the request. See L<HTTP::Request::Common> for a details on
+how to pass form content and other advanced features.
+
+=item $ua->delete( $url )
+
+=item $ua->delete( $url, $field_name => $value, ... )
+
+This method will dispatch a C<DELETE> request on the given $url. Additional
+headers and content options are the same as for the get() method.
+
+This method will use the DELETE() function from C<HTTP::Request::Common>
+to build the request. See L<HTTP::Request::Common> for a details on
+how to pass form content and other advanced features.
+
+=item $ua->mirror( $url, $filename )
+
+This method will get the document identified by $url and store it in
+file called $filename. If the file already exists, then the request
+will contain an "If-Modified-Since" header matching the modification
+time of the file. If the document on the server has not changed since
+this time, then nothing happens. If the document has been updated, it
+will be downloaded again. The modification time of the file will be
+forced to match that of the server.
+
+The return value is the the response object.
+
+=item $ua->request( $request )
+
+=item $ua->request( $request, $content_file )
+
+=item $ua->request( $request, $content_cb )
+
+=item $ua->request( $request, $content_cb, $read_size_hint )
+
+This method will dispatch the given $request object. Normally this
+will be an instance of the C<HTTP::Request> class, but any object with
+a similar interface will do. The return value is a response object.
+See L<HTTP::Request> and L<HTTP::Response> for a description of the
+interface provided by these classes.
+
+The request() method will process redirects and authentication
+responses transparently. This means that it may actually send several
+simple requests via the simple_request() method described below.
+
+The request methods described above; get(), head(), post() and
+mirror(), will all dispatch the request they build via this method.
+They are convenience methods that simply hides the creation of the
+request object for you.
+
+The $content_file, $content_cb and $read_size_hint all correspond to
+options described with the get() method above.
+
+You are allowed to use a CODE reference as C<content> in the request
+object passed in. The C<content> function should return the content
+when called. The content can be returned in chunks. The content
+function will be invoked repeatedly until it return an empty string to
+signal that there is no more content.
+
+=item $ua->simple_request( $request )
+
+=item $ua->simple_request( $request, $content_file )
+
+=item $ua->simple_request( $request, $content_cb )
+
+=item $ua->simple_request( $request, $content_cb, $read_size_hint )
+
+This method dispatches a single request and returns the response
+received. Arguments are the same as for request() described above.
+
+The difference from request() is that simple_request() will not try to
+handle redirects or authentication responses. The request() method
+will in fact invoke this method for each simple request it sends.
+
+=item $ua->is_online
+
+Tries to determine if you have access to the Internet. Returns
+TRUE if the built-in heuristics determine that the user agent is
+able to access the Internet (over HTTP). See also L<LWP::Online>.
+
+=item $ua->is_protocol_supported( $scheme )
+
+You can use this method to test whether this user agent object supports the
+specified C<scheme>. (The C<scheme> might be a string (like 'http' or
+'ftp') or it might be an URI object reference.)
+
+Whether a scheme is supported, is determined by the user agent's
+C<protocols_allowed> or C<protocols_forbidden> lists (if any), and by
+the capabilities of LWP. I.e., this will return TRUE only if LWP
+supports this protocol I<and> it's permitted for this particular
+object.
+
+=back
+
+=head2 Callback methods
+
+The following methods will be invoked as requests are processed. These
+methods are documented here because subclasses of C<LWP::UserAgent>
+might want to override their behaviour.
+
+=over
+
+=item $ua->prepare_request( $request )
+
+This method is invoked by simple_request(). Its task is to modify the
+given $request object by setting up various headers based on the
+attributes of the user agent. The return value should normally be the
+$request object passed in. If a different request object is returned
+it will be the one actually processed.
+
+The headers affected by the base implementation are; "User-Agent",
+"From", "Range" and "Cookie".
+
+=item $ua->redirect_ok( $prospective_request, $response )
+
+This method is called by request() before it tries to follow a
+redirection to the request in $response. This should return a TRUE
+value if this redirection is permissible. The $prospective_request
+will be the request to be sent if this method returns TRUE.
+
+The base implementation will return FALSE unless the method
+is in the object's C<requests_redirectable> list,
+FALSE if the proposed redirection is to a "file://..."
+URL, and TRUE otherwise.
+
+=item $ua->get_basic_credentials( $realm, $uri, $isproxy )
+
+This is called by request() to retrieve credentials for documents
+protected by Basic or Digest Authentication. The arguments passed in
+is the $realm provided by the server, the $uri requested and a boolean
+flag to indicate if this is authentication against a proxy server.
+
+The method should return a username and password. It should return an
+empty list to abort the authentication resolution attempt. Subclasses
+can override this method to prompt the user for the information. An
+example of this can be found in C<lwp-request> program distributed
+with this library.
+
+The base implementation simply checks a set of pre-stored member
+variables, set up with the credentials() method.
+
+=item $ua->progress( $status, $request_or_response )
+
+This is called frequently as the response is received regardless of
+how the content is processed. The method is called with $status
+"begin" at the start of processing the request and with $state "end"
+before the request method returns. In between these $status will be
+the fraction of the response currently received or the string "tick"
+if the fraction can't be calculated.
+
+When $status is "begin" the second argument is the request object,
+otherwise it is the response object.
+
+=back
+
+=head1 SEE ALSO
+
+See L<LWP> for a complete overview of libwww-perl5. See L<lwpcook>
+and the scripts F<lwp-request> and F<lwp-download> for examples of
+usage.
+
+See L<HTTP::Request> and L<HTTP::Response> for a description of the
+message objects dispatched and received. See L<HTTP::Request::Common>
+and L<HTML::Form> for other ways to build request objects.
+
+See L<WWW::Mechanize> and L<WWW::Search> for examples of more
+specialized user agents based on C<LWP::UserAgent>.
+
+=head1 COPYRIGHT
+
+Copyright 1995-2009 Gisle Aas.
+
+This library is free software; you can redistribute it and/or
+modify it under the same terms as Perl itself.
--- /dev/null
+package WWW::Mechanize;
+
+=head1 NAME
+
+WWW::Mechanize - Handy web browsing in a Perl object
+
+=head1 VERSION
+
+Version 1.70
+
+=cut
+
+our $VERSION = '1.72';
+
+=head1 SYNOPSIS
+
+C<WWW::Mechanize>, or Mech for short, is a Perl module for stateful
+programmatic web browsing, used for automating interaction with
+websites.
+
+Features include:
+
+=over 4
+
+=item * All HTTP methods
+
+=item * High-level hyperlink and HTML form support, without having to parse HTML yourself
+
+=item * SSL support
+
+=item * Automatic cookies
+
+=item * Custom HTTP headers
+
+=item * Automatic handling of redirections
+
+=item * Proxies
+
+=item * HTTP authentication
+
+=back
+
+Mech supports performing a sequence of page fetches including
+following links and submitting forms. Each fetched page is parsed
+and its links and forms are extracted. A link or a form can be
+selected, form fields can be filled and the next page can be fetched.
+Mech also stores a history of the URLs you've visited, which can
+be queried and revisited.
+
+ use WWW::Mechanize;
+ my $mech = WWW::Mechanize->new();
+
+ $mech->get( $url );
+
+ $mech->follow_link( n => 3 );
+ $mech->follow_link( text_regex => qr/download this/i );
+ $mech->follow_link( url => 'http://host.com/index.html' );
+
+ $mech->submit_form(
+ form_number => 3,
+ fields => {
+ username => 'mungo',
+ password => 'lost-and-alone',
+ }
+ );
+
+ $mech->submit_form(
+ form_name => 'search',
+ fields => { query => 'pot of gold', },
+ button => 'Search Now'
+ );
+
+
+Mech is well suited for use in testing web applications. If you use
+one of the Test::*, like L<Test::HTML::Lint> modules, you can check the
+fetched content and use that as input to a test call.
+
+ use Test::More;
+ like( $mech->content(), qr/$expected/, "Got expected content" );
+
+Each page fetch stores its URL in a history stack which you can
+traverse.
+
+ $mech->back();
+
+If you want finer control over your page fetching, you can use
+these methods. C<follow_link> and C<submit_form> are just high
+level wrappers around them.
+
+ $mech->find_link( n => $number );
+ $mech->form_number( $number );
+ $mech->form_name( $name );
+ $mech->field( $name, $value );
+ $mech->set_fields( %field_values );
+ $mech->set_visible( @criteria );
+ $mech->click( $button );
+
+L<WWW::Mechanize> is a proper subclass of L<LWP::UserAgent> and
+you can also use any of L<LWP::UserAgent>'s methods.
+
+ $mech->add_header($name => $value);
+
+Please note that Mech does NOT support JavaScript, you need additional software
+for that. Please check L<WWW::Mechanize::FAQ/"JavaScript"> for more.
+
+=head1 IMPORTANT LINKS
+
+=over 4
+
+=item * L<http://code.google.com/p/www-mechanize/issues/list>
+
+The queue for bugs & enhancements in WWW::Mechanize and
+Test::WWW::Mechanize. Please note that the queue at L<http://rt.cpan.org>
+is no longer maintained.
+
+=item * L<http://search.cpan.org/dist/WWW-Mechanize/>
+
+The CPAN documentation page for Mechanize.
+
+=item * L<http://search.cpan.org/dist/WWW-Mechanize/lib/WWW/Mechanize/FAQ.pod>
+
+Frequently asked questions. Make sure you read here FIRST.
+
+=back
+
+=cut
+
+use strict;
+use warnings;
+
+use HTTP::Request 1.30;
+use LWP::UserAgent 5.827;
+use HTML::Form 1.00;
+use HTML::TokeParser;
+
+use base 'LWP::UserAgent';
+
+our $HAS_ZLIB;
+BEGIN {
+ $HAS_ZLIB = eval 'use Compress::Zlib (); 1;';
+}
+
+=head1 CONSTRUCTOR AND STARTUP
+
+=head2 new()
+
+Creates and returns a new WWW::Mechanize object, hereafter referred to as
+the "agent".
+
+ my $mech = WWW::Mechanize->new()
+
+The constructor for WWW::Mechanize overrides two of the parms to the
+LWP::UserAgent constructor:
+
+ agent => 'WWW-Mechanize/#.##'
+ cookie_jar => {} # an empty, memory-only HTTP::Cookies object
+
+You can override these overrides by passing parms to the constructor,
+as in:
+
+ my $mech = WWW::Mechanize->new( agent => 'wonderbot 1.01' );
+
+If you want none of the overhead of a cookie jar, or don't want your
+bot accepting cookies, you have to explicitly disallow it, like so:
+
+ my $mech = WWW::Mechanize->new( cookie_jar => undef );
+
+Here are the parms that WWW::Mechanize recognizes. These do not include
+parms that L<LWP::UserAgent> recognizes.
+
+=over 4
+
+=item * C<< autocheck => [0|1] >>
+
+Checks each request made to see if it was successful. This saves
+you the trouble of manually checking yourself. Any errors found
+are errors, not warnings.
+
+The default value is ON, unless it's being subclassed, in which
+case it is OFF. This means that standalone L<WWW::Mechanize>instances
+have autocheck turned on, which is protective for the vast majority
+of Mech users who don't bother checking the return value of get()
+and post() and can't figure why their code fails. However, if
+L<WWW::Mechanize> is subclassed, such as for L<Test::WWW::Mechanize>
+or L<Test::WWW::Mechanize::Catalyst>, this may not be an appropriate
+default, so it's off.
+
+=item * C<< noproxy => [0|1] >>
+
+Turn off the automatic call to the L<LWP::UserAgent> C<env_proxy> function.
+
+This needs to be explicitly turned off if you're using L<Crypt::SSLeay> to
+access a https site via a proxy server. Note: you still need to set your
+HTTPS_PROXY environment variable as appropriate.
+
+=item * C<< onwarn => \&func >>
+
+Reference to a C<warn>-compatible function, such as C<< L<Carp>::carp >>,
+that is called when a warning needs to be shown.
+
+If this is set to C<undef>, no warnings will ever be shown. However,
+it's probably better to use the C<quiet> method to control that behavior.
+
+If this value is not passed, Mech uses C<Carp::carp> if L<Carp> is
+installed, or C<CORE::warn> if not.
+
+=item * C<< onerror => \&func >>
+
+Reference to a C<die>-compatible function, such as C<< L<Carp>::croak >>,
+that is called when there's a fatal error.
+
+If this is set to C<undef>, no errors will ever be shown.
+
+If this value is not passed, Mech uses C<Carp::croak> if L<Carp> is
+installed, or C<CORE::die> if not.
+
+=item * C<< quiet => [0|1] >>
+
+Don't complain on warnings. Setting C<< quiet => 1 >> is the same as
+calling C<< $mech->quiet(1) >>. Default is off.
+
+=item * C<< stack_depth => $value >>
+
+Sets the depth of the page stack that keeps track of all the
+downloaded pages. Default is effectively infinite stack size. If
+the stack is eating up your memory, then set this to a smaller
+number, say 5 or 10. Setting this to zero means Mech will keep no
+history.
+
+=back
+
+To support forms, WWW::Mechanize's constructor pushes POST
+on to the agent's C<requests_redirectable> list (see also
+L<LWP::UserAgent>.)
+
+=cut
+
+sub new {
+ my $class = shift;
+
+ my %parent_parms = (
+ agent => "WWW-Mechanize/$VERSION",
+ cookie_jar => {},
+ );
+
+ my %mech_parms = (
+ autocheck => ($class eq 'WWW::Mechanize' ? 1 : 0),
+ onwarn => \&WWW::Mechanize::_warn,
+ onerror => \&WWW::Mechanize::_die,
+ quiet => 0,
+ stack_depth => 8675309, # Arbitrarily humongous stack
+ headers => {},
+ noproxy => 0,
+ );
+
+ my %passed_parms = @_;
+
+ # Keep the mech-specific parms before creating the object.
+ while ( my($key,$value) = each %passed_parms ) {
+ if ( exists $mech_parms{$key} ) {
+ $mech_parms{$key} = $value;
+ }
+ else {
+ $parent_parms{$key} = $value;
+ }
+ }
+
+ my $self = $class->SUPER::new( %parent_parms );
+ bless $self, $class;
+
+ # Use the mech parms now that we have a mech object.
+ for my $parm ( keys %mech_parms ) {
+ $self->{$parm} = $mech_parms{$parm};
+ }
+ $self->{page_stack} = [];
+ $self->env_proxy() unless $mech_parms{noproxy};
+
+ # libwww-perl 5.800 (and before, I assume) has a problem where
+ # $ua->{proxy} can be undef and clone() doesn't handle it.
+ $self->{proxy} = {} unless defined $self->{proxy};
+ push( @{$self->requests_redirectable}, 'POST' );
+
+ $self->_reset_page();
+
+ return $self;
+}
+
+=head2 $mech->agent_alias( $alias )
+
+Sets the user agent string to the expanded version from a table of actual user strings.
+I<$alias> can be one of the following:
+
+=over 4
+
+=item * Windows IE 6
+
+=item * Windows Mozilla
+
+=item * Mac Safari
+
+=item * Mac Mozilla
+
+=item * Linux Mozilla
+
+=item * Linux Konqueror
+
+=back
+
+then it will be replaced with a more interesting one. For instance,
+
+ $mech->agent_alias( 'Windows IE 6' );
+
+sets your User-Agent to
+
+ Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)
+
+The list of valid aliases can be returned from C<known_agent_aliases()>. The current list is:
+
+=over
+
+=item * Windows IE 6
+
+=item * Windows Mozilla
+
+=item * Mac Safari
+
+=item * Mac Mozilla
+
+=item * Linux Mozilla
+
+=item * Linux Konqueror
+
+=back
+
+=cut
+
+my %known_agents = (
+ 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
+ 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
+ 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-us) AppleWebKit/85 (KHTML, like Gecko) Safari/85',
+ 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
+ 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
+ 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
+);
+
+sub agent_alias {
+ my $self = shift;
+ my $alias = shift;
+
+ if ( defined $known_agents{$alias} ) {
+ return $self->agent( $known_agents{$alias} );
+ }
+ else {
+ $self->warn( qq{Unknown agent alias "$alias"} );
+ return $self->agent();
+ }
+}
+
+=head2 known_agent_aliases()
+
+Returns a list of all the agent aliases that Mech knows about.
+
+=cut
+
+sub known_agent_aliases {
+ return sort keys %known_agents;
+}
+
+=head1 PAGE-FETCHING METHODS
+
+=head2 $mech->get( $uri )
+
+Given a URL/URI, fetches it. Returns an L<HTTP::Response> object.
+I<$uri> can be a well-formed URL string, a L<URI> object, or a
+L<WWW::Mechanize::Link> object.
+
+The results are stored internally in the agent object, but you don't
+know that. Just use the accessors listed below. Poking at the
+internals is deprecated and subject to change in the future.
+
+C<get()> is a well-behaved overloaded version of the method in
+L<LWP::UserAgent>. This lets you do things like
+
+ $mech->get( $uri, ':content_file' => $tempfile );
+
+and you can rest assured that the parms will get filtered down
+appropriately.
+
+B<NOTE:> Because C<:content_file> causes the page contents to be
+stored in a file instead of the response object, some Mech functions
+that expect it to be there won't work as expected. Use with caution.
+
+=cut
+
+sub get {
+ my $self = shift;
+ my $uri = shift;
+
+ $uri = $uri->url if ref($uri) eq 'WWW::Mechanize::Link';
+
+ $uri = $self->base
+ ? URI->new_abs( $uri, $self->base )
+ : URI->new( $uri );
+
+ # It appears we are returning a super-class method,
+ # but it in turn calls the request() method here in Mechanize
+ return $self->SUPER::get( $uri->as_string, @_ );
+}
+
+=head2 $mech->put( $uri, content => $content )
+
+PUTs I<$content> to $uri. Returns an L<HTTP::Response> object.
+I<$uri> can be a well-formed URI string, a L<URI> object, or a
+L<WWW::Mechanize::Link> object.
+
+=cut
+
+sub put {
+ my $self = shift;
+ my $uri = shift;
+
+ $uri = $uri->url if ref($uri) eq 'WWW::Mechanize::Link';
+
+ $uri = $self->base
+ ? URI->new_abs( $uri, $self->base )
+ : URI->new( $uri );
+
+ # It appears we are returning a super-class method,
+ # but it in turn calls the request() method here in Mechanize
+ return $self->_SUPER_put( $uri->as_string, @_ );
+}
+
+
+# Added until LWP::UserAgent has it.
+sub _SUPER_put {
+ require HTTP::Request::Common;
+ my($self, @parameters) = @_;
+ my @suff = $self->_process_colonic_headers(\@parameters,1);
+ return $self->request( HTTP::Request::Common::PUT( @parameters ), @suff );
+}
+
+=head2 $mech->reload()
+
+Acts like the reload button in a browser: repeats the current
+request. The history (as per the L</back> method) is not altered.
+
+Returns the L<HTTP::Response> object from the reload, or C<undef>
+if there's no current request.
+
+=cut
+
+sub reload {
+ my $self = shift;
+
+ return unless my $req = $self->{req};
+
+ return $self->_update_page( $req, $self->_make_request( $req, @_ ) );
+}
+
+=head2 $mech->back()
+
+The equivalent of hitting the "back" button in a browser. Returns to
+the previous page. Won't go back past the first page. (Really, what
+would it do if it could?)
+
+Returns true if it could go back, or false if not.
+
+=cut
+
+sub back {
+ my $self = shift;
+
+ my $stack = $self->{page_stack};
+ return unless $stack && @{$stack};
+
+ my $popped = pop @{$self->{page_stack}};
+ my $req = $popped->{req};
+ my $res = $popped->{res};
+
+ $self->_update_page( $req, $res );
+
+ return 1;
+}
+
+=head1 STATUS METHODS
+
+=head2 $mech->success()
+
+Returns a boolean telling whether the last request was successful.
+If there hasn't been an operation yet, returns false.
+
+This is a convenience function that wraps C<< $mech->res->is_success >>.
+
+=cut
+
+sub success {
+ my $self = shift;
+
+ return $self->res && $self->res->is_success;
+}
+
+
+=head2 $mech->uri()
+
+Returns the current URI as a L<URI> object. This object stringifies
+to the URI itself.
+
+=head2 $mech->response() / $mech->res()
+
+Return the current response as an L<HTTP::Response> object.
+
+Synonym for C<< $mech->response() >>
+
+=head2 $mech->status()
+
+Returns the HTTP status code of the response. This is a 3-digit
+number like 200 for OK, 404 for not found, and so on.
+
+=head2 $mech->ct() / $mech->content_type()
+
+Returns the content type of the response.
+
+=head2 $mech->base()
+
+Returns the base URI for the current response
+
+=head2 $mech->forms()
+
+When called in a list context, returns a list of the forms found in
+the last fetched page. In a scalar context, returns a reference to
+an array with those forms. The forms returned are all L<HTML::Form>
+objects.
+
+=head2 $mech->current_form()
+
+Returns the current form as an L<HTML::Form> object.
+
+=head2 $mech->links()
+
+When called in a list context, returns a list of the links found in the
+last fetched page. In a scalar context it returns a reference to an array
+with those links. Each link is a L<WWW::Mechanize::Link> object.
+
+=head2 $mech->is_html()
+
+Returns true/false on whether our content is HTML, according to the
+HTTP headers.
+
+=cut
+
+sub uri {
+ my $self = shift;
+ return $self->response->request->uri;
+}
+
+sub res { my $self = shift; return $self->{res}; }
+sub response { my $self = shift; return $self->{res}; }
+sub status { my $self = shift; return $self->{status}; }
+sub ct { my $self = shift; return $self->{ct}; }
+sub content_type { my $self = shift; return $self->{ct}; }
+sub base { my $self = shift; return $self->{base}; }
+sub is_html {
+ my $self = shift;
+ return defined $self->ct &&
+ ($self->ct eq 'text/html' || $self->ct eq 'application/xhtml+xml');
+}
+
+=head2 $mech->title()
+
+Returns the contents of the C<< <TITLE> >> tag, as parsed by
+L<HTML::HeadParser>. Returns undef if the content is not HTML.
+
+=cut
+
+sub title {
+ my $self = shift;
+
+ return unless $self->is_html;
+
+ if ( not defined $self->{title} ) {
+ require HTML::HeadParser;
+ my $p = HTML::HeadParser->new;
+ $p->parse($self->content);
+ $self->{title} = $p->header('Title');
+ }
+ return $self->{title};
+}
+
+=head1 CONTENT-HANDLING METHODS
+
+=head2 $mech->content(...)
+
+Returns the content that the mech uses internally for the last page
+fetched. Ordinarily this is the same as $mech->response()->content(),
+but this may differ for HTML documents if L</update_html> is
+overloaded (in which case the value passed to the base-class
+implementation of same will be returned), and/or extra named arguments
+are passed to I<content()>:
+
+=over 2
+
+=item I<< $mech->content( format => 'text' ) >>
+
+Returns a text-only version of the page, with all HTML markup
+stripped. This feature requires I<HTML::TreeBuilder> to be installed,
+or a fatal error will be thrown.
+
+=item I<< $mech->content( base_href => [$base_href|undef] ) >>
+
+Returns the HTML document, modified to contain a
+C<< <base href="$base_href"> >> mark-up in the header.
+I<$base_href> is C<< $mech->base() >> if not specified. This is
+handy to pass the HTML to e.g. L<HTML::Display>.
+
+=back
+
+Passing arguments to C<content()> if the current document is not
+HTML has no effect now (i.e. the return value is the same as
+C<< $self->response()->content() >>. This may change in the future,
+but will likely be backwards-compatible when it does.
+
+=cut
+
+sub content {
+ my $self = shift;
+ my $content = $self->{content};
+
+ if ( $self->is_html ) {
+ my %parms = @_;
+
+ if ( exists $parms{base_href} ) {
+ my $base_href = (delete $parms{base_href}) || $self->base;
+ $content=~s/<head>/<head>\n<base href="$base_href">/i;
+ }
+
+ if ( my $format = delete $parms{format} ) {
+ if ( $format eq 'text' ) {
+ $content = $self->text;
+ }
+ else {
+ $self->die( qq{Unknown "format" parameter "$format"} );
+ }
+ }
+
+ $self->_check_unhandled_parms( %parms );
+ }
+
+ return $content;
+}
+
+=head2 $mech->text()
+
+Returns the text of the current HTML content. If the content isn't
+HTML, $mech will die.
+
+The text is extracted by parsing the content, and then the extracted
+text is cached, so don't worry about performance of calling this
+repeatedly.
+
+=cut
+
+sub text {
+ my $self = shift;
+
+ if ( not defined $self->{text} ) {
+ require HTML::TreeBuilder;
+ my $tree = HTML::TreeBuilder->new();
+ $tree->parse( $self->content );
+ $tree->eof();
+ $tree->elementify(); # just for safety
+ $self->{text} = $tree->as_text();
+ $tree->delete;
+ }
+
+ return $self->{text};
+}
+
+sub _check_unhandled_parms {
+ my $self = shift;
+ my %parms = @_;
+
+ for my $cmd ( sort keys %parms ) {
+ $self->die( qq{Unknown named argument "$cmd"} );
+ }
+}
+
+=head1 LINK METHODS
+
+=head2 $mech->links()
+
+Lists all the links on the current page. Each link is a
+WWW::Mechanize::Link object. In list context, returns a list of all
+links. In scalar context, returns an array reference of all links.
+
+=cut
+
+sub links {
+ my $self = shift;
+
+ $self->_extract_links() unless $self->{links};
+
+ return @{$self->{links}} if wantarray;
+ return $self->{links};
+}
+
+=head2 $mech->follow_link(...)
+
+Follows a specified link on the page. You specify the match to be
+found using the same parms that C<L<find_link()>> uses.
+
+Here some examples:
+
+=over 4
+
+=item * 3rd link called "download"
+
+ $mech->follow_link( text => 'download', n => 3 );
+
+=item * first link where the URL has "download" in it, regardless of case:
+
+ $mech->follow_link( url_regex => qr/download/i );
+
+or
+
+ $mech->follow_link( url_regex => qr/(?i:download)/ );
+
+=item * 3rd link on the page
+
+ $mech->follow_link( n => 3 );
+
+=back
+
+Returns the result of the GET method (an HTTP::Response object) if
+a link was found. If the page has no links, or the specified link
+couldn't be found, returns undef.
+
+=cut
+
+sub follow_link {
+ my $self = shift;
+ my %parms = ( n=>1, @_ );
+
+ if ( $parms{n} eq 'all' ) {
+ delete $parms{n};
+ $self->warn( q{follow_link(n=>"all") is not valid} );
+ }
+
+ my $link = $self->find_link(%parms);
+ if ( $link ) {
+ return $self->get( $link->url );
+ }
+
+ if ( $self->{autocheck} ) {
+ $self->die( 'Link not found' );
+ }
+
+ return;
+}
+
+=head2 $mech->find_link( ... )
+
+Finds a link in the currently fetched page. It returns a
+L<WWW::Mechanize::Link> object which describes the link. (You'll
+probably be most interested in the C<url()> property.) If it fails
+to find a link it returns undef.
+
+You can take the URL part and pass it to the C<get()> method. If
+that's your plan, you might as well use the C<follow_link()> method
+directly, since it does the C<get()> for you automatically.
+
+Note that C<< <FRAME SRC="..."> >> tags are parsed out of the the HTML
+and treated as links so this method works with them.
+
+You can select which link to find by passing in one or more of these
+key/value pairs:
+
+=over 4
+
+=item * C<< text => 'string', >> and C<< text_regex => qr/regex/, >>
+
+C<text> matches the text of the link against I<string>, which must be an
+exact match. To select a link with text that is exactly "download", use
+
+ $mech->find_link( text => 'download' );
+
+C<text_regex> matches the text of the link against I<regex>. To select a
+link with text that has "download" anywhere in it, regardless of case, use
+
+ $mech->find_link( text_regex => qr/download/i );
+
+Note that the text extracted from the page's links are trimmed. For
+example, C<< <a> foo </a> >> is stored as 'foo', and searching for
+leading or trailing spaces will fail.
+
+=item * C<< url => 'string', >> and C<< url_regex => qr/regex/, >>
+
+Matches the URL of the link against I<string> or I<regex>, as appropriate.
+The URL may be a relative URL, like F<foo/bar.html>, depending on how
+it's coded on the page.
+
+=item * C<< url_abs => string >> and C<< url_abs_regex => regex >>
+
+Matches the absolute URL of the link against I<string> or I<regex>,
+as appropriate. The URL will be an absolute URL, even if it's relative
+in the page.
+
+=item * C<< name => string >> and C<< name_regex => regex >>
+
+Matches the name of the link against I<string> or I<regex>, as appropriate.
+
+=item * C<< id => string >> and C<< id_regex => regex >>
+
+Matches the attribute 'id' of the link against I<string> or
+I<regex>, as appropriate.
+
+=item * C<< class => string >> and C<< class_regex => regex >>
+
+Matches the attribute 'class' of the link against I<string> or
+I<regex>, as appropriate.
+
+=item * C<< tag => string >> and C<< tag_regex => regex >>
+
+Matches the tag that the link came from against I<string> or I<regex>,
+as appropriate. The C<tag_regex> is probably most useful to check for
+more than one tag, as in:
+
+ $mech->find_link( tag_regex => qr/^(a|frame)$/ );
+
+The tags and attributes looked at are defined below, at
+L<< $mech->find_link() : link format >>.
+
+=back
+
+If C<n> is not specified, it defaults to 1. Therefore, if you don't
+specify any parms, this method defaults to finding the first link on the
+page.
+
+Note that you can specify multiple text or URL parameters, which
+will be ANDed together. For example, to find the first link with
+text of "News" and with "cnn.com" in the URL, use:
+
+ $mech->find_link( text => 'News', url_regex => qr/cnn\.com/ );
+
+The return value is a reference to an array containing a
+L<WWW::Mechanize::Link> object for every link in C<< $self->content >>.
+
+The links come from the following:
+
+=over 4
+
+=item C<< <a href=...> >>
+
+=item C<< <area href=...> >>
+
+=item C<< <frame src=...> >>
+
+=item C<< <iframe src=...> >>
+
+=item C<< <link href=...> >>
+
+=item C<< <meta content=...> >>
+
+=back
+
+=cut
+
+sub find_link {
+ my $self = shift;
+ my %parms = ( n=>1, @_ );
+
+ my $wantall = ( $parms{n} eq 'all' );
+
+ $self->_clean_keys( \%parms, qr/^(n|(text|url|url_abs|name|tag|id|class)(_regex)?)$/ );
+
+ my @links = $self->links or return;
+
+ my $nmatches = 0;
+ my @matches;
+ for my $link ( @links ) {
+ if ( _match_any_link_parms($link,\%parms) ) {
+ if ( $wantall ) {
+ push( @matches, $link );
+ }
+ else {
+ ++$nmatches;
+ return $link if $nmatches >= $parms{n};
+ }
+ }
+ } # for @links
+
+ if ( $wantall ) {
+ return @matches if wantarray;
+ return \@matches;
+ }
+
+ return;
+} # find_link
+
+# Used by find_links to check for matches
+# The logic is such that ALL parm criteria that are given must match
+sub _match_any_link_parms {
+ my $link = shift;
+ my $p = shift;
+
+ # No conditions, anything matches
+ return 1 unless keys %$p;
+
+ return if defined $p->{url} && !($link->url eq $p->{url} );
+ return if defined $p->{url_regex} && !($link->url =~ $p->{url_regex} );
+ return if defined $p->{url_abs} && !($link->url_abs eq $p->{url_abs} );
+ return if defined $p->{url_abs_regex} && !($link->url_abs =~ $p->{url_abs_regex} );
+ return if defined $p->{text} && !(defined($link->text) && $link->text eq $p->{text} );
+ return if defined $p->{text_regex} && !(defined($link->text) && $link->text =~ $p->{text_regex} );
+ return if defined $p->{name} && !(defined($link->name) && $link->name eq $p->{name} );
+ return if defined $p->{name_regex} && !(defined($link->name) && $link->name =~ $p->{name_regex} );
+ return if defined $p->{tag} && !($link->tag && $link->tag eq $p->{tag} );
+ return if defined $p->{tag_regex} && !($link->tag && $link->tag =~ $p->{tag_regex} );
+
+ return if defined $p->{id} && !($link->attrs->{id} && $link->attrs->{id} eq $p->{id} );
+ return if defined $p->{id_regex} && !($link->attrs->{id} && $link->attrs->{id} =~ $p->{id_regex} );
+ return if defined $p->{class} && !($link->attrs->{class} && $link->attrs->{class} eq $p->{class} );
+ return if defined $p->{class_regex} && !($link->attrs->{class} && $link->attrs->{class} =~ $p->{class_regex} );
+
+ # Success: everything that was defined passed.
+ return 1;
+
+}
+
+# Cleans the %parms parameter for the find_link and find_image methods.
+sub _clean_keys {
+ my $self = shift;
+ my $parms = shift;
+ my $rx_keyname = shift;
+
+ for my $key ( keys %$parms ) {
+ my $val = $parms->{$key};
+ if ( $key !~ qr/$rx_keyname/ ) {
+ $self->warn( qq{Unknown link-finding parameter "$key"} );
+ delete $parms->{$key};
+ next;
+ }
+
+ my $key_regex = ( $key =~ /_regex$/ );
+ my $val_regex = ( ref($val) eq 'Regexp' );
+
+ if ( $key_regex ) {
+ if ( !$val_regex ) {
+ $self->warn( qq{$val passed as $key is not a regex} );
+ delete $parms->{$key};
+ next;
+ }
+ }
+ else {
+ if ( $val_regex ) {
+ $self->warn( qq{$val passed as '$key' is a regex} );
+ delete $parms->{$key};
+ next;
+ }
+ if ( $val =~ /^\s|\s$/ ) {
+ $self->warn( qq{'$val' is space-padded and cannot succeed} );
+ delete $parms->{$key};
+ next;
+ }
+ }
+ } # for keys %parms
+
+ return;
+} # _clean_keys()
+
+
+=head2 $mech->find_all_links( ... )
+
+Returns all the links on the current page that match the criteria. The
+method for specifying link criteria is the same as in C<L</find_link()>>.
+Each of the links returned is a L<WWW::Mechanize::Link> object.
+
+In list context, C<find_all_links()> returns a list of the links.
+Otherwise, it returns a reference to the list of links.
+
+C<find_all_links()> with no parameters returns all links in the
+page.
+
+=cut
+
+sub find_all_links {
+ my $self = shift;
+ return $self->find_link( @_, n=>'all' );
+}
+
+=head2 $mech->find_all_inputs( ... criteria ... )
+
+find_all_inputs() returns an array of all the input controls in the
+current form whose properties match all of the regexes passed in.
+The controls returned are all descended from HTML::Form::Input.
+
+If no criteria are passed, all inputs will be returned.
+
+If there is no current page, there is no form on the current
+page, or there are no submit controls in the current form
+then the return will be an empty array.
+
+You may use a regex or a literal string:
+
+ # get all textarea controls whose names begin with "customer"
+ my @customer_text_inputs = $mech->find_all_inputs(
+ type => 'textarea',
+ name_regex => qr/^customer/,
+ );
+
+ # get all text or textarea controls called "customer"
+ my @customer_text_inputs = $mech->find_all_inputs(
+ type_regex => qr/^(text|textarea)$/,
+ name => 'customer',
+ );
+
+=cut
+
+sub find_all_inputs {
+ my $self = shift;
+ my %criteria = @_;
+
+ my $form = $self->current_form() or return;
+
+ my @found;
+ foreach my $input ( $form->inputs ) { # check every pattern for a match on the current hash
+ my $matched = 1;
+ foreach my $criterion ( sort keys %criteria ) { # Sort so we're deterministic
+ my $field = $criterion;
+ my $is_regex = ( $field =~ s/(?:_regex)$// );
+ my $what = $input->{$field};
+ $matched = defined($what) && (
+ $is_regex
+ ? ( $what =~ $criteria{$criterion} )
+ : ( $what eq $criteria{$criterion} )
+ );
+ last if !$matched;
+ }
+ push @found, $input if $matched;
+ }
+ return @found;
+}
+
+=head2 $mech->find_all_submits( ... criteria ... )
+
+C<find_all_submits()> does the same thing as C<find_all_inputs()>
+except that it only returns controls that are submit controls,
+ignoring other types of input controls like text and checkboxes.
+
+=cut
+
+sub find_all_submits {
+ my $self = shift;
+
+ return $self->find_all_inputs( @_, type_regex => qr/^(submit|image)$/ );
+}
+
+
+=head1 IMAGE METHODS
+
+=head2 $mech->images
+
+Lists all the images on the current page. Each image is a
+WWW::Mechanize::Image object. In list context, returns a list of all
+images. In scalar context, returns an array reference of all images.
+
+=cut
+
+sub images {
+ my $self = shift;
+
+ $self->_extract_images() unless $self->{images};
+
+ return @{$self->{images}} if wantarray;
+ return $self->{images};
+}
+
+=head2 $mech->find_image()
+
+Finds an image in the current page. It returns a
+L<WWW::Mechanize::Image> object which describes the image. If it fails
+to find an image it returns undef.
+
+You can select which image to find by passing in one or more of these
+key/value pairs:
+
+=over 4
+
+=item * C<< alt => 'string' >> and C<< alt_regex => qr/regex/, >>
+
+C<alt> matches the ALT attribute of the image against I<string>, which must be an
+exact match. To select a image with an ALT tag that is exactly "download", use
+
+ $mech->find_image( alt => 'download' );
+
+C<alt_regex> matches the ALT attribute of the image against a regular
+expression. To select an image with an ALT attribute that has "download"
+anywhere in it, regardless of case, use
+
+ $mech->find_image( alt_regex => qr/download/i );
+
+=item * C<< url => 'string', >> and C<< url_regex => qr/regex/, >>
+
+Matches the URL of the image against I<string> or I<regex>, as appropriate.
+The URL may be a relative URL, like F<foo/bar.html>, depending on how
+it's coded on the page.
+
+=item * C<< url_abs => string >> and C<< url_abs_regex => regex >>
+
+Matches the absolute URL of the image against I<string> or I<regex>,
+as appropriate. The URL will be an absolute URL, even if it's relative
+in the page.
+
+=item * C<< tag => string >> and C<< tag_regex => regex >>
+
+Matches the tag that the image came from against I<string> or I<regex>,
+as appropriate. The C<tag_regex> is probably most useful to check for
+more than one tag, as in:
+
+ $mech->find_image( tag_regex => qr/^(img|input)$/ );
+
+The tags supported are C<< <img> >> and C<< <input> >>.
+
+=back
+
+If C<n> is not specified, it defaults to 1. Therefore, if you don't
+specify any parms, this method defaults to finding the first image on the
+page.
+
+Note that you can specify multiple ALT or URL parameters, which
+will be ANDed together. For example, to find the first image with
+ALT text of "News" and with "cnn.com" in the URL, use:
+
+ $mech->find_image( image => 'News', url_regex => qr/cnn\.com/ );
+
+The return value is a reference to an array containing a
+L<WWW::Mechanize::Image> object for every image in C<< $self->content >>.
+
+=cut
+
+sub find_image {
+ my $self = shift;
+ my %parms = ( n=>1, @_ );
+
+ my $wantall = ( $parms{n} eq 'all' );
+
+ $self->_clean_keys( \%parms, qr/^(n|(alt|url|url_abs|tag)(_regex)?)$/ );
+
+ my @images = $self->images or return;
+
+ my $nmatches = 0;
+ my @matches;
+ for my $image ( @images ) {
+ if ( _match_any_image_parms($image,\%parms) ) {
+ if ( $wantall ) {
+ push( @matches, $image );
+ }
+ else {
+ ++$nmatches;
+ return $image if $nmatches >= $parms{n};
+ }
+ }
+ } # for @images
+
+ if ( $wantall ) {
+ return @matches if wantarray;
+ return \@matches;
+ }
+
+ return;
+}
+
+# Used by find_images to check for matches
+# The logic is such that ALL parm criteria that are given must match
+sub _match_any_image_parms {
+ my $image = shift;
+ my $p = shift;
+
+ # No conditions, anything matches
+ return 1 unless keys %$p;
+
+ return if defined $p->{url} && !($image->url eq $p->{url} );
+ return if defined $p->{url_regex} && !($image->url =~ $p->{url_regex} );
+ return if defined $p->{url_abs} && !($image->url_abs eq $p->{url_abs} );
+ return if defined $p->{url_abs_regex} && !($image->url_abs =~ $p->{url_abs_regex} );
+ return if defined $p->{alt} && !(defined($image->alt) && $image->alt eq $p->{alt} );
+ return if defined $p->{alt_regex} && !(defined($image->alt) && $image->alt =~ $p->{alt_regex} );
+ return if defined $p->{tag} && !($image->tag && $image->tag eq $p->{tag} );
+ return if defined $p->{tag_regex} && !($image->tag && $image->tag =~ $p->{tag_regex} );
+
+ # Success: everything that was defined passed.
+ return 1;
+}
+
+
+=head2 $mech->find_all_images( ... )
+
+Returns all the images on the current page that match the criteria. The
+method for specifying image criteria is the same as in C<L</find_image()>>.
+Each of the images returned is a L<WWW::Mechanize::Image> object.
+
+In list context, C<find_all_images()> returns a list of the images.
+Otherwise, it returns a reference to the list of images.
+
+C<find_all_images()> with no parameters returns all images in the page.
+
+=cut
+
+sub find_all_images {
+ my $self = shift;
+ return $self->find_image( @_, n=>'all' );
+}
+
+=head1 FORM METHODS
+
+These methods let you work with the forms on a page. The idea is
+to choose a form that you'll later work with using the field methods
+below.
+
+=head2 $mech->forms
+
+Lists all the forms on the current page. Each form is an L<HTML::Form>
+object. In list context, returns a list of all forms. In scalar
+context, returns an array reference of all forms.
+
+=cut
+
+sub forms {
+ my $self = shift;
+
+ $self->_extract_forms() unless $self->{forms};
+
+ return @{$self->{forms}} if wantarray;
+ return $self->{forms};
+}
+
+sub current_form {
+ my $self = shift;
+
+ if ( !$self->{current_form} ) {
+ $self->form_number(1);
+ }
+
+ return $self->{current_form};
+}
+
+=head2 $mech->form_number($number)
+
+Selects the I<number>th form on the page as the target for subsequent
+calls to C<L</field()>> and C<L</click()>>. Also returns the form that was
+selected.
+
+If it is found, the form is returned as an L<HTML::Form> object and set internally
+for later use with Mech's form methods such as C<L</field()>> and C<L</click()>>.
+
+Emits a warning and returns undef if no form is found.
+
+The first form is number 1, not zero.
+
+=cut
+
+sub form_number {
+ my ($self, $form) = @_;
+ # XXX Should we die if no $form is defined? Same question for form_name()
+
+ my $forms = $self->forms;
+ if ( $forms->[$form-1] ) {
+ $self->{current_form} = $forms->[$form-1];
+ return $self->{current_form};
+ }
+
+ return;
+}
+
+=head2 $mech->form_name( $name )
+
+Selects a form by name. If there is more than one form on the page
+with that name, then the first one is used, and a warning is
+generated.
+
+If it is found, the form is returned as an L<HTML::Form> object and
+set internally for later use with Mech's form methods such as
+C<L</field()>> and C<L</click()>>.
+
+Returns undef if no form is found.
+
+=cut
+
+sub form_name {
+ my ($self, $form) = @_;
+
+ my $temp;
+ my @matches = grep {defined($temp = $_->attr('name')) and ($temp eq $form) } $self->forms;
+
+ my $nmatches = @matches;
+ if ( $nmatches > 0 ) {
+ if ( $nmatches > 1 ) {
+ $self->warn( "There are $nmatches forms named $form. The first one was used." )
+ }
+ return $self->{current_form} = $matches[0];
+ }
+
+ return;
+}
+
+=head2 $mech->form_id( $name )
+
+Selects a form by ID. If there is more than one form on the page
+with that ID, then the first one is used, and a warning is generated.
+
+If it is found, the form is returned as an L<HTML::Form> object and
+set internally for later use with Mech's form methods such as
+C<L</field()>> and C<L</click()>>.
+
+Returns undef if no form is found.
+
+=cut
+
+sub form_id {
+ my ($self, $formid) = @_;
+
+ my $temp;
+ my @matches = grep { defined($temp = $_->attr('id')) and ($temp eq $formid) } $self->forms;
+ if ( @matches ) {
+ $self->warn( 'There are ', scalar @matches, " forms with ID $formid. The first one was used." )
+ if @matches > 1;
+ return $self->{current_form} = $matches[0];
+ }
+ else {
+ $self->warn( qq{ There is no form with ID "$formid"} );
+ return undef;
+ }
+}
+
+
+=head2 $mech->form_with_fields( @fields )
+
+Selects a form by passing in a list of field names it must contain. If there
+is more than one form on the page with that matches, then the first one is used,
+and a warning is generated.
+
+If it is found, the form is returned as an L<HTML::Form> object and set internally
+for later used with Mech's form methods such as C<L</field()>> and C<L</click()>>.
+
+Returns undef if no form is found.
+
+Note that this functionality requires libwww-perl 5.69 or higher.
+
+=cut
+
+sub form_with_fields {
+ my ($self, @fields) = @_;
+ die 'no fields provided' unless scalar @fields;
+
+ my @matches;
+ FORMS: for my $form (@{ $self->forms }) {
+ my @fields_in_form = $form->param();
+ for my $field (@fields) {
+ next FORMS unless grep { $_ eq $field } @fields_in_form;
+ }
+ push @matches, $form;
+ }
+
+ my $nmatches = @matches;
+ if ( $nmatches > 0 ) {
+ if ( $nmatches > 1 ) {
+ $self->warn( "There are $nmatches forms with the named fields. The first one was used." )
+ }
+ return $self->{current_form} = $matches[0];
+ }
+ else {
+ $self->warn( qq{There is no form with the requested fields} );
+ return undef;
+ }
+}
+
+=head1 FIELD METHODS
+
+These methods allow you to set the values of fields in a given form.
+
+=head2 $mech->field( $name, $value, $number )
+
+=head2 $mech->field( $name, \@values, $number )
+
+Given the name of a field, set its value to the value specified.
+This applies to the current form (as set by the L</form_name()> or
+L</form_number()> method or defaulting to the first form on the
+page).
+
+The optional I<$number> parameter is used to distinguish between two fields
+with the same name. The fields are numbered from 1.
+
+=cut
+
+sub field {
+ my ($self, $name, $value, $number) = @_;
+ $number ||= 1;
+
+ my $form = $self->current_form();
+ if ($number > 1) {
+ $form->find_input($name, undef, $number)->value($value);
+ }
+ else {
+ if ( ref($value) eq 'ARRAY' ) {
+ $form->param($name, $value);
+ }
+ else {
+ $form->value($name => $value);
+ }
+ }
+}
+
+=head2 $mech->select($name, $value)
+
+=head2 $mech->select($name, \@values)
+
+Given the name of a C<select> field, set its value to the value
+specified. If the field is not C<< <select multiple> >> and the
+C<$value> is an array, only the B<first> value will be set. [Note:
+the documentation previously claimed that only the last value would
+be set, but this was incorrect.] Passing C<$value> as a hash with
+an C<n> key selects an item by number (e.g.
+C<< {n => 3} >> or C<< {n => [2,4]} >>).
+The numbering starts at 1. This applies to the current form.
+
+If you have a field with C<< <select multiple> >> and you pass a single
+C<$value>, then C<$value> will be added to the list of fields selected,
+without clearing the others. However, if you pass an array reference,
+then all previously selected values will be cleared.
+
+Returns true on successfully setting the value. On failure, returns
+false and calls C<< $self>warn() >> with an error message.
+
+=cut
+
+sub select {
+ my ($self, $name, $value) = @_;
+
+ my $form = $self->current_form();
+
+ my $input = $form->find_input($name);
+ if (!$input) {
+ $self->warn( qq{Input "$name" not found} );
+ return;
+ }
+
+ if ($input->type ne 'option') {
+ $self->warn( qq{Input "$name" is not type "select"} );
+ return;
+ }
+
+ # For $mech->select($name, {n => 3}) or $mech->select($name, {n => [2,4]}),
+ # transform the 'n' number(s) into value(s) and put it in $value.
+ if (ref($value) eq 'HASH') {
+ for (keys %$value) {
+ $self->warn(qq{Unknown select value parameter "$_"})
+ unless $_ eq 'n';
+ }
+
+ if (defined($value->{n})) {
+ my @inputs = $form->find_input($name, 'option');
+ my @values = ();
+ # distinguish between multiple and non-multiple selects
+ # (see INPUTS section of `perldoc HTML::Form`)
+ if (@inputs == 1) {
+ @values = $inputs[0]->possible_values();
+ }
+ else {
+ foreach my $input (@inputs) {
+ my @possible = $input->possible_values();
+ push @values, pop @possible;
+ }
+ }
+
+ my $n = $value->{n};
+ if (ref($n) eq 'ARRAY') {
+ $value = [];
+ for (@$n) {
+ unless (/^\d+$/) {
+ $self->warn(qq{"n" value "$_" is not a positive integer});
+ return;
+ }
+ push @$value, $values[$_ - 1]; # might be undef
+ }
+ }
+ elsif (!ref($n) && $n =~ /^\d+$/) {
+ $value = $values[$n - 1]; # might be undef
+ }
+ else {
+ $self->warn('"n" value is not a positive integer or an array ref');
+ return;
+ }
+ }
+ else {
+ $self->warn('Hash value is invalid');
+ return;
+ }
+ } # hashref
+
+ if (ref($value) eq 'ARRAY') {
+ $form->param($name, $value);
+ return 1;
+ }
+
+ $form->value($name => $value);
+ return 1;
+}
+
+=head2 $mech->set_fields( $name => $value ... )
+
+This method sets multiple fields of the current form. It takes a list
+of field name and value pairs. If there is more than one field with
+the same name, the first one found is set. If you want to select which
+of the duplicate field to set, use a value which is an anonymous array
+which has the field value and its number as the 2 elements.
+
+ # set the second foo field
+ $mech->set_fields( $name => [ 'foo', 2 ] );
+
+The fields are numbered from 1.
+
+This applies to the current form.
+
+=cut
+
+sub set_fields {
+ my $self = shift;
+ my %fields = @_;
+
+ my $form = $self->current_form or $self->die( 'No form defined' );
+
+ while ( my ( $field, $value ) = each %fields ) {
+ if ( ref $value eq 'ARRAY' ) {
+ $form->find_input( $field, undef,
+ $value->[1])->value($value->[0] );
+ }
+ else {
+ $form->value($field => $value);
+ }
+ } # while
+} # set_fields()
+
+=head2 $mech->set_visible( @criteria )
+
+This method sets fields of the current form without having to know
+their names. So if you have a login screen that wants a username and
+password, you do not have to fetch the form and inspect the source (or
+use the F<mech-dump> utility, installed with WWW::Mechanize) to see
+what the field names are; you can just say
+
+ $mech->set_visible( $username, $password );
+
+and the first and second fields will be set accordingly. The method
+is called set_I<visible> because it acts only on visible fields;
+hidden form inputs are not considered. The order of the fields is
+the order in which they appear in the HTML source which is nearly
+always the order anyone viewing the page would think they are in,
+but some creative work with tables could change that; caveat user.
+
+Each element in C<@criteria> is either a field value or a field
+specifier. A field value is a scalar. A field specifier allows
+you to specify the I<type> of input field you want to set and is
+denoted with an arrayref containing two elements. So you could
+specify the first radio button with
+
+ $mech->set_visible( [ radio => 'KCRW' ] );
+
+Field values and specifiers can be intermixed, hence
+
+ $mech->set_visible( 'fred', 'secret', [ option => 'Checking' ] );
+
+would set the first two fields to "fred" and "secret", and the I<next>
+C<OPTION> menu field to "Checking".
+
+The possible field specifier types are: "text", "password", "hidden",
+"textarea", "file", "image", "submit", "radio", "checkbox" and "option".
+
+C<set_visible> returns the number of values set.
+
+=cut
+
+sub set_visible {
+ my $self = shift;
+
+ my $form = $self->current_form;
+ my @inputs = $form->inputs;
+
+ my $num_set = 0;
+ for my $value ( @_ ) {
+ # Handle type/value pairs an arrayref
+ if ( ref $value eq 'ARRAY' ) {
+ my ( $type, $value ) = @$value;
+ while ( my $input = shift @inputs ) {
+ next if $input->type eq 'hidden';
+ if ( $input->type eq $type ) {
+ $input->value( $value );
+ $num_set++;
+ last;
+ }
+ } # while
+ }
+ # by default, it's a value
+ else {
+ while ( my $input = shift @inputs ) {
+ next if $input->type eq 'hidden';
+ $input->value( $value );
+ $num_set++;
+ last;
+ } # while
+ }
+ } # for
+
+ return $num_set;
+} # set_visible()
+
+=head2 $mech->tick( $name, $value [, $set] )
+
+"Ticks" the first checkbox that has both the name and value associated
+with it on the current form. Dies if there is no named check box for
+that value. Passing in a false value as the third optional argument
+will cause the checkbox to be unticked.
+
+=cut
+
+sub tick {
+ my $self = shift;
+ my $name = shift;
+ my $value = shift;
+ my $set = @_ ? shift : 1; # default to 1 if not passed
+
+ # loop though all the inputs
+ my $index = 0;
+ while ( my $input = $self->current_form->find_input( $name, 'checkbox', $index ) ) {
+ # Can't guarantee that the first element will be undef and the second
+ # element will be the right name
+ foreach my $val ($input->possible_values()) {
+ next unless defined $val;
+ if ($val eq $value) {
+ $input->value($set ? $value : undef);
+ return;
+ }
+ }
+
+ # move onto the next input
+ $index++;
+ } # while
+
+ # got self far? Didn't find anything
+ $self->warn( qq{No checkbox "$name" for value "$value" in form} );
+} # tick()
+
+=head2 $mech->untick($name, $value)
+
+Causes the checkbox to be unticked. Shorthand for
+C<tick($name,$value,undef)>
+
+=cut
+
+sub untick {
+ shift->tick(shift,shift,undef);
+}
+
+=head2 $mech->value( $name [, $number] )
+
+Given the name of a field, return its value. This applies to the current
+form.
+
+The optional I<$number> parameter is used to distinguish between two fields
+with the same name. The fields are numbered from 1.
+
+If the field is of type file (file upload field), the value is always
+cleared to prevent remote sites from downloading your local files.
+To upload a file, specify its file name explicitly.
+
+=cut
+
+sub value {
+ my $self = shift;
+ my $name = shift;
+ my $number = shift || 1;
+
+ my $form = $self->current_form;
+ if ( $number > 1 ) {
+ return $form->find_input( $name, undef, $number )->value();
+ }
+ else {
+ return $form->value( $name );
+ }
+} # value
+
+=head2 $mech->click( $button [, $x, $y] )
+
+Has the effect of clicking a button on the current form. The first
+argument is the name of the button to be clicked. The second and
+third arguments (optional) allow you to specify the (x,y) coordinates
+of the click.
+
+If there is only one button on the form, C<< $mech->click() >> with
+no arguments simply clicks that one button.
+
+Returns an L<HTTP::Response> object.
+
+=cut
+
+sub click {
+ my ($self, $button, $x, $y) = @_;
+ for ($x, $y) { $_ = 1 unless defined; }
+ my $request = $self->current_form->click($button, $x, $y);
+ return $self->request( $request );
+}
+
+=head2 $mech->click_button( ... )
+
+Has the effect of clicking a button on the current form by specifying
+its name, value, or index. Its arguments are a list of key/value
+pairs. Only one of name, number, input or value must be specified in
+the keys.
+
+=over 4
+
+=item * C<< name => name >>
+
+Clicks the button named I<name> in the current form.
+
+=item * C<< number => n >>
+
+Clicks the I<n>th button in the current form. Numbering starts at 1.
+
+=item * C<< value => value >>
+
+Clicks the button with the value I<value> in the current form.
+
+=item * C<< input => $inputobject >>
+
+Clicks on the button referenced by $inputobject, an instance of
+L<HTML::Form::SubmitInput> obtained e.g. from
+
+ $mech->current_form()->find_input( undef, 'submit' )
+
+$inputobject must belong to the current form.
+
+=item * C<< x => x >>
+
+=item * C<< y => y >>
+
+These arguments (optional) allow you to specify the (x,y) coordinates
+of the click.
+
+=back
+
+=cut
+
+sub click_button {
+ my $self = shift;
+ my %args = @_;
+
+ for ( keys %args ) {
+ if ( !/^(number|name|value|input|x|y)$/ ) {
+ $self->warn( qq{Unknown click_button parameter "$_"} );
+ }
+ }
+
+ for ($args{x}, $args{y}) {
+ $_ = 1 unless defined;
+ }
+
+ my $form = $self->current_form or $self->die( 'click_button: No form has been selected' );
+
+ my $request;
+ if ( $args{name} ) {
+ $request = $form->click( $args{name}, $args{x}, $args{y} );
+ }
+ elsif ( $args{number} ) {
+ my $input = $form->find_input( undef, 'submit', $args{number} );
+ $request = $input->click( $form, $args{x}, $args{y} );
+ }
+ elsif ( $args{input} ) {
+ $request = $args{input}->click( $form, $args{x}, $args{y} );
+ }
+ elsif ( $args{value} ) {
+ my $i = 1;
+ while ( my $input = $form->find_input(undef, 'submit', $i) ) {
+ if ( $args{value} && ($args{value} eq $input->value) ) {
+ $request = $input->click( $form, $args{x}, $args{y} );
+ last;
+ }
+ $i++;
+ } # while
+ } # $args{value}
+
+ return $self->request( $request );
+}
+
+=head2 $mech->submit()
+
+Submits the page, without specifying a button to click. Actually,
+no button is clicked at all.
+
+Returns an L<HTTP::Response> object.
+
+This used to be a synonym for C<< $mech->click( 'submit' ) >>, but is no
+longer so.
+
+=cut
+
+sub submit {
+ my $self = shift;
+
+ my $request = $self->current_form->make_request;
+ return $self->request( $request );
+}
+
+=head2 $mech->submit_form( ... )
+
+This method lets you select a form from the previously fetched page,
+fill in its fields, and submit it. It combines the form_number/form_name,
+set_fields and click methods into one higher level call. Its arguments
+are a list of key/value pairs, all of which are optional.
+
+=over 4
+
+=item * C<< fields => \%fields >>
+
+Specifies the fields to be filled in the current form.
+
+=item * C<< with_fields => \%fields >>
+
+Probably all you need for the common case. It combines a smart form selector
+and data setting in one operation. It selects the first form that contains all
+fields mentioned in C<\%fields>. This is nice because you don't need to know
+the name or number of the form to do this.
+
+(calls C<L</form_with_fields()>> and C<L</set_fields()>>).
+
+If you choose this, the form_number, form_name, form_id and fields options will be ignored.
+
+=item * C<< form_number => n >>
+
+Selects the I<n>th form (calls C<L</form_number()>>). If this parm is not
+specified, the currently-selected form is used.
+
+=item * C<< form_name => name >>
+
+Selects the form named I<name> (calls C<L</form_name()>>)
+
+=item * C<< form_id => ID >>
+
+Selects the form with ID I<ID> (calls C<L</form_id()>>)
+
+=item * C<< button => button >>
+
+Clicks on button I<button> (calls C<L</click()>>)
+
+=item * C<< x => x, y => y >>
+
+Sets the x or y values for C<L</click()>>
+
+=back
+
+If no form is selected, the first form found is used.
+
+If I<button> is not passed, then the C<L</submit()>> method is used instead.
+
+If you want to submit a file and get its content from a scalar rather
+than a file in the filesystem, you can use:
+
+ $mech->submit_form(with_fields => { logfile => [ [ undef, 'whatever', Content => $content ], 1 ] } );
+
+Returns an L<HTTP::Response> object.
+
+=cut
+
+sub submit_form {
+ my( $self, %args ) = @_;
+
+ for ( keys %args ) {
+ if ( !/^(form_(number|name|fields|id)|(with_)?fields|button|x|y)$/ ) {
+ # XXX Why not die here?
+ $self->warn( qq{Unknown submit_form parameter "$_"} );
+ }
+ }
+
+ my $fields;
+ for (qw/with_fields fields/) {
+ if ($args{$_}) {
+ if ( ref $args{$_} eq 'HASH' ) {
+ $fields = $args{$_};
+ }
+ else {
+ die "$_ arg to submit_form must be a hashref";
+ }
+ last;
+ }
+ }
+
+ if ( $args{with_fields} ) {
+ $fields || die q{must submit some 'fields' with with_fields};
+ $self->form_with_fields(keys %{$fields}) or die "There is no form with the requested fields";
+ }
+ elsif ( my $form_number = $args{form_number} ) {
+ $self->form_number( $form_number ) or die "There is no form numbered $form_number";
+ }
+ elsif ( my $form_name = $args{form_name} ) {
+ $self->form_name( $form_name ) or die qq{There is no form named "$form_name"};
+ }
+ elsif ( my $form_id = $args{form_id} ) {
+ $self->form_id( $form_id ) or die qq{There is no form with ID "$form_id"};
+ }
+ else {
+ # No form selector was used.
+ # Maybe a form was set separately, or we'll default to the first form.
+ }
+
+ $self->set_fields( %{$fields} ) if $fields;
+
+ my $response;
+ if ( $args{button} ) {
+ $response = $self->click( $args{button}, $args{x} || 0, $args{y} || 0 );
+ }
+ else {
+ $response = $self->submit();
+ }
+
+ return $response;
+}
+
+=head1 MISCELLANEOUS METHODS
+
+=head2 $mech->add_header( name => $value [, name => $value... ] )
+
+Sets HTTP headers for the agent to add or remove from the HTTP request.
+
+ $mech->add_header( Encoding => 'text/klingon' );
+
+If a I<value> is C<undef>, then that header will be removed from any
+future requests. For example, to never send a Referer header:
+
+ $mech->add_header( Referer => undef );
+
+If you want to delete a header, use C<delete_header>.
+
+Returns the number of name/value pairs added.
+
+B<NOTE>: This method was very different in WWW::Mechanize before 1.00.
+Back then, the headers were stored in a package hash, not as a member of
+the object instance. Calling C<add_header()> would modify the headers
+for every WWW::Mechanize object, even after your object no longer existed.
+
+=cut
+
+sub add_header {
+ my $self = shift;
+ my $npairs = 0;
+
+ while ( @_ ) {
+ my $key = shift;
+ my $value = shift;
+ ++$npairs;
+
+ $self->{headers}{$key} = $value;
+ }
+
+ return $npairs;
+}
+
+=head2 $mech->delete_header( name [, name ... ] )
+
+Removes HTTP headers from the agent's list of special headers. For
+instance, you might need to do something like:
+
+ # Don't send a Referer for this URL
+ $mech->add_header( Referer => undef );
+
+ # Get the URL
+ $mech->get( $url );
+
+ # Back to the default behavior
+ $mech->delete_header( 'Referer' );
+
+=cut
+
+sub delete_header {
+ my $self = shift;
+
+ while ( @_ ) {
+ my $key = shift;
+
+ delete $self->{headers}{$key};
+ }
+
+ return;
+}
+
+
+=head2 $mech->quiet(true/false)
+
+Allows you to suppress warnings to the screen.
+
+ $mech->quiet(0); # turns on warnings (the default)
+ $mech->quiet(1); # turns off warnings
+ $mech->quiet(); # returns the current quietness status
+
+=cut
+
+sub quiet {
+ my $self = shift;
+
+ $self->{quiet} = $_[0] if @_;
+
+ return $self->{quiet};
+}
+
+=head2 $mech->stack_depth( $max_depth )
+
+Get or set the page stack depth. Use this if you're doing a lot of page
+scraping and running out of memory.
+
+A value of 0 means "no history at all." By default, the max stack depth
+is humongously large, effectively keeping all history.
+
+=cut
+
+sub stack_depth {
+ my $self = shift;
+ $self->{stack_depth} = shift if @_;
+ return $self->{stack_depth};
+}
+
+=head2 $mech->save_content( $filename )
+
+Dumps the contents of C<< $mech->content >> into I<$filename>.
+I<$filename> will be overwritten. Dies if there are any errors.
+
+If the content type does not begin with "text/", then the content
+is saved in binary mode.
+
+=cut
+
+sub save_content {
+ my $self = shift;
+ my $filename = shift;
+
+ open( my $fh, '>', $filename ) or $self->die( "Unable to create $filename: $!" );
+ binmode $fh unless $self->content_type =~ m{^text/};
+ print {$fh} $self->content or $self->die( "Unable to write to $filename: $!" );
+ close $fh or $self->die( "Unable to close $filename: $!" );
+
+ return;
+}
+
+
+=head2 $mech->dump_headers( [$fh] )
+
+Prints a dump of the HTTP response headers for the most recent
+response. If I<$fh> is not specified or is undef, it dumps to
+STDOUT.
+
+Unlike the rest of the dump_* methods, you cannot specify a filehandle
+to print to.
+
+=cut
+
+sub dump_headers {
+ my $self = shift;
+ my $fh = shift || \*STDOUT;
+
+ print {$fh} $self->response->headers_as_string;
+
+ return;
+}
+
+
+=head2 $mech->dump_links( [[$fh], $absolute] )
+
+Prints a dump of the links on the current page to I<$fh>. If I<$fh>
+is not specified or is undef, it dumps to STDOUT.
+
+If I<$absolute> is true, links displayed are absolute, not relative.
+
+=cut
+
+sub dump_links {
+ my $self = shift;
+ my $fh = shift || \*STDOUT;
+ my $absolute = shift;
+
+ for my $link ( $self->links ) {
+ my $url = $absolute ? $link->url_abs : $link->url;
+ $url = '' if not defined $url;
+ print {$fh} $url, "\n";
+ }
+ return;
+}
+
+=head2 $mech->dump_images( [[$fh], $absolute] )
+
+Prints a dump of the images on the current page to I<$fh>. If I<$fh>
+is not specified or is undef, it dumps to STDOUT.
+
+If I<$absolute> is true, links displayed are absolute, not relative.
+
+=cut
+
+sub dump_images {
+ my $self = shift;
+ my $fh = shift || \*STDOUT;
+ my $absolute = shift;
+
+ for my $image ( $self->images ) {
+ my $url = $absolute ? $image->url_abs : $image->url;
+ $url = '' if not defined $url;
+ print {$fh} $url, "\n";
+ }
+ return;
+}
+
+=head2 $mech->dump_forms( [$fh] )
+
+Prints a dump of the forms on the current page to I<$fh>. If I<$fh>
+is not specified or is undef, it dumps to STDOUT.
+
+=cut
+
+sub dump_forms {
+ my $self = shift;
+ my $fh = shift || \*STDOUT;
+
+ for my $form ( $self->forms ) {
+ print {$fh} $form->dump, "\n";
+ }
+ return;
+}
+
+=head2 $mech->dump_text( [$fh] )
+
+Prints a dump of the text on the current page to I<$fh>. If I<$fh>
+is not specified or is undef, it dumps to STDOUT.
+
+=cut
+
+sub dump_text {
+ my $self = shift;
+ my $fh = shift || \*STDOUT;
+ my $absolute = shift;
+
+ print {$fh} $self->text, "\n";
+
+ return;
+}
+
+
+=head1 OVERRIDDEN LWP::UserAgent METHODS
+
+=head2 $mech->clone()
+
+Clone the mech object. The clone will be using the same cookie jar
+as the original mech.
+
+=cut
+
+sub clone {
+ my $self = shift;
+ my $clone = $self->SUPER::clone();
+
+ $clone->cookie_jar( $self->cookie_jar );
+
+ return $clone;
+}
+
+
+=head2 $mech->redirect_ok()
+
+An overloaded version of C<redirect_ok()> in L<LWP::UserAgent>.
+This method is used to determine whether a redirection in the request
+should be followed.
+
+Note that WWW::Mechanize's constructor pushes POST on to the agent's
+C<requests_redirectable> list.
+
+=cut
+
+sub redirect_ok {
+ my $self = shift;
+ my $prospective_request = shift;
+ my $response = shift;
+
+ my $ok = $self->SUPER::redirect_ok( $prospective_request, $response );
+ if ( $ok ) {
+ $self->{redirected_uri} = $prospective_request->uri;
+ }
+
+ return $ok;
+}
+
+
+=head2 $mech->request( $request [, $arg [, $size]])
+
+Overloaded version of C<request()> in L<LWP::UserAgent>. Performs
+the actual request. Normally, if you're using WWW::Mechanize, it's
+because you don't want to deal with this level of stuff anyway.
+
+Note that C<$request> will be modified.
+
+Returns an L<HTTP::Response> object.
+
+=cut
+
+sub request {
+ my $self = shift;
+ my $request = shift;
+
+ $request = $self->_modify_request( $request );
+
+ if ( $request->method eq 'GET' || $request->method eq 'POST' ) {
+ $self->_push_page_stack();
+ }
+
+ return $self->_update_page($request, $self->_make_request( $request, @_ ));
+}
+
+=head2 $mech->update_html( $html )
+
+Allows you to replace the HTML that the mech has found. Updates the
+forms and links parse-trees that the mech uses internally.
+
+Say you have a page that you know has malformed output, and you want to
+update it so the links come out correctly:
+
+ my $html = $mech->content;
+ $html =~ s[</option>.{0,3}</td>][</option></select></td>]isg;
+ $mech->update_html( $html );
+
+This method is also used internally by the mech itself to update its
+own HTML content when loading a page. This means that if you would
+like to I<systematically> perform the above HTML substitution, you
+would overload I<update_html> in a subclass thusly:
+
+ package MyMech;
+ use base 'WWW::Mechanize';
+
+ sub update_html {
+ my ($self, $html) = @_;
+ $html =~ s[</option>.{0,3}</td>][</option></select></td>]isg;
+ $self->WWW::Mechanize::update_html( $html );
+ }
+
+If you do this, then the mech will use the tidied-up HTML instead of
+the original both when parsing for its own needs, and for returning to
+you through L</content>.
+
+Overloading this method is also the recommended way of implementing
+extra validation steps (e.g. link checkers) for every HTML page
+received. L</warn> and L</die> would then come in handy to signal
+validation errors.
+
+=cut
+
+sub update_html {
+ my $self = shift;
+ my $html = shift;
+
+ $self->_reset_page;
+ $self->{ct} = 'text/html';
+ $self->{content} = $html;
+
+ return;
+}
+
+=head2 $mech->credentials( $username, $password )
+
+Provide credentials to be used for HTTP Basic authentication for
+all sites and realms until further notice.
+
+The four argument form described in L<LWP::UserAgent> is still
+supported.
+
+=cut
+
+sub credentials {
+ my $self = shift;
+
+ # The lastest LWP::UserAgent also supports 2 arguments,
+ # in which case the first is host:port
+ if (@_ == 4 || (@_ == 2 && $_[0] =~ /:\d+$/)) {
+ return $self->SUPER::credentials(@_);
+ }
+
+ @_ == 2
+ or $self->die( 'Invalid # of args for overridden credentials()' );
+
+ return @$self{qw( __username __password )} = @_;
+}
+
+=head2 $mech->get_basic_credentials( $realm, $uri, $isproxy )
+
+Returns the credentials for the realm and URI.
+
+=cut
+
+sub get_basic_credentials {
+ my $self = shift;
+ my @cred = grep { defined } @$self{qw( __username __password )};
+ return @cred if @cred == 2;
+ return $self->SUPER::get_basic_credentials(@_);
+}
+
+=head2 $mech->clear_credentials()
+
+Remove any credentials set up with C<credentials()>.
+
+=cut
+
+sub clear_credentials {
+ my $self = shift;
+ delete @$self{qw( __username __password )};
+}
+
+=head1 INHERITED UNCHANGED LWP::UserAgent METHODS
+
+As a sublass of L<LWP::UserAgent>, WWW::Mechanize inherits all of
+L<LWP::UserAgent>'s methods. Many of which are overridden or
+extended. The following methods are inherited unchanged. View the
+L<LWP::UserAgent> documentation for their implementation descriptions.
+
+This is not meant to be an inclusive list. LWP::UA may have added
+others.
+
+=head2 $mech->head()
+
+Inherited from L<LWP::UserAgent>.
+
+=head2 $mech->post()
+
+Inherited from L<LWP::UserAgent>.
+
+=head2 $mech->mirror()
+
+Inherited from L<LWP::UserAgent>.
+
+=head2 $mech->simple_request()
+
+Inherited from L<LWP::UserAgent>.
+
+=head2 $mech->is_protocol_supported()
+
+Inherited from L<LWP::UserAgent>.
+
+=head2 $mech->prepare_request()
+
+Inherited from L<LWP::UserAgent>.
+
+=head2 $mech->progress()
+
+Inherited from L<LWP::UserAgent>.
+
+=head1 INTERNAL-ONLY METHODS
+
+These methods are only used internally. You probably don't need to
+know about them.
+
+=head2 $mech->_update_page($request, $response)
+
+Updates all internal variables in $mech as if $request was just
+performed, and returns $response. The page stack is B<not> altered by
+this method, it is up to caller (e.g. L</request>) to do that.
+
+=cut
+
+sub _update_page {
+ my ($self, $request, $res) = @_;
+
+ $self->{req} = $request;
+ $self->{redirected_uri} = $request->uri->as_string;
+
+ $self->{res} = $res;
+
+ $self->{status} = $res->code;
+ $self->{base} = $res->base;
+ $self->{ct} = $res->content_type || '';
+
+ if ( $res->is_success ) {
+ $self->{uri} = $self->{redirected_uri};
+ $self->{last_uri} = $self->{uri};
+ }
+
+ if ( $res->is_error ) {
+ if ( $self->{autocheck} ) {
+ $self->die( 'Error ', $request->method, 'ing ', $request->uri, ': ', $res->message );
+ }
+ }
+
+ $self->_reset_page;
+
+ # Try to decode the content. Undef will be returned if there's nothing to decompress.
+ # See docs in HTTP::Message for details. Do we need to expose the options there?
+ my $content = $res->decoded_content();
+ $content = $res->content if (not defined $content);
+
+ $content .= _taintedness();
+
+ if ($self->is_html) {
+ $self->update_html($content);
+ }
+ else {
+ $self->{content} = $content;
+ }
+
+ return $res;
+} # _update_page
+
+our $_taintbrush;
+
+# This is lifted wholesale from Test::Taint
+sub _taintedness {
+ return $_taintbrush if defined $_taintbrush;
+
+ # Somehow we need to get some taintedness into our $_taintbrush.
+ # Let's try the easy way first. Either of these should be
+ # tainted, unless somebody has untainted them, so this
+ # will almost always work on the first try.
+ # (Unless, of course, taint checking has been turned off!)
+ $_taintbrush = substr("$0$^X", 0, 0);
+ return $_taintbrush if _is_tainted( $_taintbrush );
+
+ # Let's try again. Maybe somebody cleaned those.
+ $_taintbrush = substr(join('', grep { defined } @ARGV, %ENV), 0, 0);
+ return $_taintbrush if _is_tainted( $_taintbrush );
+
+ # If those don't work, go try to open some file from some unsafe
+ # source and get data from them. That data is tainted.
+ # (Yes, even reading from /dev/null works!)
+ for my $filename ( qw(/dev/null / . ..), values %INC, $0, $^X ) {
+ if ( open my $fh, '<', $filename ) {
+ my $data;
+ if ( defined sysread $fh, $data, 1 ) {
+ $_taintbrush = substr( $data, 0, 0 );
+ last if _is_tainted( $_taintbrush );
+ }
+ }
+ }
+
+ # Sanity check
+ die "Our taintbrush should have zero length!" if length $_taintbrush;
+
+ return $_taintbrush;
+}
+
+sub _is_tainted {
+ no warnings qw(void uninitialized);
+
+ return !eval { join('', shift), kill 0; 1 };
+} # _is_tainted
+
+
+=head2 $mech->_modify_request( $req )
+
+Modifies a L<HTTP::Request> before the request is sent out,
+for both GET and POST requests.
+
+We add a C<Referer> header, as well as header to note that we can accept gzip
+encoded content, if L<Compress::Zlib> is installed.
+
+=cut
+
+sub _modify_request {
+ my $self = shift;
+ my $req = shift;
+
+ # add correct Accept-Encoding header to restore compliance with
+ # http://www.freesoft.org/CIE/RFC/2068/158.htm
+ # http://use.perl.org/~rhesa/journal/25952
+ if (not $req->header( 'Accept-Encoding' ) ) {
+ # "identity" means "please! unencoded content only!"
+ $req->header( 'Accept-Encoding', $HAS_ZLIB ? 'gzip' : 'identity' );
+ }
+
+ my $last = $self->{last_uri};
+ if ( $last ) {
+ $last = $last->as_string if ref($last);
+ $req->header( Referer => $last );
+ }
+ while ( my($key,$value) = each %{$self->{headers}} ) {
+ if ( defined $value ) {
+ $req->header( $key => $value );
+ }
+ else {
+ $req->remove_header( $key );
+ }
+ }
+
+ return $req;
+}
+
+
+=head2 $mech->_make_request()
+
+Convenience method to make it easier for subclasses like
+L<WWW::Mechanize::Cached> to intercept the request.
+
+=cut
+
+sub _make_request {
+ my $self = shift;
+ return $self->SUPER::request(@_);
+}
+
+=head2 $mech->_reset_page()
+
+Resets the internal fields that track page parsed stuff.
+
+=cut
+
+sub _reset_page {
+ my $self = shift;
+
+ $self->{links} = undef;
+ $self->{images} = undef;
+ $self->{forms} = undef;
+ $self->{current_form} = undef;
+ $self->{title} = undef;
+ $self->{text} = undef;
+
+ return;
+}
+
+=head2 $mech->_extract_links()
+
+Extracts links from the content of a webpage, and populates the C<{links}>
+property with L<WWW::Mechanize::Link> objects.
+
+=cut
+
+my %link_tags = (
+ a => 'href',
+ area => 'href',
+ frame => 'src',
+ iframe => 'src',
+ link => 'href',
+ meta => 'content',
+);
+
+sub _extract_links {
+ my $self = shift;
+
+
+ $self->{links} = [];
+ if ( defined $self->{content} ) {
+ my $parser = HTML::TokeParser->new(\$self->{content});
+ while ( my $token = $parser->get_tag( keys %link_tags ) ) {
+ my $link = $self->_link_from_token( $token, $parser );
+ push( @{$self->{links}}, $link ) if $link;
+ } # while
+ }
+
+ return;
+}
+
+
+my %image_tags = (
+ img => 'src',
+ input => 'src',
+);
+
+sub _extract_images {
+ my $self = shift;
+
+ $self->{images} = [];
+
+ if ( defined $self->{content} ) {
+ my $parser = HTML::TokeParser->new(\$self->{content});
+ while ( my $token = $parser->get_tag( keys %image_tags ) ) {
+ my $image = $self->_image_from_token( $token, $parser );
+ push( @{$self->{images}}, $image ) if $image;
+ } # while
+ }
+
+ return;
+}
+
+sub _image_from_token {
+ my $self = shift;
+ my $token = shift;
+ my $parser = shift;
+
+ my $tag = $token->[0];
+ my $attrs = $token->[1];
+
+ if ( $tag eq 'input' ) {
+ my $type = $attrs->{type} or return;
+ return unless $type eq 'image';
+ }
+
+ require WWW::Mechanize::Image;
+ return
+ WWW::Mechanize::Image->new({
+ tag => $tag,
+ base => $self->base,
+ url => $attrs->{src},
+ name => $attrs->{name},
+ height => $attrs->{height},
+ width => $attrs->{width},
+ alt => $attrs->{alt},
+ });
+}
+
+sub _link_from_token {
+ my $self = shift;
+ my $token = shift;
+ my $parser = shift;
+
+ my $tag = $token->[0];
+ my $attrs = $token->[1];
+ my $url = $attrs->{$link_tags{$tag}};
+
+ my $text;
+ my $name;
+ if ( $tag eq 'a' ) {
+ $text = $parser->get_trimmed_text("/$tag");
+ $text = '' unless defined $text;
+
+ my $onClick = $attrs->{onclick};
+ if ( $onClick && ($onClick =~ /^window\.open\(\s*'([^']+)'/) ) {
+ $url = $1;
+ }
+ } # a
+
+ # Of the tags we extract from, only 'AREA' has an alt tag
+ # The rest should have a 'name' attribute.
+ # ... but we don't do anything with that bit of wisdom now.
+
+ $name = $attrs->{name};
+
+ if ( $tag eq 'meta' ) {
+ my $equiv = $attrs->{'http-equiv'};
+ my $content = $attrs->{'content'};
+ return unless $equiv && (lc $equiv eq 'refresh') && defined $content;
+
+ if ( $content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i ) {
+ $url = $1;
+ $url =~ s/^"(.+)"$/$1/ or $url =~ s/^'(.+)'$/$1/;
+ }
+ else {
+ undef $url;
+ }
+ } # meta
+
+ return unless defined $url; # probably just a name link or <AREA NOHREF...>
+
+ require WWW::Mechanize::Link;
+ return
+ WWW::Mechanize::Link->new({
+ url => $url,
+ text => $text,
+ name => $name,
+ tag => $tag,
+ base => $self->base,
+ attrs => $attrs,
+ });
+} # _link_from_token
+
+
+sub _extract_forms {
+ my $self = shift;
+
+ my @forms = HTML::Form->parse( $self->content, $self->base );
+ $self->{forms} = \@forms;
+ for my $form ( @forms ) {
+ for my $input ($form->inputs) {
+ if ($input->type eq 'file') {
+ $input->value( undef );
+ }
+ }
+ }
+
+ return;
+}
+
+=head2 $mech->_push_page_stack()
+
+The agent keeps a stack of visited pages, which it can pop when it needs
+to go BACK and so on.
+
+The current page needs to be pushed onto the stack before we get a new
+page, and the stack needs to be popped when BACK occurs.
+
+Neither of these take any arguments, they just operate on the $mech
+object.
+
+=cut
+
+sub _push_page_stack {
+ my $self = shift;
+
+ my $req = $self->{req};
+ my $res = $self->{res};
+
+ return unless $req && $res && $self->stack_depth;
+
+ # Don't push anything if it's a virgin object
+ my $stack = $self->{page_stack} ||= [];
+ if ( @{$stack} >= $self->stack_depth ) {
+ shift @{$stack};
+ }
+ push( @{$stack}, { req => $req, res => $res } );
+
+ return 1;
+}
+
+=head2 warn( @messages )
+
+Centralized warning method, for diagnostics and non-fatal problems.
+Defaults to calling C<CORE::warn>, but may be overridden by setting
+C<onwarn> in the constructor.
+
+=cut
+
+sub warn {
+ my $self = shift;
+
+ return unless my $handler = $self->{onwarn};
+
+ return if $self->quiet;
+
+ return $handler->(@_);
+}
+
+=head2 die( @messages )
+
+Centralized error method. Defaults to calling C<CORE::die>, but
+may be overridden by setting C<onerror> in the constructor.
+
+=cut
+
+sub die {
+ my $self = shift;
+
+ return unless my $handler = $self->{onerror};
+
+ return $handler->(@_);
+}
+
+
+# NOT an object method!
+sub _warn {
+ require Carp;
+ return &Carp::carp; ## no critic
+}
+
+# NOT an object method!
+sub _die {
+ require Carp;
+ return &Carp::croak; ## no critic
+}
+
+1; # End of module
+
+__END__
+
+=head1 WWW::MECHANIZE'S GIT REPOSITORY
+
+WWW::Mechanize is hosted at GitHub, though the bug tracker still
+lives at Google Code.
+
+Repository: https://github.com/bestpractical/www-mechanize/.
+Bugs: http://code.google.com/p/www-mechanize/issues
+
+=head1 OTHER DOCUMENTATION
+
+=head2 I<Spidering Hacks>, by Kevin Hemenway and Tara Calishain
+
+I<Spidering Hacks> from O'Reilly
+(L<http://www.oreilly.com/catalog/spiderhks/>) is a great book for anyone
+wanting to know more about screen-scraping and spidering.
+
+There are six hacks that use Mech or a Mech derivative:
+
+=over 4
+
+=item #21 WWW::Mechanize 101
+
+=item #22 Scraping with WWW::Mechanize
+
+=item #36 Downloading Images from Webshots
+
+=item #44 Archiving Yahoo! Groups Messages with WWW::Yahoo::Groups
+
+=item #64 Super Author Searching
+
+=item #73 Scraping TV Listings
+
+=back
+
+The book was also positively reviewed on Slashdot:
+L<http://books.slashdot.org/article.pl?sid=03/12/11/2126256>
+
+=head1 ONLINE RESOURCES AND SUPPORT
+
+=over 4
+
+=item * WWW::Mechanize mailing list
+
+The Mech mailing list is at
+L<http://groups.google.com/group/www-mechanize-users> and is specific
+to Mechanize, unlike the LWP mailing list below. Although it is a
+users list, all development discussion takes place here, too.
+
+=item * LWP mailing list
+
+The LWP mailing list is at
+L<http://lists.perl.org/showlist.cgi?name=libwww>, and is more
+user-oriented and well-populated than the WWW::Mechanize list.
+
+=item * Perlmonks
+
+L<http://perlmonks.org> is an excellent community of support, and
+many questions about Mech have already been answered there.
+
+=item * L<WWW::Mechanize::Examples>
+
+A random array of examples submitted by users, included with the
+Mechanize distribution.
+
+=back
+
+=head1 ARTICLES ABOUT WWW::MECHANIZE
+
+=over 4
+
+=item * L<http://www-128.ibm.com/developerworks/linux/library/wa-perlsecure.html>
+
+IBM article "Secure Web site access with Perl"
+
+=item * L<http://www.oreilly.com/catalog/googlehks2/chapter/hack84.pdf>
+
+Leland Johnson's hack #84 in I<Google Hacks, 2nd Edition> is
+an example of a production script that uses WWW::Mechanize and
+HTML::TableContentParser. It takes in keywords and returns the estimated
+price of these keywords on Google's AdWords program.
+
+=item * L<http://www.perl.com/pub/a/2004/06/04/recorder.html>
+
+Linda Julien writes about using HTTP::Recorder to create WWW::Mechanize
+scripts.
+
+=item * L<http://www.developer.com/lang/other/article.php/3454041>
+
+Jason Gilmore's article on using WWW::Mechanize for scraping sales
+information from Amazon and eBay.
+
+=item * L<http://www.perl.com/pub/a/2003/01/22/mechanize.html>
+
+Chris Ball's article about using WWW::Mechanize for scraping TV
+listings.
+
+=item * L<http://www.stonehenge.com/merlyn/LinuxMag/col47.html>
+
+Randal Schwartz's article on scraping Yahoo News for images. It's
+already out of date: He manually walks the list of links hunting
+for matches, which wouldn't have been necessary if the C<find_link()>
+method existed at press time.
+
+=item * L<http://www.perladvent.org/2002/16th/>
+
+WWW::Mechanize on the Perl Advent Calendar, by Mark Fowler.
+
+=item * L<http://www.linux-magazin.de/Artikel/ausgabe/2004/03/perl/perl.html>
+
+Michael Schilli's article on Mech and L<WWW::Mechanize::Shell> for the
+German magazine I<Linux Magazin>.
+
+=back
+
+=head2 Other modules that use Mechanize
+
+Here are modules that use or subclass Mechanize. Let me know of any others:
+
+=over 4
+
+=item * L<Finance::Bank::LloydsTSB>
+
+=item * L<HTTP::Recorder>
+
+Acts as a proxy for web interaction, and then generates WWW::Mechanize scripts.
+
+=item * L<Win32::IE::Mechanize>
+
+Just like Mech, but using Microsoft Internet Explorer to do the work.
+
+=item * L<WWW::Bugzilla>
+
+=item * L<WWW::CheckSite>
+
+=item * L<WWW::Google::Groups>
+
+=item * L<WWW::Hotmail>
+
+=item * L<WWW::Mechanize::Cached>
+
+=item * L<WWW::Mechanize::FormFiller>
+
+=item * L<WWW::Mechanize::Shell>
+
+=item * L<WWW::Mechanize::Sleepy>
+
+=item * L<WWW::Mechanize::SpamCop>
+
+=item * L<WWW::Mechanize::Timed>
+
+=item * L<WWW::SourceForge>
+
+=item * L<WWW::Yahoo::Groups>
+
+=back
+
+=head1 ACKNOWLEDGEMENTS
+
+Thanks to the numerous people who have helped out on WWW::Mechanize in
+one way or another, including
+Kirrily Robert for the original C<WWW::Automate>,
+Lyle Hopkins,
+Damien Clark,
+Ansgar Burchardt,
+Gisle Aas,
+Jeremy Ary,
+Hilary Holz,
+Rafael Kitover,
+Norbert Buchmuller,
+Dave Page,
+David Sainty,
+H.Merijn Brand,
+Matt Lawrence,
+Michael Schwern,
+Adriano Ferreira,
+Miyagawa,
+Peteris Krumins,
+Rafael Kitover,
+David Steinbrunner,
+Kevin Falcone,
+Mike O'Regan,
+Mark Stosberg,
+Uri Guttman,
+Peter Scott,
+Phillipe Bruhat,
+Ian Langworth,
+John Beppu,
+Gavin Estey,
+Jim Brandt,
+Ask Bjoern Hansen,
+Greg Davies,
+Ed Silva,
+Mark-Jason Dominus,
+Autrijus Tang,
+Mark Fowler,
+Stuart Children,
+Max Maischein,
+Meng Wong,
+Prakash Kailasa,
+Abigail,
+Jan Pazdziora,
+Dominique Quatravaux,
+Scott Lanning,
+Rob Casey,
+Leland Johnson,
+Joshua Gatcomb,
+Julien Beasley,
+Abe Timmerman,
+Peter Stevens,
+Pete Krawczyk,
+Tad McClellan,
+and the late great Iain Truskett.
+
+=head1 COPYRIGHT
+
+Copyright (c) 2005-2010 Andy Lester. All rights reserved. This program is
+free software; you can redistribute it and/or modify it under the same
+terms as Perl itself.
+
+=cut
--- /dev/null
+=head1 NAME
+
+WWW::Mechanize::Cookbook - Recipes for using WWW::Mechanize
+
+=head1 INTRODUCTION
+
+First, please note that many of these are possible just using
+L<LWP::UserAgent>. Since C<WWW::Mechanize> is a subclass of
+L<LWP::UserAgent>, whatever works on C<LWP::UserAgent> should work
+on C<WWW::Mechanize>. See the L<lwpcook> man page included with
+the L<libwww-perl> distribution.
+
+=head1 BASICS
+
+=head2 Launch the WWW::Mechanize browser
+
+ use WWW::Mechanize;
+
+ my $mech = WWW::Mechanize->new( autocheck => 1 );
+
+The C<< autocheck => 1 >> tells Mechanize to die if any IO fails,
+so you don't have to manually check. It's easier that way. If you
+want to do your own error checking, leave it out.
+
+=head2 Fetch a page
+
+ $mech->get( "http://search.cpan.org" );
+ print $mech->content;
+
+C<< $mech->content >> contains the raw HTML from the web page. It
+is not parsed or handled in any way, at least through the C<content>
+method.
+
+=head2 Fetch a page into a file
+
+Sometimes you want to dump your results directly into a file. For
+example, there's no reason to read a JPEG into memory if you're
+only going to write it out immediately. This can also help with
+memory issues on large files.
+
+ $mech->get( "http://www.cpan.org/src/stable.tar.gz",
+ ":content_file" => "stable.tar.gz" );
+
+=head2 Fetch a password-protected page
+
+Generally, just call C<credentials> before fetching the page.
+
+ $mech->credentials( 'admin' => 'password' );
+ $mech->get( 'http://10.11.12.13/password.html' );
+ print $mech->content();
+
+=head1 LINKS
+
+=head2 Find all image links
+
+Find all links that point to a JPEG, GIF or PNG.
+
+ my @links = $mech->find_all_links(
+ tag => "a", url_regex => qr/\.(jpe?g|gif|png)$/i );
+
+=head2 Find all download links
+
+Find all links that have the word "download" in them.
+
+ my @links = $mech->find_all_links(
+ tag => "a", text_regex => qr/\bdownload\b/i );
+
+=head1 APPLICATIONS
+
+=head2 Check all pages on a web site
+
+Use Abe Timmerman's L<WWW::CheckSite>
+L<http://search.cpan.org/dist/WWW-CheckSite/>
+
+=head1 SEE ALSO
+
+L<WWW::Mechanize>
+
+=head1 AUTHORS
+
+Copyright 2005-2010 Andy Lester C<< <andy@petdance.com> >>
+
+Later contributions by Peter Scott, Mark Stosberg and others. See
+Acknowledgements section in L<WWW::Mechanize> for more.
+
+=cut
--- /dev/null
+=head1 NAME
+
+WWW::Mechanize::Examples - Sample programs that use WWW::Mechanize
+
+=head1 SYNOPSIS
+
+Plenty of people have learned WWW::Mechanize, and now, you can too!
+
+Following are user-supplied samples of WWW::Mechanize in action.
+If you have samples you'd like to contribute, please send 'em to
+C<< <andy@petdance.com> >>.
+
+You can also look at the F<t/*.t> files in the distribution.
+
+Please note that these examples are not intended to do any specific task.
+For all I know, they're no longer functional because the sites they
+hit have changed. They're here to give examples of how people have
+used WWW::Mechanize.
+
+Note that the examples are in reverse order of my having received them,
+so the freshest examples are always at the top.
+
+=head2 Starbucks Density Calculator, by Nat Torkington
+
+Here's a pair of programs from Nat Torkington, editor for O'Reilly Media
+and co-author of the I<Perl Cookbook>.
+
+=over 4
+
+Rael [Dornfest] discovered that you can easily find out how many Starbucks
+there are in an area by searching for "Starbucks". So I wrote a silly
+scraper for some old census data and came up with some Starbucks density
+figures. There's no meaning to these numbers thanks to errors from using
+old census data coupled with false positives in Yahoo search (e.g.,
+"Dodie Starbuck-Your Style Desgn" in Portland OR). But it was fun to
+waste a night on.
+
+Here are the top twenty cities in descending order of population,
+with the amount of territory each Starbucks has. E.g., A New York NY
+Starbucks covers 1.7 square miles of ground.
+
+ New York, NY 1.7
+ Los Angeles, CA 1.2
+ Chicago, IL 1.0
+ Houston, TX 4.6
+ Philadelphia, PA 6.8
+ San Diego, CA 2.7
+ Detroit, MI 19.9
+ Dallas, TX 2.7
+ Phoenix, AZ 4.1
+ San Antonio, TX 12.3
+ San Jose, CA 1.1
+ Baltimore, MD 3.9
+ Indianapolis, IN 12.1
+ San Francisco, CA 0.5
+ Jacksonville, FL 39.9
+ Columbus, OH 7.3
+ Milwaukee, WI 5.1
+ Memphis, TN 15.1
+ Washington, DC 1.4
+ Boston, MA 0.5
+
+=back
+
+C<get_pop_data>
+
+ #!/usr/bin/perl -w
+
+ use WWW::Mechanize;
+ use Storable;
+
+ $url = 'http://www.census.gov/population/www/documentation/twps0027.html';
+ $m = WWW::Mechanize->new();
+ $m->get($url);
+
+ $c = $m->content;
+
+ $c =~ m{<A NAME=.tabA.>(.*?)</TABLE>}s
+ or die "Can't find the population table\n";
+ $t = $1;
+ @outer = $t =~ m{<TR.*?>(.*?)</TR>}gs;
+ shift @outer;
+ foreach $r (@outer) {
+ @bits = $r =~ m{<TD.*?>(.*?)</TD>}gs;
+ for ($x = 0; $x < @bits; $x++) {
+ $b = $bits[$x];
+ @v = split /\s*<BR>\s*/, $b;
+ foreach (@v) { s/^\s+//; s/\s+$// }
+ push @{$data[$x]}, @v;
+ }
+ }
+
+ for ($y = 0; $y < @{$data[0]}; $y++) {
+ $data{$data[1][$y]} = {
+ NAME => $data[1][$y],
+ RANK => $data[0][$y],
+ POP => comma_free($data[2][$y]),
+ AREA => comma_free($data[3][$y]),
+ DENS => comma_free($data[4][$y]),
+ };
+ }
+
+ store(\%data, "cities.dat");
+
+ sub comma_free {
+ my $n = shift;
+ $n =~ s/,//;
+ return $n;
+ }
+
+
+C<plague_of_coffee>
+
+ #!/usr/bin/perl -w
+
+ use WWW::Mechanize;
+ use strict;
+ use Storable;
+
+ $SIG{__WARN__} = sub {} ; # ssssssh
+
+ my $Cities = retrieve("cities.dat");
+
+ my $m = WWW::Mechanize->new();
+ $m->get("http://local.yahoo.com/");
+
+ my @cities = sort { $Cities->{$a}{RANK} <=> $Cities->{$b}{RANK} } keys %$Cities;
+ foreach my $c ( @cities ) {
+ my $fields = {
+ 'stx' => "starbucks",
+ 'csz' => $c,
+ };
+
+ my $r = $m->submit_form(form_number => 2,
+ fields => $fields);
+ die "Couldn't submit form" unless $r->is_success;
+
+ my $hits = number_of_hits($r);
+ # my $ppl = sprintf("%d", 1000 * $Cities->{$c}{POP} / $hits);
+ # print "$c has $hits Starbucks. That's one for every $ppl people.\n";
+ my $density = sprintf("%.1f", $Cities->{$c}{AREA} / $hits);
+ print "$c : $density\n";
+ }
+
+ sub number_of_hits {
+ my $r = shift;
+ my $c = $r->content;
+ if ($c =~ m{\d+ out of <b>(\d+)</b> total results for}) {
+ return $1;
+ }
+ if ($c =~ m{Sorry, no .*? found in or near}) {
+ return 0;
+ }
+ if ($c =~ m{Your search matched multiple cities}) {
+ warn "Your search matched multiple cities\n";
+ return 0;
+ }
+ if ($c =~ m{Sorry we couldn.t find that location}) {
+ warn "No cities\n";
+ return 0;
+ }
+ if ($c =~ m{Could not find.*?, showing results for}) {
+ warn "No matches\n";
+ return 0;
+ }
+ die "Unknown response\n$c\n";
+ }
+
+
+
+=head2 pb-upload, by John Beppu
+
+This program takes filenames of images from the command line and
+uploads them to a www.photobucket.com folder. John Beppu, the author, says:
+
+=over 4
+
+I had 92 pictures I wanted to upload, and doing it through a browser
+would've been torture. But thanks to mech, all I had to do was
+`./pb.upload *.jpg` and watch it do its thing. It felt good.
+If I had more time, I'd implement WWW::Photobucket on top of
+WWW::Mechanize.
+
+=back
+
+ #!/usr/bin/perl -w -T
+
+ use strict;
+ use WWW::Mechanize;
+
+ my $login = "login_name";
+ my $password = "password";
+ my $folder = "folder";
+
+ my $url = "http://img78.photobucket.com/albums/v281/$login/$folder/";
+
+ # login to your photobucket.com account
+ my $mech = WWW::Mechanize->new();
+ $mech->get($url);
+ $mech->submit_form(
+ form_number => 1,
+ fields => { password => $password },
+ );
+ die unless ($mech->success);
+
+ # upload image files specified on command line
+ foreach (@ARGV) {
+ print "$_\n";
+ $mech->form_number(2);
+ $mech->field('the_file[]' => $_);
+ $mech->submit();
+ }
+
+=head2 listmod, by Ian Langworth
+
+Ian Langworth contributes this little gem that will bring joy to
+beleagured mailing list admins. It discards spam messages through
+mailman's web interface.
+
+
+ #!/arch/unix/bin/perl
+ use strict;
+ use warnings;
+ #
+ # listmod - fast alternative to mailman list interface
+ #
+ # usage: listmod crew XXXXXXXX
+ #
+
+ die "usage: $0 <listname> <password>\n" unless @ARGV == 2;
+ my ($listname, $password) = @ARGV;
+
+ use CGI qw(unescape);
+
+ use WWW::Mechanize;
+ my $m = WWW::Mechanize->new( autocheck => 1 );
+
+ use Term::ReadLine;
+ my $term = Term::ReadLine->new($0);
+
+ # submit the form, get the cookie, go to the list admin page
+ $m->get("https://lists.ccs.neu.edu/bin/admindb/$listname");
+ $m->set_visible( $password );
+ $m->click;
+
+ # exit if nothing to do
+ print "There are no pending requests.\n" and exit
+ if $m->content =~ /There are no pending requests/;
+
+ # select the first form and examine its contents
+ $m->form_number(1);
+ my $f = $m->current_form or die "Couldn't get first form!\n";
+
+ # get me the base form element for each email item
+ my @items = map {m/^.+?-(.+)/} grep {m/senderbanp/} $f->param
+ or die "Couldn't get items in first form!\n";
+
+ # iterate through items, prompt user, commit actions
+ foreach my $item (@items) {
+
+ # show item info
+ my $sender = unescape($item);
+ my ($subject) = [$f->find_input("senderbanp-$item")->value_names]->[1]
+ =~ /Subject:\s+(.+?)\s+Size:/g;
+
+ # prompt user
+ my $choice = '';
+ while ( $choice !~ /^[DAX]$/ ) {
+ print "$sender\: '$subject'\n";
+ $choice = uc $term->readline("Action: defer/accept/discard [dax]: ");
+ print "\n\n";
+ }
+
+ # set button
+ $m->field("senderaction-$item" => {D=>0,A=>1,X=>3}->{$choice});
+ }
+
+ # submit actions
+ $m->click;
+
+=head2 ccdl, by Andy Lester
+
+Steve McConnell, author of the landmark I<Code Complete> has put
+up the chapters for the 2nd edition in PDF format on his website.
+I needed to download them to take to Kinko's to have printed. This
+little program did it for me.
+
+
+ #!/usr/bin/perl -w
+
+ use strict;
+ use WWW::Mechanize;
+
+ my $start = "http://www.stevemcconnell.com/cc2/cc.htm";
+
+ my $mech = WWW::Mechanize->new( autocheck => 1 );
+ $mech->get( $start );
+
+ my @links = $mech->find_all_links( url_regex => qr/\d+.+\.pdf$/ );
+
+ for my $link ( @links ) {
+ my $url = $link->url_abs;
+ my $filename = $url;
+ $filename =~ s[^.+/][];
+
+ print "Fetching $url";
+ $mech->get( $url, ':content_file' => $filename );
+
+ print " ", -s $filename, " bytes\n";
+ }
+
+=head2 quotes.pl, by Andy Lester
+
+This was a program that was going to get a hack in I<Spidering Hacks>,
+but got cut at the last minute, probably because it's against IMDB's TOS
+to scrape from it. I present it here as an example, not a suggestion
+that you break their TOS.
+
+Last I checked, it didn't work because their HTML didn't match, but it's
+still good as sample code.
+
+ #!/usr/bin/perl -w
+
+ use strict;
+
+ use WWW::Mechanize;
+ use Getopt::Long;
+ use Text::Wrap;
+
+ my $match = undef;
+ my $random = undef;
+ GetOptions(
+ "match=s" => \$match,
+ "random" => \$random,
+ ) or exit 1;
+
+ my $movie = shift @ARGV or die "Must specify a movie\n";
+
+ my $quotes_page = get_quotes_page( $movie );
+ my @quotes = extract_quotes( $quotes_page );
+
+ if ( $match ) {
+ $match = quotemeta($match);
+ @quotes = grep /$match/i, @quotes;
+ }
+
+ if ( $random ) {
+ print $quotes[rand @quotes];
+ }
+ else {
+ print join( "\n", @quotes );
+ }
+
+
+ sub get_quotes_page {
+ my $movie = shift;
+
+ my $mech = WWW::Mechanize->new;
+ $mech->get( "http://www.imdb.com/search" );
+ $mech->success or die "Can't get the search page";
+
+ $mech->submit_form(
+ form_number => 2,
+ fields => {
+ title => $movie,
+ restrict => "Movies only",
+ },
+ );
+
+ my @links = $mech->find_all_links( url_regex => qr[^/Title] )
+ or die "No matches for \"$movie\" were found.\n";
+
+ # Use the first link
+ my ( $url, $title ) = @{$links[0]};
+
+ warn "Checking $title...\n";
+
+ $mech->get( $url );
+ my $link = $mech->find_link( text_regex => qr/Memorable Quotes/i )
+ or die qq{"$title" has no quotes in IMDB!\n};
+
+ warn "Fetching quotes...\n\n";
+ $mech->get( $link->[0] );
+
+ return $mech->content;
+ }
+
+
+ sub extract_quotes {
+ my $page = shift;
+
+ # Nibble away at the unwanted HTML at the beginnning...
+ $page =~ s/.+Memorable Quotes//si;
+ $page =~ s/.+?(<a name)/$1/si;
+
+ # ... and the end of the page
+ $page =~ s/Browse titles in the movie quotes.+$//si;
+ $page =~ s/<p.+$//g;
+
+ # Quotes separated by an <HR> tag
+ my @quotes = split( /<hr.+?>/, $page );
+
+ for my $quote ( @quotes ) {
+ my @lines = split( /<br>/, $quote );
+ for ( @lines ) {
+ s/<[^>]+>//g; # Strip HTML tags
+ s/\s+/ /g; # Squash whitespace
+ s/^ //; # Strip leading space
+ s/ $//; # Strip trailing space
+ s/"/"/g; # Replace HTML entity quotes
+
+ # Word-wrap to fit in 72 columns
+ $Text::Wrap::columns = 72;
+ $_ = wrap( '', ' ', $_ );
+ }
+ $quote = join( "\n", @lines );
+ }
+
+ return @quotes;
+ }
+
+=head2 cpansearch.pl, by Ed Silva
+
+A quick little utility to search the CPAN and fire up a browser
+with a results page.
+
+ #!/usr/bin/perl
+
+ # turn on perl's safety features
+ use strict;
+ use warnings;
+
+ # work out the name of the module we're looking for
+ my $module_name = $ARGV[0]
+ or die "Must specify module name on command line";
+
+ # create a new browser
+ use WWW::Mechanize;
+ my $browser = WWW::Mechanize->new();
+
+ # tell it to get the main page
+ $browser->get("http://search.cpan.org/");
+
+ # okay, fill in the box with the name of the
+ # module we want to look up
+ $browser->form_number(1);
+ $browser->field("query", $module_name);
+ $browser->click();
+
+ # click on the link that matches the module name
+ $browser->follow_link( text_regex => $module_name );
+
+ my $url = $browser->uri;
+
+ # launch a browser...
+ system('galeon', $url);
+
+ exit(0);
+
+
+=head2 lj_friends.cgi, by Matt Cashner
+
+ #!/usr/bin/perl
+
+ # Provides an rss feed of a paid user's LiveJournal friends list
+ # Full entries, protected entries, etc.
+ # Add to your favorite rss reader as
+ # http://your.site.com/cgi-bin/lj_friends.cgi?user=USER&password=PASSWORD
+
+ use warnings;
+ use strict;
+
+ use WWW::Mechanize;
+ use CGI;
+
+ my $cgi = CGI->new();
+ my $form = $cgi->Vars;
+
+ my $agent = WWW::Mechanize->new();
+
+ $agent->get('http://www.livejournal.com/login.bml');
+ $agent->form_number('3');
+ $agent->field('user',$form->{user});
+ $agent->field('password',$form->{password});
+ $agent->submit();
+ $agent->get('http://www.livejournal.com/customview.cgi?user='.$form->{user}.'&styleid=225596&checkcookies=1');
+ print "Content-type: text/plain\n\n";
+ print $agent->content();
+
+=head2 Hacking Movable Type, by Dan Rinzel
+
+ use strict;
+ use WWW::Mechanize;
+
+ # a tool to automatically post entries to a moveable type weblog, and set arbitrary creation dates
+
+ my $mech = WWW::Mechanize->new();
+ my $entry;
+ $entry->{title} = "Test AutoEntry Title";
+ $entry->{btext} = "Test AutoEntry Body";
+ $entry->{date} = '2002-04-15 14:18:00';
+ my $start = qq|http://my.blog.site/mt.cgi|;
+
+ $mech->get($start);
+ $mech->field('username','und3f1n3d');
+ $mech->field('password','obscur3d');
+ $mech->submit(); # to get login cookie
+ $mech->get(qq|$start?__mode=view&_type=entry&blog_id=1|);
+ $mech->form_name('entry_form');
+ $mech->field('title',$entry->{title});
+ $mech->field('category_id',1); # adjust as needed
+ $mech->field('text',$entry->{btext});
+ $mech->field('status',2); # publish, or 1 = draft
+ $results = $mech->submit();
+
+ # if we're ok with this entry being datestamped "NOW" (no {date} in %entry)
+ # we're done. Otherwise, time to be tricksy
+ # MT returns a 302 redirect from this form. the redirect itself contains a <body onload=""> handler
+ # which takes the user to an editable version of the form where the create date can be edited
+ # MT date format of YYYY-MM-DD HH:MI:SS is the only one that won't error out
+
+ if ($entry->{date} && $entry->{date} =~ /^\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}/) {
+ # travel the redirect
+ $results = $mech->get($results->{_headers}->{location});
+ $results->{_content} =~ /<body onLoad="([^\"]+)"/is;
+ my $js = $1;
+ $js =~ /\'([^']+)\'/;
+ $results = $mech->get($start.$1);
+ $mech->form_name('entry_form');
+ $mech->field('created_on_manual',$entry->{date});
+ $mech->submit();
+ }
+
+=head2 get-despair, by Randal Schwartz
+
+Randal submitted this bot that walks the despair.com site sucking down
+all the pictures.
+
+ use strict;
+ $|++;
+
+ use WWW::Mechanize;
+ use File::Basename;
+
+ my $m = WWW::Mechanize->new;
+
+ $m->get("http://www.despair.com/indem.html");
+
+ my @top_links = @{$m->links};
+
+ for my $top_link_num (0..$#top_links) {
+ next unless $top_links[$top_link_num][0] =~ /^http:/;
+
+ $m->follow_link( n=>$top_link_num ) or die "can't follow $top_link_num";
+
+ print $m->uri, "\n";
+ for my $image (grep m{^http://store4}, map $_->[0], @{$m->links}) {
+ my $local = basename $image;
+ print " $image...", $m->mirror($image, $local)->message, "\n"
+ }
+
+ $m->back or die "can't go back";
+ }
--- /dev/null
+=head1 NAME
+
+WWW::Mechanize::FAQ - Frequently Asked Questions about WWW::Mechanize
+
+=head1 How to get help with WWW::Mechanize
+
+If your question isn't answered here in the FAQ, please turn to the
+communities at:
+
+=over
+
+=item * L<http://perlmonks.org>
+
+=item * The libwww-perl mailing list at L<http://lists.perl.org>
+
+=back
+
+=head1 JavaScript
+
+=head2 I have this web page that has JavaScript on it, and my Mech program doesn't work.
+
+That's because WWW::Mechanize doesn't operate on the JavaScript. It only
+understands the HTML parts of the page.
+
+=head2 I thought Mech was supposed to work like a web browser.
+
+It does pretty much, but it doesn't support JavaScript.
+
+I added some basic attempts at picking up URLs in C<window.open()>
+calls and return them in C<< $mech->links >>. They work sometimes.
+
+Since Javascript is completely visible to the client, it cannot be used
+to prevent a scraper from following links. But it can make life difficult. If
+you want to scrape specific pages, then a solution is always possible.
+
+One typical use of Javascript is to perform argument checking before
+posting to the server. The URL you want is probably just buried in the
+Javascript function. Do a regular expression match on
+C<< $mech->content() >>
+to find the link that you want and C<< $mech->get >> it directly (this
+assumes that you know what you are looking for in advance).
+
+In more difficult cases, the Javascript is used for URL mangling to
+satisfy the needs of some middleware. In this case you need to figure
+out what the Javascript is doing (why are these URLs always really
+long?). There is probably some function with one or more arguments which
+calculates the new URL. Step one: using your favorite browser, get the
+before and after URLs and save them to files. Edit each file, converting
+the the argument separators ('?', '&' or ';') into newlines. Now it is
+easy to use diff or comm to find out what Javascript did to the URL.
+Step 2 - find the function call which created the URL - you will need
+to parse and interpret its argument list. The Javascript Debugger in the
+Firebug extension for Firefox helps with the analysis. At this point, it is
+fairly trivial to write your own function which emulates the Javascript
+for the pages you want to process.
+
+Here's annother approach that answers the question, "It works in Firefox,
+but why not Mech?" Everything the web server knows about the client is
+present in the HTTP request. If two requests are identical, the results
+should be identical. So the real question is "What is different between
+the mech request and the Firefox request?"
+
+The Firefox extension "Tamper Data" is an effective tool for examining
+the headers of the requests to the server. Compare that with what LWP
+is sending. Once the two are identical, the action of the server should
+be the same as well.
+
+I say "should", because this is an oversimplification - some values
+are naturally unique, e.g. a SessionID, but if a SessionID is present,
+that is probably sufficient, even though the value will be different
+between the LWP request and the Firefox request. The server could use
+the session to store information which is troublesome, but that's not
+the first place to look (and highly unlikely to be relevant when you
+are requesting the login page of your site).
+
+Generally the problem is to be found in missing or incorrect POSTDATA
+arguments, Cookies, User-Agents, Accepts, etc. If you are using mech,
+then redirects and cookies should not be a problem, but are listed here
+for completeness. If you are missing headers, C<< $mech->add_header >>
+can be used to add the headers that you need.
+
+=head2 Which modules work like Mechanize and have JavaScript support?
+
+In no particular order: L<Gtk2::WebKit::Mechanize>, L<Win32::IE::Mechanize>,
+L<WWW::Mechanize::Firefox>, L<WWW::Scripter>, L<WWW::Selenium>
+
+=head1 How do I do X?
+
+=head2 Can I do [such-and-such] with WWW::Mechanize?
+
+If it's possible with LWP::UserAgent, then yes. WWW::Mechanize is
+a subclass of L<LWP::UserAgent>, so all the wondrous magic of that
+class is inherited.
+
+=head2 How do I use WWW::Mechanize through a proxy server?
+
+See the docs in L<LWP::UserAgent> on how to use the proxy. Short version:
+
+ $mech->proxy(['http', 'ftp'], 'http://proxy.example.com:8000/');
+
+or get the specs from the environment:
+
+ $mech->env_proxy();
+
+ # Environment set like so:
+ gopher_proxy=http://proxy.my.place/
+ wais_proxy=http://proxy.my.place/
+ no_proxy="localhost,my.domain"
+ export gopher_proxy wais_proxy no_proxy
+
+=head2 How can I see what fields are on the forms?
+
+Use the mech-dump utility, optionally installed with Mechanize.
+
+ $ mech-dump --forms http://search.cpan.org
+ Dumping forms
+ GET http://search.cpan.org/search
+ query=
+ mode=all (option) [*all|module|dist|author]
+ <NONAME>=CPAN Search (submit)
+
+=head2 How do I get Mech to handle authentication?
+
+ use MIME::Base64;
+
+ my $agent = WWW::Mechanize->new();
+ my @args = (
+ Authorization => "Basic " .
+ MIME::Base64::encode( USER . ':' . PASS )
+ );
+
+ $agent->credentials( ADDRESS, REALM, USER, PASS );
+ $agent->get( URL, @args );
+
+If you want to use the credentials for all future requests, you can
+also use the L<LWP::UserAgent> C<default_header()> method instead
+of the extra arguments to C<get()>
+
+ $mech->default_header(
+ Authorization => 'Basic ' . encode_base64( USER . ':' . PASSWORD ) );
+
+=head2 How can I get WWW::Mechanize to execute this JavaScript?
+
+You can't. JavaScript is entirely client-based, and WWW::Mechanize
+is a client that doesn't understand JavaScript. See the top part
+of this FAQ.
+
+=head2 How do I check a checkbox that doesn't have a value defined?
+
+Set it to to the value of "on".
+
+ $mech->field( my_checkbox => 'on' );
+
+=head2 How do I handle frames?
+
+You don't deal with them as frames, per se, but as links. Extract
+them with
+
+ my @frame_links = $mech->find_link( tag => "frame" );
+
+=head2 How do I get a list of HTTP headers and their values?
+
+All L<HTTP::Headers> methods work on a L<HTTP::Response> object which is
+returned by the I<get()>, I<reload()>, I<response()/res()>, I<click()>,
+I<submit_form()>, and I<request()> methods.
+
+ my $mech = WWW::Mechanize->new( autocheck => 1 );
+ $mech->get( 'http://my.site.com' );
+ my $res = $mech->response();
+ for my $key ( $response->header_field_names() ) {
+ print $key, " : ", $response->header( $key ), "\n";
+ }
+
+=head2 How do I enable keep-alive?
+
+Since L<WWW::Mechanize> is a subclass of L<LWP::UserAgent>, you can
+use the same mechanism to enable keep-alive:
+
+ use LWP::ConnCache;
+ ...
+ $mech->conn_cache(LWP::ConnCache->new);
+
+=head2 How can I change/specify the action parameter of an HTML form?
+
+You can access the action of the form by utilizing the L<HTML::Form>
+object returned from one of the specifying form methods.
+
+Using C<< $mech->form_number($number) >>:
+
+ my $mech = WWW::mechanize->new;
+ $mech->get('http://someurlhere.com');
+ # Access the form using its Zero-Based Index by DOM order
+ $mech->form_number(0)->action('http://newAction'); #ABS URL
+
+Using C<< $mech->form_name($number) >>:
+
+ my $mech = WWW::mechanize->new;
+ $mech->get('http://someurlhere.com');
+ #Access the form using its Zero-Based Index by DOM order
+ $mech->form_name('trgForm')->action('http://newAction'); #ABS URL
+
+=head2 How do I save an image? How do I save a large tarball?
+
+An image is just content. You get the image and save it.
+
+ $mech->get( 'photo.jpg' );
+ $mech->save_content( '/path/to/my/directory/photo.jpg' );
+
+You can also save any content directly to disk using the C<:content_file>
+flag to C<get()>, which is part of L<LWP::UserAgent>.
+
+ $mech->get( 'http://www.cpan.org/src/stable.tar.gz',
+ ':content_file' => 'stable.tar.gz' );
+
+=head2 How do I pick a specific value from a C<< <select> >> list?
+
+Find the C<HTML::Form::ListInput> in the page.
+
+ my ($listbox) = $mech->find_all_inputs( name => 'listbox' );
+
+Then create a hash for the lookup:
+
+ my %name_lookup;
+ @name_lookup{ $listbox->value_names } = $listbox->possible_values;
+ my $value = $name_lookup{ 'Name I want' };
+
+If you have duplicate names, this method won't work, and you'll
+have to loop over C<< $listbox->value_names >> and
+C<< $listbox->possible_values >> in parallel until you find a
+matching name.
+
+=head2 How do I get Mech to not follow redirects?
+
+You use functionality in LWP::UserAgent, not Mech itself.
+
+ $mech->requests_redirectable( [] );
+
+Or you can set C<max_redirect>:
+
+ $mech->max_redirect( 0 );
+
+Both these options can also be set in the constructor. Mech doesn't
+understand them, so will pass them through to the LWP::UserAgent
+constructor.
+
+
+=head1 Why doesn't this work: Debugging your Mechanize program
+
+=head2 My Mech program doesn't work, but it works in the browser.
+
+Mechanize acts like a browser, but apparently something you're doing
+is not matching the browser's behavior. Maybe it's expecting a
+certain web client, or maybe you've not handling a field properly.
+For some reason, your Mech problem isn't doing exactly what the
+browser is doing, and when you find that, you'll have the answer.
+
+=head2 My Mech program gets these 500 errors.
+
+A 500 error from the web server says that the program on the server
+side died. Probably the web server program was expecting certain
+inputs that you didn't supply, and instead of handling it nicely,
+the program died.
+
+Whatever the cause of the 500 error, if it works in the browser,
+but not in your Mech program, you're not acting like the browser.
+See the previous question.
+
+=head2 Why doesn't my program handle this form correctly?
+
+Run F<mech-dump> on your page and see what it says.
+
+F<mech-dump> is a marvelous diagnostic tool for figuring out what forms
+and fields are on the page. Say you're scraping CNN.com, you'd get this:
+
+ $ mech-dump http://www.cnn.com/
+ GET http://search.cnn.com/cnn/search
+ source=cnn (hidden readonly)
+ invocationType=search/top (hidden readonly)
+ sites=web (radio) [*web/The Web ??|cnn/CNN.com ??]
+ query= (text)
+ <NONAME>=Search (submit)
+
+ POST http://cgi.money.cnn.com/servlets/quote_redirect
+ query= (text)
+ <NONAME>=GET (submit)
+
+ POST http://polls.cnn.com/poll
+ poll_id=2112 (hidden readonly)
+ question_1=<UNDEF> (radio) [1/Simplistic option|2/VIEW RESULTS]
+ <NONAME>=VOTE (submit)
+
+ GET http://search.cnn.com/cnn/search
+ source=cnn (hidden readonly)
+ invocationType=search/bottom (hidden readonly)
+ sites=web (radio) [*web/??CNN.com|cnn/??]
+ query= (text)
+ <NONAME>=Search (submit)
+
+Four forms, including the first one duplicated at the end. All the
+fields, all their defaults, lovingly generated by HTML::Form's C<dump>
+method.
+
+If you want to run F<mech-dump> on something that doesn't lend itself
+to a quick URL fetch, then use the C<save_content()> method to write
+the HTML to a file, and run F<mech-dump> on the file.
+
+=head2 Why don't https:// URLs work?
+
+You need either L<IO::Socket::SSL> or L<Crypt::SSLeay> installed.
+
+=head2 Why do I get "Input 'fieldname' is readonly"?
+
+You're trying to change the value of a hidden field and you have
+warnings on.
+
+First, make sure that you actually mean to change the field that you're
+changing, and that you don't have a typo. Usually, hidden variables are
+set by the site you're working on for a reason. If you change the value,
+you might be breaking some functionality by faking it out.
+
+If you really do want to change a hidden value, make the changes in a
+scope that has warnings turned off:
+
+ {
+ local $^W = 0;
+ $agent->field( name => $value );
+ }
+
+=head2 I tried to [such-and-such] and I got this weird error.
+
+Are you checking your errors?
+
+Are you sure?
+
+Are you checking that your action succeeded after every action?
+
+Are you sure?
+
+For example, if you try this:
+
+ $mech->get( "http://my.site.com" );
+ $mech->follow_link( "foo" );
+
+and the C<get> call fails for some reason, then the Mech internals
+will be unusable for the C<follow_link> and you'll get a weird
+error. You B<must>, after every action that GETs or POSTs a page,
+check that Mech succeeded, or all bets are off.
+
+ $mech->get( "http://my.site.com" );
+ die "Can't even get the home page: ", $mech->response->status_line
+ unless $mech->success;
+
+ $mech->follow_link( "foo" );
+ die "Foo link failed: ", $mech->response->status_line
+ unless $mech->success;
+
+=head2 How do I figure out why C<< $mech->get($url) >> doesn't work?
+
+There are many reasons why a C<< get() >> can fail. The server can take
+you to someplace you didn't expect. It can generate redirects which are
+not properly handled. You can get time-outs. Servers are down more often
+than you think! etc, etc, etc. A couple of places to start:
+
+=over 4
+
+=item 1 Check C<< $mech->status() >> after each call
+
+=item 2 Check the URL with C<< $mech->uri() >> to see where you ended up
+
+=item 3 Try debugging with C<< LWP::Debug >>.
+
+=back
+
+If things are really strange, turn on debugging with
+C<< use LWP::Debug qw(+); >>
+Just put this in the main program. This causes LWP to print out a trace
+of the HTTP traffic between client and server and can be used to figure
+out what is happening at the protocol level.
+
+It is also useful to set many traps to verify that processing is
+proceeding as expected. A Mech program should always have an "I didn't
+expect to get here" or "I don't recognize the page that I am processing"
+case and bail out.
+
+Since errors can be transient, by the time you notice that the error
+has occurred, it might not be possible to reproduce it manually. So
+for automated processing it is useful to email yourself the following
+information:
+
+=over 4
+
+=item * where processing is taking place
+
+=item * An Error Message
+
+=item * $mech->uri
+
+=item * $mech->content
+
+=back
+
+You can also save the content of the page with C<< $mech->save_content( 'filename.html' ); >>
+
+=head2 I submitted a form, but the server ignored everything! I got an empty form back!
+
+The post is handled by application software. It is common for PHP
+programmers to use the same file both to display a form and to process
+the arguments returned. So the first task of the application programmer
+is to decide whether there are arguments to processes. The program can
+check whether a particular parameter has been set, whether a hidden
+parameter has been set, or whether the submit button has been clicked.
+(There are probably other ways that I haven't thought of).
+
+In any case, if your form is not setting the parameter (e.g. the submit
+button) which the web application is keying on (and as an outsider there
+is no way to know what it is keying on), it will not notice that the form
+has been submitted. Try using C<< $mech->click() >> instead of
+C<< $mech->submit() >> or vice-versa.
+
+=head2 I've logged in to the server, but I get 500 errors when I try to get to protected content.
+
+Some web sites use distributed databases for their processing. It
+can take a few seconds for the login/session information to percolate
+through to all the servers. For human users with their slow reaction
+times, this is not a problem, but a Perl script can outrun the server.
+So try adding a C<sleep(5)> between logging in and actually doing anything
+(the optimal delay must be determined experimentally).
+
+=head2 Mech is a big memory pig! I'm running out of RAM!
+
+Mech keeps a history of every page, and the state it was in. It actually
+keeps a clone of the full Mech object at every step along the way.
+
+You can limit this stack size with the C<stack_depth> parm in the C<new()>
+constructor. If you set stack_size to 0, Mech will not keep any history.
+
+=head1 AUTHOR
+
+Copyright 2005-2009 Andy Lester C<< <andy at petdance.com> >>
+
+=cut
--- /dev/null
+package WWW::Mechanize::Image;
+# vi:et:sw=4 ts=4
+
+use strict;
+use warnings;
+
+=head1 NAME
+
+WWW::Mechanize::Image - Image object for WWW::Mechanize
+
+=head1 SYNOPSIS
+
+Image object to encapsulate all the stuff that Mech needs
+
+=head1 Constructor
+
+=head2 new()
+
+Creates and returns a new C<WWW::Mechanize::Image> object.
+
+ my $image = WWW::Mechanize::Image->new( {
+ url => $url,
+ base => $base,
+ tag => $tag,
+ name => $name, # From the INPUT tag
+ height => $height, # optional
+ width => $width, # optional
+ alt => $alt, # optional
+ } );
+
+=cut
+
+sub new {
+ my $class = shift;
+ my $parms = shift || {};
+
+ my $self = bless {}, $class;
+
+ for my $parm ( qw( url base tag height width alt name ) ) {
+ # Check for what we passed in, not whether it's defined
+ $self->{$parm} = $parms->{$parm} if exists $parms->{$parm};
+ }
+
+ # url and tag are always required
+ for ( qw( url tag ) ) {
+ exists $self->{$_} or die "WWW::Mechanize::Image->new must have a $_ argument";
+ }
+
+ return $self;
+}
+
+=head1 Accessors
+
+=head2 $link->url()
+
+URL from the link
+
+=head2 $link->base()
+
+Base URL to which the links are relative.
+
+=head2 $link->name()
+
+Name for the field from the NAME attribute, if any.
+
+=head2 $link->tag()
+
+Tag name (either "image" or "input")
+
+=head2 $link->height()
+
+Image height
+
+=head2 $link->width()
+
+Image width
+
+=head2 $link->alt()
+
+ALT attribute from the source tag, if any.
+
+=cut
+
+sub url { return ($_[0])->{url}; }
+sub base { return ($_[0])->{base}; }
+sub name { return ($_[0])->{name}; }
+sub tag { return ($_[0])->{tag}; }
+sub height { return ($_[0])->{height}; }
+sub width { return ($_[0])->{width}; }
+sub alt { return ($_[0])->{alt}; }
+
+=head2 $link->URI()
+
+Returns the URL as a L<URI::URL> object.
+
+=cut
+
+sub URI {
+ my $self = shift;
+
+ require URI::URL;
+ my $URI = URI::URL->new( $self->url, $self->base );
+
+ return $URI;
+}
+
+=head2 $link->url_abs()
+
+Returns the URL as an absolute URL string.
+
+=cut
+
+sub url_abs {
+ my $self = shift;
+
+ return $self->URI->abs;
+}
+
+=head1 SEE ALSO
+
+L<WWW::Mechanize> and L<WWW::Mechanize::Link>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2004-2010 Andy Lester.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+=over 4
+
+=item * the GNU General Public License as published by the Free
+Software Foundation; either version 1, or (at your option) any later
+version, or
+
+=item * the Artistic License version 2.0.
+
+=back
+
+=cut
+
+1;
--- /dev/null
+package WWW::Mechanize::Link;
+
+use strict;
+use warnings;
+
+=head1 NAME
+
+WWW::Mechanize::Link - Link object for WWW::Mechanize
+
+=head1 SYNOPSIS
+
+Link object to encapsulate all the stuff that Mech needs but nobody
+wants to deal with as an array.
+
+=head1 Constructor
+
+=head2 new()
+
+ my $link = WWW::Mechanize::Link->new( {
+ url => $url,
+ text => $text,
+ name => $name,
+ tag => $tag,
+ base => $base,
+ attr => $attr_href,
+ } );
+
+For compatibility, this older interface is also supported:
+
+ new( $url, $text, $name, $tag, $base, $attr_href )
+
+Creates and returns a new C<WWW::Mechanize::Link> object.
+
+=cut
+
+sub new {
+ my $class = shift;
+
+ my $self;
+
+ # The order of the first four must stay as they are for
+ # compatibility with older code.
+ if ( ref $_[0] eq 'HASH' ) {
+ $self = [ @{$_[0]}{ qw( url text name tag base attrs ) } ];
+ }
+ else {
+ $self = [ @_ ];
+ }
+
+ return bless $self, $class;
+}
+
+=head1 Accessors
+
+=head2 $link->url()
+
+URL from the link
+
+=head2 $link->text()
+
+Text of the link
+
+=head2 $link->name()
+
+NAME attribute from the source tag, if any.
+
+=head2 $link->tag()
+
+Tag name (one of: "a", "area", "frame", "iframe" or "meta").
+
+=head2 $link->base()
+
+Base URL to which the links are relative.
+
+=head2 $link->attrs()
+
+Returns hash ref of all the attributes and attribute values in the tag.
+
+=cut
+
+sub url { return ($_[0])->[0]; }
+sub text { return ($_[0])->[1]; }
+sub name { return ($_[0])->[2]; }
+sub tag { return ($_[0])->[3]; }
+sub base { return ($_[0])->[4]; }
+sub attrs { return ($_[0])->[5]; }
+
+=head2 $link->URI()
+
+Returns the URL as a L<URI::URL> object.
+
+=cut
+
+sub URI {
+ my $self = shift;
+
+ require URI::URL;
+ my $URI = URI::URL->new( $self->url, $self->base );
+
+ return $URI;
+}
+
+=head2 $link->url_abs()
+
+Returns a L<URI::URL> object for the absolute form of the string.
+
+=cut
+
+sub url_abs {
+ my $self = shift;
+
+ return $self->URI->abs;
+}
+
+=head1 SEE ALSO
+
+L<WWW::Mechanize> and L<WWW::Mechanize::Image>
+
+=head1 COPYRIGHT & LICENSE
+
+Copyright 2004-2010 Andy Lester.
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of either:
+
+=over 4
+
+=item * the GNU General Public License as published by the Free
+Software Foundation; either version 1, or (at your option) any later
+version, or
+
+=item * the Artistic License version 2.0.
+
+=back
+
+=cut
+
+# vi:et:sw=4 ts=4
+
+1;