6 use vars
qw($VERSION $TRANSLATE_UNDERSCORE);
9 # The $TRANSLATE_UNDERSCORE variable controls whether '_' can be used
10 # as a replacement for '-' in header field names.
11 $TRANSLATE_UNDERSCORE = 1 unless defined $TRANSLATE_UNDERSCORE;
13 # "Good Practice" order of HTTP message headers:
19 my @general_headers = qw(
20 Cache-Control Connection Date Pragma Trailer Transfer-Encoding Upgrade
24 my @request_headers = qw(
25 Accept Accept-Charset Accept-Encoding Accept-Language
26 Authorization Expect From Host
27 If-Match If-Modified-Since If-None-Match If-Range If-Unmodified-Since
28 Max-Forwards Proxy-Authorization Range Referer TE User-Agent
31 my @response_headers = qw(
32 Accept-Ranges Age ETag Location Proxy-Authenticate Retry-After Server
36 my @entity_headers = qw(
37 Allow Content-Encoding Content-Language Content-Length Content-Location
38 Content-MD5 Content-Range Content-Type Expires Last-Modified
41 my %entity_header = map { lc($_) => 1 } @entity_headers;
50 # Make alternative representations of @header_order. This is used
51 # for sorting and case matching.
59 $header_order{$lc} = ++$i;
60 $standard_case{$lc} = $_;
69 my $self = bless {}, $class;
70 $self->header(@_) if @_; # set up initial headers
78 Carp
::croak
('Usage: $h->header($field, ...)') unless @_;
83 my $op = @_ ? ($seen{lc($field)}++ ? 'PUSH' : 'SET') : 'GET';
84 @old = $self->_header($field, shift, $op);
86 return @old if wantarray;
87 return $old[0] if @old <= 1;
101 return $self->_header(@_, 'PUSH_H') if @_ == 2;
103 $self->_header(splice(@_, 0, 2), 'PUSH_H');
110 Carp
::croak
('Usage: $h->init_header($field, $val)') if @_ != 3;
111 shift-
>_header(@_, 'INIT');
117 my($self, @fields) = @_;
120 foreach $field (@fields) {
121 $field =~ tr/_/-/ if $field !~ /^:/ && $TRANSLATE_UNDERSCORE;
122 my $v = delete $self->{lc $field};
123 push(@values, ref($v) eq 'ARRAY' ? @$v : $v) if defined $v;
128 sub remove_content_headers
131 unless (defined(wantarray)) {
132 # fast branch that does not create return object
133 delete @$self{grep $entity_header{$_} || /^content-/, keys %$self};
137 my $c = ref($self)->new;
138 for my $f (grep $entity_header{$_} || /^content-/, keys %$self) {
139 $c->{$f} = delete $self->{$f};
147 my($self, $field, $val, $op) = @_;
149 unless ($field =~ /^:/) {
150 $field =~ tr/_/-/ if $TRANSLATE_UNDERSCORE;
153 unless(defined $standard_case{$field}) {
154 # generate a %standard_case entry for this field
155 $old =~ s/\b(\w)/\u$1/g;
156 $standard_case{$field} = $old;
160 $op ||= defined($val) ? 'SET' : 'GET';
161 if ($op eq 'PUSH_H') {
162 # Like PUSH but where we don't care about the return value
163 if (exists $self->{$field}) {
164 my $h = $self->{$field};
165 if (ref($h) eq 'ARRAY') {
166 push(@$h, ref($val) eq "ARRAY" ? @$val : $val);
169 $self->{$field} = [$h, ref($val) eq "ARRAY" ? @$val : $val]
173 $self->{$field} = $val;
177 my $h = $self->{$field};
178 my @old = ref($h) eq 'ARRAY' ? @$h : (defined($h) ? ($h) : ());
180 unless ($op eq 'GET' || ($op eq 'INIT' && @old)) {
182 my @new = ($op eq 'PUSH') ? @old : ();
183 if (ref($val) ne 'ARRAY') {
189 $self->{$field} = @new > 1 ? \
@new : $new[0];
191 elsif ($op ne 'PUSH') {
192 delete $self->{$field};
199 sub _sorted_field_names
203 ($header_order{$a} || 999) <=> ($header_order{$b} || 999) ||
209 sub header_field_names
{
211 return map $standard_case{$_} || $_, @{ $self->_sorted_field_names },
219 my($self, $sub) = @_;
221 for $key (@{ $self->_sorted_field_names }) {
222 next if substr($key, 0, 1) eq '_';
223 my $vals = $self->{$key};
224 if (ref($vals) eq 'ARRAY') {
227 $sub->($standard_case{$key} || $key, $val);
231 $sub->($standard_case{$key} || $key, $vals);
239 my($self, $endl) = @_;
240 $endl = "\n" unless defined $endl;
243 for my $key (@{ $self->_sorted_field_names }) {
244 next if index($key, '_') == 0;
245 my $vals = $self->{$key};
246 if ( ref($vals) eq 'ARRAY' ) {
247 for my $val (@$vals) {
248 my $field = $standard_case{$key} || $key;
250 if ( index($val, "\n") >= 0 ) {
251 $val = _process_newline
($val, $endl);
253 push @result, $field . ': ' . $val;
257 my $field = $standard_case{$key} || $key;
259 if ( index($vals, "\n") >= 0 ) {
260 $vals = _process_newline
($vals, $endl);
262 push @result, $field . ': ' . $vals;
266 join($endl, @result, '');
269 sub _process_newline
{
272 # must handle header values with embedded newlines with care
273 s/\s+$//; # trailing newlines and space must go
274 s/\n(\x0d?\n)+/\n/g; # no empty lines
275 s/\n([^\040\t])/\n $1/g; # intial space for continuation
276 s/\n/$endl/g; # substitute with requested line ending
282 if (eval { require Storable
; 1 }) {
283 *clone
= \
&Storable
::dclone
;
287 my $clone = HTTP
::Headers-
>new;
288 $self->scan(sub { $clone->push_header(@_);} );
297 my($self, $header, $time) = @_;
298 my($old) = $self->_header($header);
300 $self->_header($header, HTTP
::Date
::time2str
($time));
302 $old =~ s/;.*// if defined($old);
303 HTTP
::Date
::str2time
($old);
307 sub date
{ shift-
>_date_header('Date', @_); }
308 sub expires
{ shift-
>_date_header('Expires', @_); }
309 sub if_modified_since
{ shift-
>_date_header('If-Modified-Since', @_); }
310 sub if_unmodified_since
{ shift-
>_date_header('If-Unmodified-Since', @_); }
311 sub last_modified
{ shift-
>_date_header('Last-Modified', @_); }
313 # This is used as a private LWP extension. The Client-Date header is
314 # added as a timestamp to a response when it has been received.
315 sub client_date
{ shift-
>_date_header('Client-Date', @_); }
317 # The retry_after field is dual format (can also be a expressed as
318 # number of seconds from now), so we don't provide an easy way to
319 # access it until we have know how both these interfaces can be
320 # addressed. One possibility is to return a negative value for
321 # relative seconds and a positive value for epoch based time values.
322 #sub retry_after { shift->_date_header('Retry-After', @_); }
326 my $ct = $self->{'content-type'};
327 $self->{'content-type'} = shift if @_;
328 $ct = $ct->[0] if ref($ct) eq 'ARRAY';
329 return '' unless defined($ct) && length($ct);
330 my @ct = split(/;\s*/, $ct, 2);
335 wantarray ? @ct : $ct[0];
338 sub content_type_charset
{
340 require HTTP
::Headers
::Util
;
341 my $h = $self->{'content-type'};
342 $h = $h->[0] if ref($h);
343 $h = "" unless defined $h;
344 my @v = HTTP
::Headers
::Util
::split_header_words
($h);
346 my($ct, undef, %ct_param) = @{$v[0]};
347 my $charset = $ct_param{charset
};
353 $charset = uc($charset);
354 $charset =~ s/^\s+//; $charset =~ s/\s+\z//;
355 undef($charset) if $charset eq "";
357 return $ct, $charset if wantarray;
360 return undef, undef if wantarray;
364 sub content_is_text
{
366 return $self->content_type =~ m
,^text
/,;
369 sub content_is_html
{
371 return $self->content_type eq 'text/html' || $self->content_is_xhtml;
374 sub content_is_xhtml
{
375 my $ct = shift-
>content_type;
376 return $ct eq "application/xhtml+xml" ||
377 $ct eq "application/vnd.wap.xhtml+xml";
381 my $ct = shift-
>content_type;
382 return 1 if $ct eq "text/xml";
383 return 1 if $ct eq "application/xml";
384 return 1 if $ct =~ /\+xml$/;
390 if (@_ && $_[0] =~ /#/) {
391 # Strip fragment per RFC 2616, section 14.36.
395 $uri->fragment(undef);
402 ($self->_header('Referer', @_))[0];
404 *referrer
= \
&referer
; # on tchrist's request
406 sub title
{ (shift-
>_header('Title', @_))[0] }
407 sub content_encoding
{ (shift-
>_header('Content-Encoding', @_))[0] }
408 sub content_language
{ (shift-
>_header('Content-Language', @_))[0] }
409 sub content_length
{ (shift-
>_header('Content-Length', @_))[0] }
411 sub user_agent
{ (shift-
>_header('User-Agent', @_))[0] }
412 sub server
{ (shift-
>_header('Server', @_))[0] }
414 sub from
{ (shift-
>_header('From', @_))[0] }
415 sub warning
{ (shift-
>_header('Warning', @_))[0] }
417 sub www_authenticate
{ (shift-
>_header('WWW-Authenticate', @_))[0] }
418 sub authorization
{ (shift-
>_header('Authorization', @_))[0] }
420 sub proxy_authenticate
{ (shift-
>_header('Proxy-Authenticate', @_))[0] }
421 sub proxy_authorization
{ (shift-
>_header('Proxy-Authorization', @_))[0] }
423 sub authorization_basic
{ shift-
>_basic_auth("Authorization", @_) }
424 sub proxy_authorization_basic
{ shift-
>_basic_auth("Proxy-Authorization", @_) }
427 require MIME
::Base64
;
428 my($self, $h, $user, $passwd) = @_;
429 my($old) = $self->_header($h);
431 Carp
::croak
("Basic authorization user name can't contain ':'")
433 $passwd = '' unless defined $passwd;
434 $self->_header($h => 'Basic ' .
435 MIME
::Base64
::encode
("$user:$passwd", ''));
437 if (defined $old && $old =~ s/^\s*Basic\s+//) {
438 my $val = MIME
::Base64
::decode
($old);
439 return $val unless wantarray;
440 return split(/:/, $val, 2);
452 HTTP::Headers - Class encapsulating HTTP Message headers
456 require HTTP::Headers;
457 $h = HTTP::Headers->new;
459 $h->header('Content-Type' => 'text/plain'); # set
460 $ct = $h->header('Content-Type'); # get
461 $h->remove_header('Content-Type'); # delete
465 The C<HTTP::Headers> class encapsulates HTTP-style message headers.
466 The headers consist of attribute-value pairs also called fields, which
467 may be repeated, and which are printed in a particular order. The
468 field names are cases insensitive.
470 Instances of this class are usually created as member variables of the
471 C<HTTP::Request> and C<HTTP::Response> classes, internal to the
474 The following methods are available:
478 =item $h = HTTP::Headers->new
480 Constructs a new C<HTTP::Headers> object. You might pass some initial
481 attribute-value pairs as parameters to the constructor. I<E.g.>:
483 $h = HTTP::Headers->new(
484 Date => 'Thu, 03 Feb 1994 00:00:00 GMT',
485 Content_Type => 'text/html; version=3.2',
486 Content_Base => 'http://www.perl.org/');
488 The constructor arguments are passed to the C<header> method which is
493 Returns a copy of this C<HTTP::Headers> object.
495 =item $h->header( $field )
497 =item $h->header( $field => $value )
499 =item $h->header( $f1 => $v1, $f2 => $v2, ... )
501 Get or set the value of one or more header fields. The header field
502 name ($field) is not case sensitive. To make the life easier for perl
503 users who wants to avoid quoting before the => operator, you can use
504 '_' as a replacement for '-' in header names.
506 The header() method accepts multiple ($field => $value) pairs, which
507 means that you can update several fields with a single invocation.
509 The $value argument may be a plain string or a reference to an array
510 of strings for a multi-valued field. If the $value is provided as
511 C<undef> then the field is removed. If the $value is not given, then
512 that header field will remain unchanged.
514 The old value (or values) of the last of the header fields is returned.
515 If no such field exists C<undef> will be returned.
517 A multi-valued field will be returned as separate values in list
518 context and will be concatenated with ", " as separator in scalar
519 context. The HTTP spec (RFC 2616) promise that joining multiple
520 values in this way will not change the semantic of a header field, but
521 in practice there are cases like old-style Netscape cookies (see
522 L<HTTP::Cookies>) where "," is used as part of the syntax of a single
527 $header->header(MIME_Version => '1.0',
528 User_Agent => 'My-Web-Client/0.01');
529 $header->header(Accept => "text/html, text/plain, image/*");
530 $header->header(Accept => [qw(text/html text/plain image/*)]);
531 @accepts = $header->header('Accept'); # get multiple values
532 $accepts = $header->header('Accept'); # get values as a single string
534 =item $h->push_header( $field => $value )
536 =item $h->push_header( $f1 => $v1, $f2 => $v2, ... )
538 Add a new field value for the specified header field. Previous values
539 for the same field are retained.
541 As for the header() method, the field name ($field) is not case
542 sensitive and '_' can be used as a replacement for '-'.
544 The $value argument may be a scalar or a reference to a list of
547 $header->push_header(Accept => 'image/jpeg');
548 $header->push_header(Accept => [map "image/$_", qw(gif png tiff)]);
550 =item $h->init_header( $field => $value )
552 Set the specified header to the given value, but only if no previous
553 value for that field is set.
555 The header field name ($field) is not case sensitive and '_'
556 can be used as a replacement for '-'.
558 The $value argument may be a scalar or a reference to a list of
561 =item $h->remove_header( $field, ... )
563 This function removes the header fields with the specified names.
565 The header field names ($field) are not case sensitive and '_'
566 can be used as a replacement for '-'.
568 The return value is the values of the fields removed. In scalar
569 context the number of fields removed is returned.
571 Note that if you pass in multiple field names then it is generally not
572 possible to tell which of the returned values belonged to which field.
574 =item $h->remove_content_headers
576 This will remove all the header fields used to describe the content of
577 a message. All header field names prefixed with C<Content-> fall
578 into this category, as well as C<Allow>, C<Expires> and
579 C<Last-Modified>. RFC 2616 denotes these fields as I<Entity Header
582 The return value is a new C<HTTP::Headers> object that contains the
583 removed headers only.
587 This will remove all header fields.
589 =item $h->header_field_names
591 Returns the list of distinct names for the fields present in the
592 header. The field names have case as suggested by HTTP spec, and the
593 names are returned in the recommended "Good Practice" order.
595 In scalar context return the number of distinct field names.
597 =item $h->scan( \&process_header_field )
599 Apply a subroutine to each header field in turn. The callback routine
600 is called with two parameters; the name of the field and a single
601 value (a string). If a header field is multi-valued, then the
602 routine is called once for each value. The field name passed to the
603 callback routine has case as suggested by HTTP spec, and the headers
604 will be visited in the recommended "Good Practice" order.
606 Any return values of the callback routine are ignored. The loop can
607 be broken by raising an exception (C<die>), but the caller of scan()
608 would have to trap the exception itself.
612 =item $h->as_string( $eol )
614 Return the header fields as a formatted MIME header. Since it
615 internally uses the C<scan> method to build the string, the result
616 will use case as suggested by HTTP spec, and it will follow
617 recommended "Good Practice" of ordering the header fields. Long header
618 values are not folded.
620 The optional $eol parameter specifies the line ending sequence to
621 use. The default is "\n". Embedded "\n" characters in header field
622 values will be substituted with this line ending sequence.
626 =head1 CONVENIENCE METHODS
628 The most frequently used headers can also be accessed through the
629 following convenience methods. Most of these methods can both be used to read
630 and to set the value of a header. The header value is set if you pass
631 an argument to the method. The old header value is always returned.
632 If the given header did not exist then C<undef> is returned.
634 Methods that deal with dates/times always convert their value to system
635 time (seconds since Jan 1, 1970) and they also expect this kind of
636 value when the header value is set.
642 This header represents the date and time at which the message was
645 $h->date(time); # set current date
649 This header gives the date and time after which the entity should be
652 =item $h->if_modified_since
654 =item $h->if_unmodified_since
656 These header fields are used to make a request conditional. If the requested
657 resource has (or has not) been modified since the time specified in this field,
658 then the server will return a C<304 Not Modified> response instead of
661 =item $h->last_modified
663 This header indicates the date and time at which the resource was last
666 # check if document is more than 1 hour old
667 if (my $last_mod = $h->last_modified) {
668 if ($last_mod < time - 60*60) {
673 =item $h->content_type
675 The Content-Type header field indicates the media type of the message
678 $h->content_type('text/html');
680 The value returned will be converted to lower case, and potential
681 parameters will be chopped off and returned as a separate value if in
682 an array context. If there is no such header field, then the empty
683 string is returned. This makes it safe to do the following:
685 if ($h->content_type eq 'text/html') {
686 # we enter this place even if the real header value happens to
687 # be 'TEXT/HTML; version=3.0'
691 =item $h->content_type_charset
693 Returns the upper-cased charset specified in the Content-Type header. In list
694 context return the lower-cased bare content type followed by the upper-cased
695 charset. Both values will be C<undef> if not specified in the header.
697 =item $h->content_is_text
699 Returns TRUE if the Content-Type header field indicate that the
702 =item $h->content_is_html
704 Returns TRUE if the Content-Type header field indicate that the
705 content is some kind of HTML (including XHTML). This method can't be
706 used to set Content-Type.
708 =item $h->content_is_xhtml
710 Returns TRUE if the Content-Type header field indicate that the
711 content is XHTML. This method can't be used to set Content-Type.
713 =item $h->content_is_xml
715 Returns TRUE if the Content-Type header field indicate that the
716 content is XML. This method can't be used to set Content-Type.
718 =item $h->content_encoding
720 The Content-Encoding header field is used as a modifier to the
721 media type. When present, its value indicates what additional
722 encoding mechanism has been applied to the resource.
724 =item $h->content_length
726 A decimal number indicating the size in bytes of the message content.
728 =item $h->content_language
730 The natural language(s) of the intended audience for the message
731 content. The value is one or more language tags as defined by RFC
732 1766. Eg. "no" for some kind of Norwegian and "en-US" for English the
733 way it is written in the US.
737 The title of the document. In libwww-perl this header will be
738 initialized automatically from the E<lt>TITLE>...E<lt>/TITLE> element
739 of HTML documents. I<This header is no longer part of the HTTP
744 This header field is used in request messages and contains information
745 about the user agent originating the request. I<E.g.>:
747 $h->user_agent('Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.0)');
751 The server header field contains information about the software being
752 used by the originating server program handling the request.
756 This header should contain an Internet e-mail address for the human
757 user who controls the requesting user agent. The address should be
758 machine-usable, as defined by RFC822. E.g.:
760 $h->from('King Kong <king@kong.com>');
762 I<This header is no longer part of the HTTP standard.>
766 Used to specify the address (URI) of the document from which the
767 requested resource address was obtained.
769 The "Free On-line Dictionary of Computing" as this to say about the
772 <World-Wide Web> A misspelling of "referrer" which
773 somehow made it into the {HTTP} standard. A given {web
774 page}'s referer (sic) is the {URL} of whatever web page
775 contains the link that the user followed to the current
776 page. Most browsers pass this information as part of a
781 By popular demand C<referrer> exists as an alias for this method so you
782 can avoid this misspelling in your programs and still send the right
785 When setting the referrer, this method removes the fragment from the
786 given URI if it is present, as mandated by RFC2616. Note that
787 the removal does I<not> happen automatically if using the header(),
788 push_header() or init_header() methods to set the referrer.
790 =item $h->www_authenticate
792 This header must be included as part of a C<401 Unauthorized> response.
793 The field value consist of a challenge that indicates the
794 authentication scheme and parameters applicable to the requested URI.
796 =item $h->proxy_authenticate
798 This header must be included in a C<407 Proxy Authentication Required>
801 =item $h->authorization
803 =item $h->proxy_authorization
805 A user agent that wishes to authenticate itself with a server or a
806 proxy, may do so by including these headers.
808 =item $h->authorization_basic
810 This method is used to get or set an authorization header that use the
811 "Basic Authentication Scheme". In array context it will return two
812 values; the user name and the password. In scalar context it will
813 return I<"uname:password"> as a single string value.
815 When used to set the header value, it expects two arguments. I<E.g.>:
817 $h->authorization_basic($uname, $password);
819 The method will croak if the $uname contains a colon ':'.
821 =item $h->proxy_authorization_basic
823 Same as authorization_basic() but will set the "Proxy-Authorization"
828 =head1 NON-CANONICALIZED FIELD NAMES
830 The header field name spelling is normally canonicalized including the
831 '_' to '-' translation. There are some application where this is not
832 appropriate. Prefixing field names with ':' allow you to force a
833 specific spelling. For example if you really want a header field name
834 to show up as C<foo_bar> instead of "Foo-Bar", you might set it like
837 $h->header(":foo_bar" => 1);
839 These field names are returned with the ':' intact for
840 $h->header_field_names and the $h->scan callback, but the colons do
841 not show in $h->as_string.
845 Copyright 1995-2005 Gisle Aas.
847 This library is free software; you can redistribute it and/or
848 modify it under the same terms as Perl itself.