package urlparse; =head1 NAME urls - perl module to deal with parsing and building URL's in accordance with RFC2396. =head1 SYNOPSIS use urlparse qw(&parseuri &builduri); ($scheme, $authority, $path, $query, $fragment) = &parseuri ($given); $url = &builduri ($scheme, $authority, $path, $query, $fragment); =head1 DESCRIPTION There are a lot of "gotcha's" in dealing with parsing and building URL's. There is enough confusion, and missimplemention, that they finally issued an RFC (RFC 2396) that clairifies the procedures. This code just implements that RFC. =head1 AUTHOR John Halleck =head1 COPYRIGHT (C) Copyright 2000 by John Halleck. =cut require 5.0; # Not a prayer with perl 4 use strict; use vars qw ($VERSION @ISA @EXPORT_OK); @ISA = qw (Exporter); @EXPORT_OK = qw(&parseuri &builduri); $VERSION = 1.0; # ============== Deal with URL's. sub parseuri { # parse a uri in the manner recomended by RFC 2396 # (We shamelessly steal the regular expression from that document # to insure we comply with it.) my $given = shift; die ("No URI given") if !defined $given; my ($scheme, $authority, $path, $query, $fragment) = ($given =~ m;^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$;) [2-1, 4-1, 5-1, 7-1, 9-1] ; # Note shift by one to match perl convention. return ($scheme, $authority, $path, $query, $fragment); } # ------------------ sub builduri { # build a URI from it's components. (RFC 2396 5.2.7) my $scheme = shift; my $authority = shift; my $path = shift; $path = '' if !defined $path; # Really an error, but what to do? my $query = shift; my $fragment = shift; # Code directly transliterated from the RFC. % escaping added to match parsing # restrictions in parseuri. (Which are the official ones) my $result = ''; if (defined $scheme) { $scheme =~ s/:/%3A/g; $scheme =~ s;/;%2F;g; $scheme =~ s/\?/%3F/g; $scheme =~ s/\#/%23/g; $result .= $scheme . ':'; } if (defined $authority) { $authority =~ s:/:%2F:g; $authority =~ s/\?/%3F/g; $authority =~ s/\#/%23/g; $result .= '//' . $authority; } $path =~ s/\?/%3F/g; $path =~ s/\#/%23/g; $result .= $path; if (defined $query) { $query =~ s/\#/%23/g; $result .= '?' . $query; } if (defined $fragment) { $result .= '#' . $fragment; } # print "DEBUG: builduri returning $result\n"; return $result; } 1; # End Package