#
# URL.pl - package to parse WWW URLs
#
# Jack Lund 9/3/93 zippy@ccwf.cc.utexas.edu
#

package url;

# Default port numbers for URL services

$ftp_port = 21;
$http_port = 80;
$gopher_port = 70;
$telnet_port = 23;
$wais_port = 210;
$news_port = 119;

# syntax: &url'parse_url(URL)
# returns array containing following:
# 	protocol	protocol string from url. ex: "gopher", "http".
#	host		host that specified protocol server is running on
#	port		port that server answers on
# the rest of the array is protocol-dependant. See code for details.
#

sub parse_url {
    local($url) = @_;

    if ($url =~ m#^(\w+):#) {
	$1 =~ s/[A-Z]/[a-z]/g;
	$protocol = $1;
    } else {
	return undef;
    }

    if ($protocol eq "file" || $protocol eq "ftp") {

# URL of type: file://hostname[:port]/path

	if ($url =~ m#^\s*\w+://([^ \t/:]+):?(\d*)(/.*)$#) {
	    $1 =~ s/[A-Z]/[a-z]/;
	    $host = $1;
	    $port = ($2 ne "" ? $2 : $ftp_port);
	    $path = $3;
	    return ($protocol, $host, $port, $path);
	}

# URL of type: file:/path

	if ($url =~ m#^\s*\w+:(/.*)$#) {
	    $host = `hostname`;  # Current host
	    $port = undef;
	    return ($protocol, $host, $port, $1);
	}
	return undef;
    }

    if ($protocol eq "news") {

# URL of type: news://host[:port]/article

	if ($url =~ m#^\s*\w+://([^ \t:/]):?(\d*)/(.*)$#) {
	    $host = $1;
	    $port = ($2 ne "" ? $2 : $news_port);
	    $selector = $3;
	}

# URL of type: news:article

	elsif ($url =~ m#^\s*\w+:(.*)$#) {
	    $host = $ENV{"NNTPSERVER"};
	    unless ($host) {
		warn "Couldn't get NNTP server name\n";
		return undef;
	    }
	    $port = $news_port;
	    $selector = $1;
	}
	else {
	    return undef;
	}
	return ($protocol, $host, $port, $selector);
    }

# URL of type: http://host[:port]/path[?search-string]

    if ($protocol eq "http") {
	if ($url =~ m#^\s*\w+://([\w\d\.]+):?(\d*)(/[^ \t\?]+)\??(.)*$#) {
	    $1 =~ s/[A-Z]/[a-z]/g;
	    $server = $1;
	    $port = ($2 ne "" ? $2 : $http_port);
	    $path = $3;
	    $search = $4;
	    return ($protocol, $server, $port, $path, $search);
	}
	return undef;
    }

# URL of type: telnet://user@host[:port]

    if ($protocol eq "telnet") {
	if ($url =~ m#^\s*\w+://([^@]+)@([^: \t]+):?(\d*)$#) {
	    $user = $1;
	    $2 =~ s/[A-Z]/[a-z]/g;
	    $host = $2;
	    $port = (defined($3) ? $3 : $telnet_port);
	    return($protocol, $host, $port, $user);
	}

# URL of type: telnet://host[:port]

	if ($url =~ m#^\s*\w+://([^: \t]+):?(\d*)$#) {
	    $1 =~ s/[A-Z]/[a-z]/g;
	    $host = $1;
	    $port = (defined($2) ? $2 : $telnet_port);
	    return($protocol, $host, $port);
	}
	return undef;
    }

# URL of type: gopher://host[:port]/[gtype]selector-string[?search-string]

    if ($protocol eq "gopher") {
	if ($url =~ m#^\s*\w+://([\w\d\.]+):?(\d*)/(\w?)([^ \t\?]*)\??(.*)$#) {
	    $1 =~ s/[A-Z]/[a-z]/g;
	    $server = $1;
	    $port = ($2 ne "" ? $2 : $gopher_port);
	    $gtype = ($3 ne "" ? $3 : 1);
	    $selector = $4;
	    $search = $5;
	    return ($protocol, $server, $port, $gtype, $selector, $search);
	}
	return undef;
    }

# URL of type: wais://host[:port]/database?search-string

    if ($protocol eq "wais") {
	if ($url =~ m#^\s\w+://([\w\d\.]+):?(\d*)/([^\?]+)\??(.*)$#) {
	    $1 =~ s/[A-Z]/[a-z]/g;
	    $server = $1;
	    $port = (defined($2) ? $2 : $wais_port);
	    $database = $3;
	    $search = $4;
	    return ($protocol, $server, $port, $database, $search);
	}
	return undef;
    }
}

1;

# Jack Lund                       Email: zippy@ccwf.cc.utexas.edu
# Graphics Services               Phone: (512) 471-3241
# UT Austin Computation Center
# WWW: <A HREF="http://wwwhost.cc.utexas.edu/test/zippy/zippy.html">Zippy</A>!

