#-------------------------------------------------------------
# Minimal HTTP/1.0 fetch helpers built directly on Socket.
#
#   open_tcp  - connect a filehandle to host:port over TCP
#   url_get   - fetch the body of a URL (follows redirects)
#   split_url - break a URL into service, host and path
#-------------------------------------------------------------
use strict;
use warnings;

use Socket;
use Symbol qw(gensym);

# Status code and header fields of the most recent response parsed by
# url_get.  They were implicit package globals in earlier versions of this
# file and are kept as such in case callers read them after url_get returns
# (NOTE(review): confirm whether any caller does).  url_get resets both at
# the start of every call so values never leak from one request to the next.
our ($respcode, %header);

#-----------------------------------------------------------------
# Open a TCP connection to $dest:$port on the filehandle $FS.
#
# $FS   - filehandle to connect: a glob reference, or (for legacy
#         callers passing a bareword without strict) a handle name.
# $dest - host name or dotted-quad address.
# $port - numeric TCP port.
#
# Returns: 1 on success, undef if the host cannot be resolved or the
#          socket cannot be created or connected.
#-----------------------------------------------------------------
sub open_tcp
{
    my ($FS, $dest, $port) = @_;

    return undef unless (defined $dest) && (defined $port);

    # inet_aton yields undef for an unresolvable name; handing that to
    # sockaddr_in would croak instead of letting us report failure.
    my $addr = inet_aton ($dest);
    return undef unless defined $addr;

    my $proto = getprotobyname ('tcp');
    my $sin   = sockaddr_in ($port, $addr);

    # Legacy callers pass the handle as a plain name, which is a symbolic
    # reference; allow that here only.
    no strict 'refs';

    socket ($FS, PF_INET, SOCK_STREAM, $proto) or return undef;

    if (! connect ($FS, $sin))
    {
        close ($FS);    # do not leak the descriptor on failure
        return undef;
    }

    # Unbuffer the socket so a request is sent as soon as it is printed.
    my $old_fh = select ($FS);
    $| = 1;
    select ($old_fh);

    return 1;
}

#-------------------------------------------------------------
# Given a URL, get the data from that URL by querying the
# associated web server over plain HTTP/1.0.
#
# $url                 - absolute URL, e.g. http://host[:port]/path
# $include_header_info - accepted for compatibility; it is only
#                        forwarded to the redirected request and has
#                        no other effect.
# $redirects_left      - optional; how many more redirects may be
#                        followed (default 10).
#
# Returns: 1 if the URL cannot be parsed;
#          undef if the server cannot be reached (an error message
#          is printed);
#          otherwise the list ($rc, @lines) where @lines holds the
#          response body line by line and $rc is 0, or 1 when the
#          redirect limit was exhausted.
#-------------------------------------------------------------
sub url_get
{
    my ($url, $include_header_info, $redirects_left) = @_;
    $redirects_left = 10 unless defined $redirects_left;

    # Break the URL into a remote host name and a target directory.
    my ($service, $remote, $target) = split_url ($url);
    return 1 unless defined $service;

    # Honour an explicit "host:port" authority; default to port 80.
    my ($host, $port) = ($remote, 80);
    if ($remote =~ /\A(.+):(\d+)\z/)
    {
        ($host, $port) = ($1, $2);
    }

    # HTTP requires CRLF line terminators.
    my $request = "GET $target HTTP/1.0\r\nHost: $remote\r\n\r\n";

    # Forget whatever the previous request left behind.
    $respcode = undef;
    %header   = ();

    #--------------------------------------------------------
    # Establish socket connection to the http server.
    #--------------------------------------------------------
    my $sock = gensym;
    if (! defined open_tcp ($sock, $host, $port))
    {
        print "Error connecting to server at $url\n";
        return undef;
    }

    # Request and receive the page.
    print {$sock} $request;

    my @lines;
    my $in_header = 1;    # 1 = status line, 2 = header fields, 0 = body
    while (defined (my $line = <$sock>))
    {
        if ($in_header == 0)
        {
            push @lines, $line;
            next;
        }

        # A blank line separates the header from the body.
        if ($line =~ /^\s*\r?\n$/)
        {
            $in_header = 0;
            next;
        }

        $line =~ s/\r?\n\z//;

        if ($in_header == 1)
        {
            # Status line: "HTTP/x.y CODE reason".
            (undef, $respcode) = split (' ', $line, 3);
            $in_header = 2;
            next;
        }

        # Header field: "Name: value".
        my $colonpos = index ($line, ":");
        if ($colonpos >= 1)
        {
            my $key   = substr ($line, 0, $colonpos);
            my $value = substr ($line, $colonpos + 1);
            $value =~ s/^\s+//;
            $value =~ s/\s+$//;
            $header{$key} = $value;
        }
    }
    close ($sock);

    my $rc = 0;

    # Page redirect: recursively try again.
    if ((defined $respcode) && $respcode =~ /\A30[12378]\z/)
    {
        # Header names are case-insensitive.
        my ($loc_key) = grep { lc ($_) eq 'location' } keys %header;
        my $location  = defined $loc_key ? $header{$loc_key} : undef;

        if ((defined $location) && $location ne '')
        {
            # Stop instead of looping forever on a redirect cycle.
            return (1, @lines) if $redirects_left <= 0;

            # Resolve the two common relative forms against this URL.
            if ($location =~ m{\A//})
            {
                $location = "$service:$location";
            }
            elsif ($location =~ m{\A/})
            {
                $location = "$service://$remote$location";
            }

            ($rc, @lines) = url_get ($location, $include_header_info,
                                     $redirects_left - 1);
        }
    }

    return ($rc, @lines);
}

#-----------------------------------------------------------------
# Break the URL into a service (scheme), a remote host name and a
# target directory.
#
# Returns: the list ($service, $remote, $target), with $target
#          defaulting to "/" when the URL has no path; undef when
#          the URL is not of the form service://remote[/target].
#-----------------------------------------------------------------
sub split_url
{
    my ($url) = @_;

    return undef unless defined $url;

    # Lexicals, so a failed match can never return a previous call's parts.
    my ($service, $remote, $target)
        = $url =~ m{^([^:]+)://([^/]+)(/?.*)};

    return undef unless defined $remote;

    $target = "/" if $target eq "";

    return ($service, $remote, $target);
}

1;