#!/usr/bin/perl -wT

# xml-fetch.pl - query tags in an HTML or XML file
# by Jonathan Eisenzopf. v1.0 990214
# Copyright (c) 2012 quinstreet.com LLC. All Rights Reserved.
# Originally published and documented at http://www.webreference.com
# You may use this code on a Web site only if this entire
# copyright notice appears unchanged and you publicly display
# on the Web site a link to http://www.webreference.com/perl/.
#
# Contact eisen@quinstreet.com for all other uses.

use strict;
use CGI;
use HTTP::Request;
use LWP::UserAgent;

# Main program.
#
# Required CGI parameters:
#   url     - address of the HTML/XML document to fetch
#   fields  - comma-separated list of tag names to look for
# Optional CGI parameter:
#   display - when true, the fetched document is shown below the report
#
# For every requested tag name, each occurrence found in the fetched
# document is reported through Print_Element (attributes plus content).

my $query = CGI->new;
print $query->header;

# Force scalar context: a repeated parameter must not expand into extra
# list elements in the calls below.
my $url    = scalar $query->param('url');
my $fields = scalar $query->param('fields');

printError("You must complete both fields!")
    unless ($url && $fields);

my $ua = LWP::UserAgent->new;
$ua->agent("xml-fetch/1.0");
$ua->max_size(1_000_000);    # a plain byte count, not an array reference
# The URL comes straight from the request, so only allow web schemes;
# this keeps file:// and similar local or internal schemes out of reach.
$ua->protocols_allowed([ 'http', 'https' ]);

my $request  = HTTP::Request->new(GET => $url);
my $response = $ua->request($request);

printError($response->code . ": Error retrieving URL " . $url)
    unless ($response->is_success);

# Tolerate "title, link" style input and ignore empty entries; an empty
# tag name would otherwise match every tag in the document.
my @entities;
foreach my $field (split(/,/, $fields)) {
    $field =~ s/^\s+//;
    $field =~ s/\s+$//;
    push @entities, $field if length $field;
}
my $content = $response->content;

Print_Header();
foreach my $entity (@entities) {
    Print_Entity_Head($entity);

    # The tag name is user input: quote it so it is matched literally.
    my $tag = quotemeta $entity;

    # The lookahead requires the tag name to end right after $tag, so
    # asking for "a" does not also report <abbr> or <address>.
    # $1 = raw attribute text, $3 = element content (undef for <tag/>).
    while ($content =~ /<$tag(?=[\s\/>])\s*(.*?)(\/>|>(.*?)<\/$tag>)/gsi) {
        # Copy the captures immediately, before any other regex runs.
        my ($raw_attrs, $body) = ($1, $3);

        # Test definedness, not truth, so content "0" is preserved.
        my $text = defined $body ? $body : "";

        my %h_attr;
        foreach my $attr (split(/"\s+/, $raw_attrs)) {
            next if !$attr;
            # Limit 2: keep '=' characters that are part of the value,
            # e.g. href="page?a=b".
            my ($key, $value) = split(/=/, $attr, 2);
            $value = "" unless defined $value;    # valueless attribute
            $value =~ s/\"//g;
            $h_attr{$key} = $value;
        }
        Print_Element(\%h_attr, $text);
    }
}

print "</table></center>\n";
if ($query->param('display')) {
    # The fetched document is untrusted: show it as escaped source text
    # instead of injecting its markup into this page.
    print "<HR><pre>", $query->escapeHTML($content), "</pre>\n";
}
print "</body></html>\n";
# End of main code

# Functions
# printError($message)
#
# Print a complete HTML error page containing $message on STDOUT and
# terminate the script. No HTTP header is printed here; the main program
# prints it before any call to this sub.
#
# $message is treated as plain text and HTML-escaped before output,
# because callers build it from request data such as the submitted URL.
#
# Does not return.
sub printError {
    my $message = shift;
    $message = '' unless defined $message;
    my $safe_message = CGI::escapeHTML($message);

    print <<HTML;
<html>
<head><title>xml-fetch ERROR</title></head>
<body bgcolor="#FFFFFF">
<H1>xml-fetch ERROR</H1>
<font color="#ff0000">$safe_message</font>
</body>
</html>
HTML

    exit;
}

# Print_Header()
#
# Print the opening part of the results page on STDOUT: document head,
# page title, the requested URL, and the opening tag of the results table.
# Reads the 'url' parameter from the file-level $query object.
# Takes no arguments.
sub Print_Header {
    # scalar(): use a single value even if 'url' was submitted repeatedly.
    # The URL is request data, so escape it before placing it in the page.
    my $url = $query->escapeHTML(scalar $query->param('url'));
    $url = '' unless defined $url;

    print <<HTML;
<html><head><title>XML-Fetch Results</title></head>
<body bgcolor="#FFFFFF"><center><H1>XML-Fetch Results</H1>
<B>URL:</B> $url<BR><p>
<table border="1" cellpadding="2" cellspacing="1" width="100%">
HTML

    return;
}

# Print_Entity_Head($entity)
#
# Print two table rows on STDOUT: a heading row showing the tag name
# $entity, followed by a row with the column titles
# "Attributes (Name = Value)" and "Content".
#
# $entity is HTML-escaped before output because it originates from the
# request's 'fields' parameter.
sub Print_Entity_Head {
    my $entity = shift;
    $entity = '' unless defined $entity;
    my $safe_entity = CGI::escapeHTML($entity);

    print <<HTML;
<TR bgcolor="#003399">
   <TH colspan="2"><font color="#FFFFFF" face="helvetica,arial">$safe_entity</font></TH>
</TR>

<TR bgcolor="#ffffcc">
   <TH>Attributes (Name = Value)</TH>
   <TH>Content</TH>
</TR>
HTML

    return;
}

# Print_Element($hash, $text)
#
# Print one table row on STDOUT for a single matched element.
#   $hash - reference to a hash of attribute name => value pairs
#   $text - the element's content
#
# The left cell holds a nested table with one "name = value" row per
# attribute; the right cell holds $text. Attributes are listed in sorted
# name order so the output is stable between runs. All three pieces of
# data are HTML-escaped before output; the main program takes them from
# the fetched document.
sub Print_Element {
    my ($hash, $text) = @_;
    $hash ||= {};
    $text = '' unless defined $text;

    print <<HTML;
<TR bgcolor="#cccccc"><td>
   <table align="center" border="1" cellpadding="1" cellspacing="1" width="100%">
HTML

    foreach my $key (sort keys %$hash) {
        my $value = defined $hash->{$key} ? $hash->{$key} : '';
        my $safe_key   = CGI::escapeHTML($key);
        my $safe_value = CGI::escapeHTML($value);
        print <<HTML;
<tr align="center" bgcolor="#cccccc">
   <td>$safe_key = $safe_value</td>
</tr>
HTML
    }

    my $safe_text = CGI::escapeHTML($text);
    print <<HTML;
</table></td>
<td>$safe_text</td></tr>

HTML

    return;
}



