#!/usr/bin/perl -wT

# xml-fetch.pl - query tags in an HTML or XML file
# by Jonathan Eisenzopf. v1.0 990214
# Copyright (c) 2012 quinstreet.com LLC. All Rights Reserved.
# Originally published and documented at http://www.webreference.com
# You may use this code on a Web site only if this entire
# copyright notice appears unchanged and you publicly display
# on the Web site a link to http://www.webreference.com/perl/.
#
# Contact eisen@quinstreet.com for all other uses.

# Overview:
#   Reads two CGI parameters, 'url' and 'fields' (a comma-separated list of
#   tag names), fetches the URL with LWP, and prints an HTML table listing
#   the attributes and content of every occurrence of each requested tag.
#   If the 'display' parameter is true, the fetched document is printed too.
#
# NOTE(review): the copy of this script that was reviewed had lost the HTML
# markup inside its string literals and heredocs. The tags emitted below are
# a reconstruction (one results table, one section per tag name) -- confirm
# against the originally published listing before relying on exact markup.

use strict;
use CGI;
use HTTP::Request;
use LWP::UserAgent;

my $query = CGI->new;
print $query->header;

# scalar() matters: CGI's param() returns a list in list context, so a request
# carrying several 'url' values could otherwise inject extra arguments.
my $url    = scalar $query->param('url');
my $fields = scalar $query->param('fields');

printError("You must complete both fields!")
    unless ($url && $fields);

# Accept only absolute http/https URLs. Reassigning from the capture also
# untaints the value, which is the documented validation idiom under -T.
if ($url =~ m{\A(https?://\S+)\z}i) {
    $url = $1;
}
else {
    printError("Only http:// and https:// URLs are supported: $url");
}

my $ua = LWP::UserAgent->new;
$ua->agent("xml-fetch/1.0");
$ua->max_size(1000000);                       # a byte count, not an array ref
$ua->protocols_allowed(['http', 'https']);    # also applies to redirects

my $request  = HTTP::Request->new(GET => $url);
my $response = $ua->request($request);

printError($response->code . ": Error retrieving URL " . $url)
    unless ($response->code == 200);

# Tag names to look for: trim surrounding whitespace and drop empty entries.
my @entities;
for my $field (split(/,/, $fields)) {
    $field =~ s/\A\s+//;
    $field =~ s/\s+\z//;
    push @entities, $field if length $field;
}

my $content = $response->content;

Print_Header();
for my $entity (@entities) {
    Print_Entity_Head($entity);

    # Quote the user-supplied name so it is matched literally, and require
    # that the name ends there so that "a" does not also match "<abbr>".
    # Capture 1 is the raw attribute text, capture 2 the element content
    # (undefined for self-closing tags).
    my $tag    = quotemeta $entity;
    my $tag_re = qr{<${tag}(?![\w:.-])\s*([^>]*?)(?:/>|>(.*?)</${tag}\s*>)}si;

    while ($content =~ /$tag_re/g) {
        # Copy the captures immediately; the inner match below overwrites them.
        my $attr_text = $1;
        my $text      = defined $2 ? $2 : "";

        # Collect name="value", name='value', name=value and bare names.
        my %h_attr;
        while ($attr_text =~ /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"']+)))?/g) {
            my $name = $1;
            my ($value) = grep { defined } ($2, $3, $4);
            $h_attr{$name} = defined $value ? $value : "";
        }

        Print_Element(\%h_attr, $text);
    }
}

print "</table>\n";
print "<pre>" . _escape_html($content) . "</pre>\n" if $query->param('display');
print "</body></html>\n";
# End of main code

# Functions

# _escape_html($text)
#   Returns $text with the HTML-significant characters & < > " ' replaced by
#   entities, so fetched or user-supplied data cannot inject markup into the
#   page. An undefined argument yields the empty string.
sub _escape_html {
    my $text = shift;
    return "" unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
}

# printError($message)
#   Prints an error page containing $message (HTML-escaped) and exits.
#   The HTTP header has already been printed by the main code.
sub printError {
    my $message = _escape_html(shift);
    print <<"HTML";
<html><head>
<title>xml-fetch ERROR</title>
</head>
<body><h1>xml-fetch ERROR</h1>
$message
</body>
</html>
HTML

    exit;
}

# Print_Header()
#   Prints the top of the results page, the requested URL, and opens the
#   results table.
sub Print_Header {
    my $shown_url = _escape_html(scalar $query->param('url'));
    print "<html><head><title>XML-Fetch Results</title></head>\n";
    print "<body><h1>XML-Fetch Results</h1>\n";
    print "<b>URL:</b> ", $shown_url, "<br><p>\n";
    print <<"HTML";
<table border="1">
HTML
    return;
}

# Print_Entity_Head($entity)
#   Prints the table rows that introduce the results for one tag name.
sub Print_Entity_Head {
    my $entity = _escape_html(shift);
    print <<"HTML";
<tr><th colspan="2">
$entity
</th>
</tr>
<tr>
<th>Attributes (Name = Value)</th>
<th>Content</th>
</tr>
HTML
    return;
}

# Print_Element(\%attributes, $text)
#   Prints one table row for a matched element: its attributes as
#   "name = value" lines in the first cell and its content in the second.
#   Attributes are sorted by name so the output is stable between runs.
sub Print_Element {
    my ($hash, $text) = @_;

    print <<"HTML";
<tr>
<td>
HTML

    for my $key (sort keys %$hash) {
        my $name  = _escape_html($key);
        my $value = _escape_html($hash->{$key});
        print <<"HTML";
$name = $value
<br>
HTML
    }

    my $shown_text = _escape_html($text);
    print <<"HTML";
</td>
<td>$shown_text</td>
</tr>
HTML
    return;
}