#!/usr/bin/perl
# -*- mode: perl -*-

# $Id: annoprox,v 1.10 2004/06/09 09:19:06 eric Exp $

#Copyright Massachusetts Institute of Technology, 2003.
#Written by Eric Prud'hommeaux for the World Wide Web Consortium

$REVISION = '$Id: annoprox,v 1.10 2004/06/09 09:19:06 eric Exp $ ';

#BEGIN {unshift@INC,('../../..');}
use strict;

package miniRdfApp;
use W3C::Rdf::RdfApp;
@miniRdfApp::ISA = qw(W3C::Rdf::RdfApp);
use W3C::Util::Exception;

sub new {
    my ($proto) = @_;
    my $class = ref $proto || $proto;
    my $self = $class->SUPER::new();
    bless ($self, $class);
    $self->{ARGS} = {};
    $self->prepareParser;
    return $self;
}

package auxReqSession;
use W3C::Http::ProxySession qw($SUMMARY $ERROR $SOCKETS $OBJECTS $PROTOCOL $SELECT $DAEMON $PROXY_ACTION);
@auxReqSession::ISA = qw(W3C::Http::ProxySession);
use W3C::Util::Exception;



package annoProxSession;
use W3C::Http::ProxySession qw($SUMMARY $ERROR $SOCKETS $OBJECTS $PROTOCOL $SELECT $DAEMON $PROXY_ACTION);
@annoProxSession::ISA = qw(W3C::Http::ProxySession);
use W3C::Util::Exception;

use XML::DOM;
use W3C::Rdf::RdfDB;
use W3C::Rdf::Atoms qw($RDF_SCHEMA_URI);
use W3C::XML::InputSource;

# Get some handy namespace constants.
use W3C::Annotations::AnnotationApp qw($NS_ANNOTATION $NS_THREAD $NS_HTTP $NS_DC
				       $NS_PALM $NS_ATTRIBUTIONS);

#sub htmlStarter {
sub processRequest {
    my ($self) = @_;
    $self->SUPER::processRequest();

    my $atoms = new W3C::Rdf::Atoms();
    $self->{DB} = new W3C::Rdf::RdfDB(-atomDictionary => new W3C::Rdf::Atoms(), -lazyReification => 1);
    $self->{RDF_APP} = new miniRdfApp();
    $self->{RDF_APP}->setRdfDB($self->{DB});
    
    # select a datasources
    my @queryServers = ('http://annotest.w3.org/annotations', 
			'http://annodev.w3.org/annotations', 
			'http://iggy.w3.org/annotations');
    for my $queryServer (@queryServers) {
	$self->queryDocument($queryServer);
    }
}

sub queryDocument {
    my ($self, $queryServer) = @_;

    # Construct the query.
    $self->{ANNOT_ALGAE_QUERY} = "(${RDF_SCHEMA_URI}type ?annotation ${NS_ANNOTATION}Annotation) 
       (${NS_ANNOTATION}annotates ?annotation $self->{URI})
       (${NS_ANNOTATION}context ?annotation ?context)
       (${NS_ANNOTATION}body ?annotation ?body)
       (${NS_HTTP}Body ?body ?bodyData) 
       (${NS_HTTP}ContentType ?body ?contentType)";

    my $dataSource;
    if (0) {
	$dataSource = "$queryServer?w3c_annotates=$self->{URI}";
    } else {
	my $remoteQuery = "(
 ask '($self->{ANNOT_ALGAE_QUERY})
 collect ())";
	use CGI;
	$dataSource = "$queryServer?w3c_algaeQuery=".CGI::escape($remoteQuery);
    }

    # Create a user agent object
    use LWP::UserAgent;
    my $ua = LWP::UserAgent->new;
    $ua->agent('annoprox 0.1');
    my $req = HTTP::Request->new(GET => $dataSource);
    $req->header(Accept => "text/rdf, */*;q=0.1");
    my $res = $ua->request($req);
    if ($res->is_success) {
	$self->{DATA_URI} = $dataSource;
	$self->rdfHandler(\$res->content);
    }
}

sub rdfHandler {
    my ($self, $pText) = @_;
    my $inputSource = new W3C::XML::InputSource($pText);
    $inputSource->setPublicId($self->{DATA_URI});
    $self->{RDF_APP}->parseOne($inputSource);
}

sub htmlHandler {
    my ($self, $pText) = @_;

    # transform $$pText here
    $$pText = &linkToAnnotations($$pText, $self->gatherMods());

    my $newlen = length($$pText);
    $self->{ReplyHeaders} =~ s/\nContent-Length: \d+/\nContent-Length: $newlen/i;
    $self->{Buffer} = join("", $self->{ReplyHeaders}, $$pText);
}



sub gatherMods {
    my ($self) = @_;
    # Construct the query.
    my $algae = $self->{DB}->getAlgaeInterface;
    my $algaeQuery = "(
 ask '($self->{ANNOT_ALGAE_QUERY}) 
 collect '(?context ?body ?contentType ?bodyData))";
    $algae->{-rdfApp} = new miniRdfApp();
    $self->{PROXY}->log($SUMMARY, $algaeQuery);
    my @mods;
    eval {
	my ($nodes, $selects, $messages, $proofs) = $algae->algae($algaeQuery, undef, {-uniqueResults => 1});
	if (@$nodes) {
	    push (@mods, map {[$_->[0]->isa('W3C::Rdf::String') ? $_->[0]->getString : $_->[0]->getUri, 
			       $_->[1]->getUri, 
			       $_->[2]->getString, 
			       $_->[3]->getString]} @$nodes);
	}
    }; if ($@) {
	#if (my $ex = &catch('W3C::Util::Exception')) {
	#} else {&throw()}
	if (my $ex = &catch('W3C::Util::NoSuchResourceException')) {
	} elsif (my $ex = &catch('W3C::Util::CachedContextException')) {
	    if ($ex->{-exception} && 
		$ex->{-exception}->isa('W3C::Util::NoSuchResourceException')) {
	    } else {&throw()}
	} else {&throw()}
    }
    return @mods;
}

sub linkToAnnotations {
    my ($text, @mods) = @_;
    my $parser = new XML::DOM::Parser;
    my $doc;
    eval {
	$doc = $parser->parse($text);
    }; if ($@) {&throw()}

    my $head = &lameXPointer($doc, '/html[1]/head[1]');
    &lameCreate($doc, $head->[0], "script[\@type=\"text/javascript\"]/text(\"function showAnnotation(body){window.open(body)} function hideAnnotation(body){}\")", 0);
    # Get a list of modifications
    my @addHere;
    foreach my $mod (@mods) {
	my ($context, $body, $contentType, $bodyData) = @$mod;
	# Only contexts we understand.
	next if ($context !~ /xpointer\(([^\)]+)\)$/);
	my $xpointer = $1;
	if ($xpointer eq '/html[1]') {
	    $xpointer = '/html[1]/body[1]/#create(p/span[@class="eoDocAnnotations"])';
	}
	eval {
	    if (my $spot = &lameXPointer($doc, $xpointer)) {
		push (@addHere, [$spot, $body, $contentType, $bodyData]);
	    }; if ($@) {if (my $ex = &catch('W3C::Util::Exception')) {
	    } else {&throw()}}
	}
    }

    # Perform those modifications
    foreach my $addMe (@addHere) {
	my ($spot, $body, $contentType, $bodyData) = @$addMe;
	my ($node, $add) = @$spot;
	if ($add) {
	    $node = &lameCreate($doc, $node, $add, 0);
	}
	$node = &lameCreate($doc, $node, "a[\@href=\"$body\" and \@onmouseover=\"showAnnotation('$body')\" and \@onmouseout=\"hideAnnotation('$body')\"]/text(\" !ANNOT! \")", 1);
    }
    return $doc->toString;
}

sub lameXPointer {
    my ($doc, $xpointer) = @_;
    my $cur = $doc;
  SEGMENT:
    while (pos $xpointer < length $xpointer) {
	my $lastPos = pos $xpointer;
	if ($xpointer =~ m/\G\/?/gcxsi) {
	} elsif ($xpointer =~ m/\G\#(\w+)\(([^\)]+)\)/gcxsi) {
	    return [$cur, $2];
	} elsif ($xpointer =~ m/\G(\w+)\[(\d+)\]\/?/gcxsi) {
	    my ($name, $count) = ($1, $2);
	    my $in = $cur;
	    eval {
		for ($cur = $cur->getFirstChild; $count; $cur = $cur->getNextSibling) {
		    if ($cur) {
			if ($cur->getNodeType == ELEMENT_NODE) {
			    if ($cur->getNodeName eq $name) {
				if (--$count == 0) {
				    next SEGMENT;
				}
			    }
			}
		    } else {
			&throw(new W3C::Util::Exception(-message => "couldn't find $name $count in ".$in->toString));
			# return undef;
		    }
		}
	    }; if ($@) {
		&throw(new W3C::Util::CachedContextException(-str => $xpointer, -pos => $lastPos, 
							     -errorMessage => $@));
	    }
	} else {
	    my $substr = substr($xpointer, pos $xpointer);
	     &throw(new W3C::Util::Exception(-message =>  "unknown path segment: \"$substr\""));
	    # return undef;
	}
    }
    return [$cur, undef];
}

sub lameCreate {
    my ($doc, $node, $add, $before) = @_;
    while (pos $add < length $add) {
	my $lastPos = pos $add;
	if ($add =~ m/\Gtext\(\"([^\"]+)\"\)/gcxsi || 
	    $add =~ m/\Gtext\(\'([^\']+)\'\)/gcxsi) {
	    my $text = $doc->createTextNode($1);
	    $node->appendChild($text);
	} elsif ($add =~ m/\G(\w+)\/?/gcxsi) {
	    my $elementName = $1;
	    eval {
		my $element = $doc->createElement($elementName);
		if ($before) {
		    $node->getParentNode->insertBefore($element, $node);
		} else {
		    if ($node->hasChildNodes()) {
			$node->insertBefore($element, $node->getFirstChild());
		    } else {
			$node->appendChild($element);
		    }
		}
		$node = $element;
	    }; if ($@) {
		&throw(new W3C::Util::CachedContextException(-str => $add, -pos => $lastPos, 
							     -errorMessage => $@));
	    }
	} elsif ($add =~ m/\G\[/gcxsi) {
	    while ($add !~ m/\G\]\/?/gcxsi) {
		$lastPos = pos $add;
		if ($add =~ m/\G\@(\w+)\=\"([^\"]+)\"(?:\s*and\s*)?/gcxsi || 
		    $add =~ m/\G\@(\w+)\=\'([^\']+)\'(?:\s*and\s*)?/gcxsi) {
		    $node->setAttribute($1, $2);
		} else {
		    &throw(new W3C::Util::CachedContextException(-str => $add, -pos => $lastPos, 
								 -errorMessage => 'unknown sub select'));
		}
	    }
	} else {
	    my $substr = substr($add, pos $add);
	     &throw(new W3C::Util::Exception(-message => "unknown createCommand: \"$substr\""));
	    return undef;
	}
    }
    return $node;
}

package annoprox;
@annoprox::ISA = qw(W3C::Http::Proxy);
use W3C::Util::Exception;
use W3C::Http::Proxy;
use W3C::Http::ProxySession qw($SUMMARY $ERROR $SOCKETS $OBJECTS $PROTOCOL $SELECT $DAEMON $PROXY_ACTION);
# Overloaded Proxy methods.

sub createSession {
    my ($self, $new, $ip) = @_;
    return new annoProxSession($self, $new, $ip);
}

sub perLoop {
    my ($self) = @_;
    while ($self->{HUPPED}) {
	$self->log($ERROR, 'HUPPED');
	$self->{HUPPED}--;
    }
}

my $PidFile = '/tmp/annoprox.pid';
my $ErrorLog = '/tmp/annoprox.err';
my $AccessLog = '/tmp/annoprox.acc';

sub forkParent {
    my ($self, $pid) = @_;
    $self->log($DAEMON, "serverpid: $pid");
    $self->log($DAEMON, "  pidfile: $PidFile");
    $self->log($DAEMON, " errorlog: $ErrorLog");
    $self->log($DAEMON, "accesslog: $AccessLog");
}

sub forkChild {
    my ($self) = @_;
    open(STDIN,  "</dev/null") ||
	&throw(new W3C::Util::Exception(-message => "can't open /dev/null for read ($!)"));
    open(ACCESS, ">>$AccessLog") ||
	&throw(new W3C::Util::Exception(-message => "can't open $AccessLog for append ($!)"));
    open(STDERR, ">>$ErrorLog") ||
	&throw(new W3C::Util::Exception(-message => "can't open $ErrorLog for append ($!)"));
    select(STDERR); $|=1;
    select(ACCESS);

    # Replace the default Logger.
    $self->{LOGGER} = new W3C::Util::Logger(-handles => {$SUMMARY => \*STDERR, 
							 $PROTOCOL => undef, 
							 $SELECT => undef},
					    -default => \*STDERR);
}

# MAIN -- simple exception handler.
eval {
    my $proxy = new annoprox(undef, 8888, 5);
    #use sigtrap qw(die normal-signals);
    $proxy->enableSignals(HUP => sub {$proxy->{HUPPED}++});
    $proxy->addHandler('text/html', \&annoProxSession::htmlHandler, 1, undef); # \&annoProxSession::htmlStarter);
    $proxy->main($ARGV[0], $PidFile);
}; if ($@) {if (my $ex = &catch('W3C::Util::Exception')) {
	die $ex->toString;
    } else {
	die $@;
    }
}

sub Help {
    print <<EOF;
NAME

  annoprox - small HTTP proxy displaying Annotatea annotations.

SYNOPSIS

  annoprox


DESCRIPTION

  annoprox connects to an annotation server for each mime type that
  it has a prayer of annotating. If it gets back an annotation type
  that it understands, it will tweak the rendering to reflect that
  annotation context and body.


OPTIONS

  run it or don\'t.

AUTHOR

  Eric Prud\'hommeaux <eric.\@w3..org>

  Copyright 2003 by Eric Prud\'hommeaux for W3C and MIT, ERCIM, Keio

EOF
    exit(0);
}

__END__

=head1 NAME

W3C::Annotations::annoprox - an HTTP proxy for decorating pages with annotations

=head1 SYNOPSIS

    package annoprox;
    @annoprox::ISA = qw(W3C::Http::Proxy);

    sub createSession {
        my ($self, $new, $ip) = @_;
        return new W3C::Http::ProxySession($self, $new, $ip);
    }

    eval {
        my $proxy = new annoprox(undef, 8888, 5);
        #use sigtrap qw(die normal-signals);
        $proxy->enableSignals(HUP => sub {$proxy->{HUPPED}++});
        $proxy->addHandler('text/html', 
                           \&annoProxSession::htmlHandler, 1, undef);
        $proxy->main($ARGV[0], $PidFile);
    }; if ($@) {if (my $ex = &catch('W3C::Util::Exception')) {
    	die $ex->toString;
        } else {
    	die $@;
        }
    }

=head1 DESCRIPTION

C<annoprox> connects to an annotation server for each mime type that
it has a prayer of annotating. If it gets back an annotation type
that it understands, it will tweak the rendering to reflect that
annotation context and body.

This module is part of the W3C::Annotations CPAN module.

=head1 AUTHOR

Eric Prud'hommeaux <eric@w3.org>

=head1 SEE ALSO

L<W3C::Http::ProxySession>

=cut
