package TextClass;
#use strict;
use vars qw( @ISA @EXPORT );
use Exporter ();
@ISA = qw( Exporter );
use CGI qw( :standard :html3 escape unescape escapeHTML );
require 5.004;
use POSIX qw(locale_h);
use locale;
use TerminologyMapper;
use QueryFactory; # needed just to new queryfactory
use SearchSet; # new, AddQuery methods
use XPat;
use XPatResultSet;
use ProcIns;
use TextClassUtils;
use DlpsUtils qw( :DEFAULT );
# **********************************************************************
# this module is for TextClass objects, which can be subclassed
# for different search and filtering behavior
#
# The structure of this object is:
# TextClass Object->
# {'collid'} # these from coll info database
# {'collname'}
# {'subclass'}
# {'subclassModule'}
# {'qtytexts'}
# {'homesite'}
# {'host'}
# {'dd'}
# {'map'}
# {'patexec'}
# {'port'}
# {'lel'}
# {'termsearch'}
# {'regionsearch'}
# {'termmapper'} # TerminologyMapper object made from {'map'}
# {'shouldmap'}
# {'queryfactory'}
#
# Other things get added along the way: SearchSet and XPatResultSet objects
#
# **********************************************************************
# some package globals used in filtering
# %HIstarts maps a rendition name to a string.
# NOTE(review): presumably the string written where a highlighted span of
# that rendition begins; every value here is a single space, which suggests
# the original markup may have been lost -- confirm against a clean copy.
my %HIstarts = (
'italic' => ' ',
'italics' => ' ',
'italics, underlined' => ' ',
'italics?' => ' ',
'smcap' => ' ',
'underlined' => ' ',
'gothic' => ' ',
'underlined 2x' => ' ',
'underlined 3x' => ' ',
'indented' => ' ',
);
# %HIends has exactly the same keys as %HIstarts.
# NOTE(review): presumably the matching closing string for each rendition;
# 'indented' is the only entry that maps to the empty string -- confirm.
my %HIends = (
'italic' => ' ',
'italics' => ' ',
'italics, underlined' => ' ',
'italics?' => ' ',
'smcap' => ' ',
'underlined' => ' ',
'gothic' => ' ',
'underlined 2x' => ' ',
'underlined 3x' => ' ',
'indented' => '',
);
# ----------------------------------------------------------------------
# NAME : new
# PURPOSE : construct a TextClass object (or an object of whatever
#           class name is passed in, e.g. a subclass)
#
# CALLED BY : main
# CALLS : $self->_initialize
# INPUT : class name, followed by the positional collection values
#         that are handed on unchanged to _initialize
# RETURNS : the new blessed hash reference
# NOTES : the hash is blessed before _initialize runs, so _initialize
#         can call methods on it
# ----------------------------------------------------------------------
sub new
{
    my ( $class, @collectionInfo ) = @_;

    my $self = bless {}, $class;
    $self->_initialize( @collectionInfo );

    return $self;
}
# ----------------------------------------------------------------------
# NAME : _initialize
# PURPOSE : create structure for TextClass object
# CALLED BY : new
# CALLS : TerminologyMapper->new, ProcIns->new,
#         $self->SetCurrentIndentDepth, $self->SetLastIndentLabel
# INPUT : positional list, in this order:
#         collid, collname, subclass, subclassModule, qtytexts,
#         homesite, host, webdir, objdir, dd, wwdd, map, patexec,
#         port, lel, termsearch, regionsearch, wwrealms,
#         wwrealmsenglish, genres, genders, periods, languages, locale
# RETURNS : nothing meaningful (value of the last assignment)
# NOTES : each positional value is stored under the hash key of the
#         same name; missing trailing values are stored as undef and
#         surplus values are ignored.
#         Constructors are called with Class->new rather than the
#         indirect-object form "new Class", because this package
#         defines its own sub new and the indirect form then depends
#         on parser heuristics.
# ----------------------------------------------------------------------
sub _initialize
{
    my $self = shift;

    # hash keys, listed in the same order as the positional arguments
    my @fieldNames = qw(
        collid collname subclass subclassModule
        qtytexts homesite host webdir objdir
        dd wwdd map patexec
        port lel termsearch regionsearch
        wwrealms wwrealmsenglish
        genres genders periods languages
        locale
    );
    @{$self}{@fieldNames} = @_;

    ## create TerminologyMapper object for this collection
    $self->{'termmapper'} =
        TerminologyMapper->new(
            $self->{'map'},
            { 'label' => 1 },
        );

    # patterns handed to the QueryFactory as its "should map" list
    # (see AddQueryFactory)
    $self->{'shouldmap'} =
        [ 'rgn\d*', 'op\d+', 'amt\d+', ];

    $self->{'pio'} = ProcIns->new;
    $self->{'printpsetstring'} = qq{pr.200 shift.-100 };

    $self->SetCurrentIndentDepth ( 0 );
    $self->SetLastIndentLabel ( '' );

    # ************************************************************
    #
    # configuration info ( very possibly overridden in subclasses )
    #
    # ************************************************************
    $self->{'divheadrange'} = [ 1, 2, 3, 4 ];
    $self->{'fisheyethreshold'} = 50;
    $self->{'headdepths'} = {
        'mainheader' => 0,
        'div1head' => 1,
        'div2head' => 2,
        'div3head' => 3,
        'div4head' => 4,
        'div5head' => 5,
        'div6head' => 6,
        'div7head' => 7,
        'div8head' => 8,
        'div9head' => 9,
    };
    $self->{'maxdepthitems'} = [
        'kwic',
        'LG',
        'ACT',
        'SCENE',
        'SPEECH',
        'PARAGRAPH',
        'POEM',
    ];
}
# ----------------------------------------------------------------------
# NAME : AddQueryFactory
# PURPOSE : Initialize a QueryFactory object in this TextClass object
# CALLED BY : main::HandleSearch
# CALLS : errorBail, QueryFactory->new, $self->GetTermMapper,
#         $self->GetValueByKey (debug output only)
# INPUT : CGI object
# RETURNS :
# GLOBALS : $ENV{'DEBUG'} switches on a diagnostic print
# SIDE-EFFECTS : sets $self->{'queryfactory'} (to undef when there is
#                no search type); may rewrite the CGI object's "type"
#                and delete its "pagesearch" parameter
# NOTES : "type" param is for a user requested search
#         "pagesearch" param is for a temp queryfactory to be used
#         in doing searches for matching pages in a header view
#         If there is neither, we can assume we won't need a qf to
#         do any searches.
# ----------------------------------------------------------------------
sub AddQueryFactory
{
    my ( $self, $cgi ) = @_;

    my $type = $cgi->param( 'type' );
    my $pagesearch = $cgi->param( 'pagesearch' );

    if ( $pagesearch )
    {
        if ( $type )
        {
            # both params present: report the error
            &errorBail( qq{Cannot have both a "type" and a "pagesearch" parameter on a URL} );
        }
        else
        {
            # only a pagesearch: turn it into a type so that the qf is happy
            $cgi->param( 'type', $cgi->param( 'pagesearch' ) );
            $type = $cgi->param( 'type' );
            $cgi->delete( 'pagesearch' );
        }
    }

    my $qf;
    if ( $type )
    {
        # there is a type, create QueryFactory object for this collection
        $qf =
            QueryFactory->new (
                $cgi,
                $self->GetTermMapper( ),
                $self->{'shouldmap'},
                'label'
            );
        $self->{'queryfactory'} = $qf;
        if ( $ENV{'DEBUG'} )
        {
            print qq{
Just created queryfactory $qf for TC obj: } . $self->GetValueByKey('collname') . qq{
\n};
        }
    }
    else
    {
        # no type, so don't bother building a qf; $qf is still undef here
        $self->{'queryfactory'} = $qf;
    }
}
# ----------------------------------------------------------------------
# NAME : AddNeededSets
# PURPOSE : Initialize a SearchSet object and an XPatResultSet object
#           in this TextClass object
# CALLED BY : main::HandleSearch
# CALLS : $self->AddSearchSet, $self->AddResultSet
# INPUT : name under which the sets are stored
# RETURNS :
# GLOBALS :
# SIDE-EFFECTS : adds entries to this object's search sets and
#                result sets
# NOTES : for the name 'header' a second pair of sets named 'toc' is
#         added as well, for Table of Contents info
# ----------------------------------------------------------------------
sub AddNeededSets
{
    my ( $self, $name ) = @_;

    my @setNames = ( $name );
    push @setNames, 'toc' if ( $name eq 'header' );

    foreach my $setName ( @setNames )
    {
        $self->AddSearchSet( $setName );
        $self->AddResultSet( $setName );
    }
}
# Attach a new SearchSet under $name, unless a (true) entry is already
# stored there; an existing SearchSet is left untouched.
sub AddSearchSet
{
    my ( $self, $name ) = @_;
    $self->{'searchsets'}{$name} ||= SearchSet->new( );
}
# Attach a new XPatResultSet under $name, unless a (true) entry is
# already stored there; an existing result set is left untouched.
sub AddResultSet
{
    my ( $self, $name ) = @_;
    $self->{'resultsets'}{$name} ||= XPatResultSet->new( );
}
# ----------------------------------------------------------------------
# NAME : SimpleResultsFrameSearches
# PURPOSE : adds the set of searches necessary for a generic simple
#           search to this TextClass object's SearchSet object
# CALLED BY : main
# CALLS : SearchSet->AddQuery, $self->AddScopedHeads
# INPUT : CGI object, session id, search set name, lel value
# RETURNS : NONE
# SIDE-EFFECTS : adds to SearchSet object
# NOTES : relies on the 'slicesearch' and 'mainslicesearch' names
#         already being registered in the search set (GuideFrameSearches
#         registers them). The main header query is always added; the
#         div-head and kwic queries only when lel >= 3.
# ----------------------------------------------------------------------
sub SimpleResultsFrameSearches
{
    my ( $self, $cgi, $sid, $ssetName, $lel ) = @_;

    my $sset = $self->GetSearchSet( $ssetName );
    # fetched as in the other *Searches routines, but not used below
    my $qf = $self->GetQueryFactory( );
    my $tm = $self->GetTermMapper( );

    # names of searches previously created for the guide frame...
    my $sliceSearchName = $sset->GetNamedSearchName( 'slicesearch' );
    my $mainSliceName = $sset->GetNamedSearchName( 'mainslicesearch' );
    # ...and names for the searches added here
    my $mainHeaderName = $sset->SetNamedSearchName( 'mainheader', $sid );
    my $kwicName = $sset->SetNamedSearchName( 'kwic', $sid );

    my $headerQuery =
        qq{pr.region.mainheader (region mainheader within \*$mainSliceName );};
    $sset->AddQuery( $mainHeaderName, $headerQuery );

    return unless ( $lel >= 3 );

    # query for headers of DIVs of different levels
    $self->AddScopedHeads ( $ssetName, $sliceSearchName, $sid );

    # query for kwic results
    my $kwicQuery = $self->{'printpsetstring'} . '*' . $sliceSearchName . ';';
    $sset->AddQuery( $kwicName, $kwicQuery );
}
# ----------------------------------------------------------------------
# NAME : BooleanResultsFrameSearches
# PURPOSE : adds the set of searches necessary for a generic boolean
#           search to this TextClass object's SearchSet object
# CALLED BY : main
# CALLS : SearchSet->AddQuery
# INPUT : CGI object, session id, search set name
# RETURNS : NONE
# SIDE-EFFECTS : adds to SearchSet object
# NOTES : only the main header query is added; it is scoped to the
#         previously registered 'mainslicesearch'
# ----------------------------------------------------------------------
sub BooleanResultsFrameSearches
{
    my ( $self, $cgi, $sid, $ssetName ) = @_;

    my $sset = $self->GetSearchSet( $ssetName );
    # fetched as in the other *Searches routines, but not used below
    my $qf = $self->GetQueryFactory( );
    my $tm = $self->GetTermMapper( );

    my $mainSliceName = $sset->GetNamedSearchName( 'mainslicesearch' );
    my $mainHeaderName = $sset->SetNamedSearchName( 'mainheader', $sid );

    my $headerQuery =
        qq{pr.region.mainheader (region mainheader within \*$mainSliceName );};
    $sset->AddQuery( $mainHeaderName, $headerQuery );
}
# ----------------------------------------------------------------------
# NAME : GuideFrameSearches
# PURPOSE : adds the set of searches necessary for the guide frame
#           to this TextClass object's SearchSet object
# CALLED BY : main
# CALLS : $self->SetUpSliceSize, QueryFactory->baseQuery,
#         SearchSet->SetNamedSearchName, SearchSet->AddQuery
# INPUT : session id, CGI object, search set name
# RETURNS : NONE
# SIDE-EFFECTS : adds to SearchSet object
# NOTES : These searches need to be done whether or not the guide
#         frame eventually is put in the html or not, since it does
#         searches and slices, etc. that are needed later.
#         Three queries are added, in this order:
#           firstsearch     - the base query
#           slicesearch     - subset.<start>.<size> of firstsearch
#           mainslicesearch - region main including the slice
# ----------------------------------------------------------------------
sub GuideFrameSearches
{
    my ( $self, $sid, $cgi, $ssetName ) = @_;

    my $sset = $self->GetSearchSet( $ssetName );
    my $qf = $self->GetQueryFactory( );
    # fetched as in the other *Searches routines, but not used below
    my $tm = $self->GetTermMapper( );

    $self->SetUpSliceSize( $cgi );

    ## create basic query
    my $query = $qf->baseQuery( );

    # register the three search names (formerly flug, blog, ...)
    my ( $firstSearchName, $sliceSearchName, $mainSliceName ) =
        map { scalar $sset->SetNamedSearchName( $_, $sid ) }
            ( 'firstsearch', 'slicesearch', 'mainslicesearch' );

    # add these queries to the search set
    $sset->AddQuery( $firstSearchName, qq{$firstSearchName = $query; } );

    my $startPoint = $cgi->param( 'start' );
    my $sliceSize = $cgi->param( 'size' );
    my $sliceQuery =
        join( '.', qq{$sliceSearchName = subset}, $startPoint, $sliceSize ) .
        ' *' . $firstSearchName . ';';
    $sset->AddQuery( $sliceSearchName, $sliceQuery );

    $sset->AddQuery(
        $mainSliceName,
        qq{$mainSliceName = (region main incl *$sliceSearchName );},
    );
}
# ----------------------------------------------------------------------
# NAME : AddScopedHeads
# PURPOSE : add one div-head query per div level to a SearchSet
#
# CALLED BY : $self->SimpleResultsFrameSearches
# CALLS : $self->GetSearchSet, $self->GetDivHeadRange,
#         SearchSet->SetNamedSearchName, SearchSet->AddQuery
# INPUT : search set name, name of the search to scope to, session id
# RETURNS :
# GLOBALS :
# SIDE-EFFECTS : adds to SearchSet object
# NOTES : for level N the query prints region divNhead within the
#         DIVN regions that include the named search
# ----------------------------------------------------------------------
sub AddScopedHeads
{
    my ( $self, $ssetName, $searchName, $sid ) = @_;

    my $sset = $self->GetSearchSet( $ssetName );

    foreach my $level ( $self->GetDivHeadRange() )
    {
        my $divRgn = 'DIV' . $level;
        my $fabRgn = 'div' . $level . 'head';
        my $queryLabel = $sset->SetNamedSearchName( $fabRgn, $sid );
        my $headQuery =
            qq{pr.region.$fabRgn (region $fabRgn within (region $divRgn incl \*$searchName)); };
        $sset->AddQuery( $queryLabel, $headQuery );
    }
}
# ----------------------------------------------------------------------
# NAME : SubmitSearchSet
# PURPOSE : send a SearchSet object's queries one by one to an XPat
#           process, gathering up results in an XPatResultSet object
# CALLED BY : main::HandleSearch
# CALLS : XPat->GetResultsFromQuery; XPatResultSet->AddResults
# INPUT : name of the search set / result set pair, CGI object
# RETURNS : NONE
# GLOBALS : $ENV{'DEBUG'} ('search' or 'all' prints each query)
# SIDE-EFFECTS : affects XPatResultSet obj by adding results to it;
#                sets status 'XPAT_SEARCH_ERROR' and stops at the first
#                query whose result type matches /error/i
# NOTES : the disabled debug print of $results was spread over several
#         lines, which left part of it as live code; it is a single
#         comment line again
# ----------------------------------------------------------------------
sub SubmitSearchSet
{
    my ( $self, $name, $cgi ) = @_;

    my $sset = $self->GetSearchSet( $name );
    my $rset = $self->GetXPatResultSet( $name );
    # get the XPat object the queries are sent to
    my $xpat = $self->GetXPatObject( );

    # query tracing is decided once; the environment does not change
    # while the loop runs
    my $debugMode = defined( $ENV{'DEBUG'} ) ? $ENV{'DEBUG'} : '';
    my $traceQueries = ( $debugMode eq 'search' ) || ( $debugMode eq 'all' );

    # run through all the searches in this search set
    SUBMITSEARCHES:
    foreach my $label ( $sset->GetSearchLabels() )
    {
        my $query = $sset->GetQueryByLabel( $label );

        # type is SSize, PSet or RSet
        # result is sgml result from XPat
        # $label is same label as was sent by AddSearchSet
        my ( $type, $results, $resultLabel ) =
            $xpat->GetResultsFromQuery( $label, $query );

        if ( $traceQueries )
        {
            print "label:$label\n";
            print "query:$query\n";
            print "type:$type\n";
        }
        # print "results: $results";

        if ( $type =~ m,error,i )
        {
            $self->SetStatus ( 'XPAT_SEARCH_ERROR' );
            last SUBMITSEARCHES;
        }

        $rset->AddResults( $type, $results, $resultLabel, $cgi );
    }
}
# ----------------------------------------------------------------------
# NAME : FilterRawResults
# PURPOSE : plain vanilla filter of SGML to HTML (essentially nothing
#           more than < and > filtering)
# CALLED BY : main::OutputResults
# CALLS : XPatResultSet->InitIterator, XPatResultSet->GetNextResult,
#         FilterCharEnts_All
# INPUT : reference to the html page (not used here), result set name
# RETURNS : reference to a string of html
# GLOBALS : $ENV{'DEBUG'}
# SIDE-EFFECTS : each result text is escaped in place through its
#                reference
# NOTES : < and > are replaced by &lt; and &gt;. The substitutions
#         previously replaced each character with itself, so no
#         escaping took place.
#         The returned string only receives content when
#         $ENV{'DEBUG'} >= 3; otherwise it is empty.
# ----------------------------------------------------------------------
sub FilterRawResults
{
    my ( $self, $htmlPageRef, $name ) = @_;

    my $rset = $self->GetXPatResultSet( $name );
    my $results = '';

    $rset->InitIterator();

    my ( $label, $textRef );
    RESULTLOOP:
    while ( ( $label, $textRef ) = $rset->GetNextResult() )
    {
        # stop if we were sent back undef
        last RESULTLOOP unless defined( $label );

        $$textRef =~ s,<,&lt;,g;
        $$textRef =~ s,>,&gt;,g;

        # DlpsUtils routine to do entity filtering
        FilterCharEnts_All( $textRef );

        if ( $ENV{'DEBUG'} >=3 )
        {
            $results .= qq{
Results
\n};
            $results .= qq{ $label ::::::: $$textRef\n};
        }
    }

    return \$results;
}
# ----------------------------------------------------------------------
# NAME : FilterResultsForHighLelHeader
# PURPOSE : register the header-view processing instructions and run
#           them over the html page
#
# CALLED BY :
# CALLS : XPatResultSet->InitIterator,
#         XPatResultSet->GetNextLabeledResult,
#         ProcIns->AddPI, ProcIns->ProcessPIs
# INPUT : reference to html page, CGI object, result set name, $bbo
# RETURNS :
# GLOBALS :
# SIDE-EFFECTS : adds PIs to this object's ProcIns object; the page is
#                handed to ProcessPIs by reference
# NOTES : PIs registered: HEADER (_HeaderFilter),
#         SEARCH_WITHIN_LINKS (_SearchWithinLinks),
#         HEADER_TOC (_ScopedResultsFilter, indent flag 1)
# ----------------------------------------------------------------------
sub FilterResultsForHighLelHeader
{
    my ( $self, $htmlPageRef, $cgi, $name, $bbo ) = @_;

    my $idno = $cgi->param( 'idno' );
    # only referenced by the disabled BOOKBAG_ADD_REMOVE_ITEM PI below
    my $collid = $cgi->param( 'c' );

    # set up ProcIns object
    my $pio = $self->{'pio'};

    # the header is fetched here rather than in HeaderFilter, so that all
    # HeaderFilter needs is a ref to a string (this allows it to be called
    # from several places)
    my $rset = $self->GetXPatResultSet( $name );
    $rset->InitIterator();
    my $headRef = $rset->GetNextLabeledResult( 'mainheader' );

    my @tocArgs = ( $self, $cgi, $name, 1, $bbo, $idno );

    $pio->AddPI( 'HEADER', \&_HeaderFilter, [ $headRef ] );
    $pio->AddPI( 'SEARCH_WITHIN_LINKS', \&_SearchWithinLinks, [ $self, $cgi ] );
    # $pio->AddPI( 'BOOKBAG_ADD_REMOVE_ITEM',
    # \&TextClassUtils::BookbagAddRemoveItemButton, [ $cgi, $idno, $collid, $bbo ] );
    $pio->AddPI( 'HEADER_TOC', \&_ScopedResultsFilter, [ @tocArgs ] );

    # call generic PageHandling::ProcessPIs to do the work
    $pio->ProcessPIs( $htmlPageRef );
}
# ----------------------------------------------------------------------
# NAME : FilterResultsForLowLelHeader
# PURPOSE : register the header-view processing instructions and run
#           them over the html page
#
# CALLED BY :
# CALLS : XPatResultSet->InitIterator,
#         XPatResultSet->GetNextLabeledResult,
#         ProcIns->AddPI, ProcIns->ProcessPIs
# INPUT : reference to html page, CGI object, result set name, $bbo
# RETURNS :
# GLOBALS :
# SIDE-EFFECTS : adds PIs to this object's ProcIns object; the page is
#                handed to ProcessPIs by reference
# NOTES : same as FilterResultsForHighLelHeader except that HEADER_TOC
#         is handled by _PagesWithHitsFilter
# ----------------------------------------------------------------------
sub FilterResultsForLowLelHeader
{
    my ( $self, $htmlPageRef, $cgi, $name, $bbo ) = @_;

    # both only referenced by the disabled BOOKBAG_ADD_REMOVE_ITEM PI below
    my $idno = $cgi->param( 'idno' );
    my $collid = $cgi->param( 'c' );

    # set up ProcIns object
    my $pio = $self->{'pio'};

    my $rset = $self->GetXPatResultSet( $name );
    $rset->InitIterator();
    my $headRef = $rset->GetNextLabeledResult( 'mainheader' );

    my @tocArgs = ( $self, $cgi, $name );

    $pio->AddPI( 'HEADER', \&_HeaderFilter, [ $headRef ] );
    $pio->AddPI( 'SEARCH_WITHIN_LINKS', \&_SearchWithinLinks, [ $self, $cgi ] );
    # $pio->AddPI( 'BOOKBAG_ADD_REMOVE_ITEM',
    # \&TextClassUtils::BookbagAddRemoveItemButton, [ $cgi, $idno, $collid, $bbo ] );
    $pio->AddPI( 'HEADER_TOC', \&_PagesWithHitsFilter, [ @tocArgs ] );

    # call generic PageHandling::ProcessPIs to do the work
    $pio->ProcessPIs( $htmlPageRef );
}
# ----------------------------------------------------------------------
# NAME : _PagesWithHitsFilter
# PURPOSE : build the "Pages with matches" text: one line per page
#           tag returned by an XPat query for pages containing hits
#
# CALLED BY : registered as the HEADER_TOC handler in
#             $self->FilterResultsForLowLelHeader
# CALLS : $self->GetSearchSet, $self->GetQueryFactory,
#         $self->GetTermMapper, $self->GetXPatObject,
#         XPat->GetSimpleResultsFromQuery,
#         TextClassUtils::StripAllRSetCruft
# INPUT : CGI object, search set name
# RETURNS : string
# GLOBALS : $TextClassUtils::gPageviewerCgi
# SIDE-EFFECTS :
# NOTES : NOTE(review): the first statement inside the foreach loop is
#         damaged in this copy -- a pattern match on $pageTag runs
#         straight into the tail of a later GetSimpleResultsFromQuery
#         call, and $page, $seq and $pageHits are never assigned.
#         The sub cannot work as it stands; restore the loop body from
#         a clean copy.
# ----------------------------------------------------------------------
sub _PagesWithHitsFilter
{
my $self = shift;
my ( $cgi, $name ) = @_;
my $s = '';
# using $name, get $sset so we can find PB that include the hits we want
my $sset = $self->GetSearchSet( $name );
my $mainHeaderRegionName = $sset->GetNamedSearchName( 'mainheaderregion' ) ;
my $qf = $self->GetQueryFactory( );
my $baseQuery = $qf->baseQuery( );
my $tm = $self->GetTermMapper( );
my $rgn = $tm->map('ITEM', 'synthetic', 'native');
# get PBs of pages that contain the hits we need
my $search = qq{pr.region."PB-T" ( region "PB-T" within ( ( region page incl ( $baseQuery ) ) within ( $rgn incl \*$mainHeaderRegionName ) ) );};
my $xpat = $self->GetXPatObject( );
# $error is not examined below
my ( $error, $pbTagResults ) = $xpat->GetSimpleResultsFromQuery( $search );
# --------------------------------------------------
# get cgi params for building links
my $c = $cgi->param( 'c' );
my $cc = $cgi->param( 'cc' );
my $xc = $cgi->param( 'xc' );
my $idno = $cgi->param( 'idno' );
my $sid = $cgi->param( 'sid' );
&TextClassUtils::StripAllRSetCruft( \$pbTagResults );
## Handling of PB tags and possible links to page images
## PB tags have a REF if there is a corresponding page image
## They are of the form:
##
# build links for those PBs that have REFs
# the result string is cut after every '>', giving one piece per tag
my @pages = split ( />/, $pbTagResults );
foreach my $pageTag ( @pages )
{
# NOTE(review): damaged line, see NOTES in the banner above
$pageTag =~ m,GetSimpleResultsFromQuery( $search );
$pageHits =~ s,?SSize>,,gs;
# singular or plural wording for the hit count
if ( $pageHits == 1 )
{ $pageHits .= ' match'; }
else
{ $pageHits .= ' matches'; }
# $link is built but does not appear in the string appended to $s
my $link = qq{$TextClassUtils::gPageviewerCgi\?} .
qq{c=$c\&cc=$cc\&idno=$idno\&seq=$seq\&xc=$xc\&sid=$sid};
$s .= qq{Page $page $pageHits \n};
}
return qq{Pages with matches
\n$s\n};
}
# # ----------------------------------------------------------------------
# # NAME :
# # PURPOSE :
# #
# # CALLED BY :
# # CALLS :
# # INPUT :
# # RETURNS :
# # GLOBALS :
# # SIDE-EFFECTS :
# # NOTES :
# # ----------------------------------------------------------------------
# sub BuildLinksForPBsWithHits
# {
# my ( $sRef, $cgi ) = @_;
# my $newS = '';
# my $c = $cgi->param( 'c' );
# my $cc = $cgi->param( 'cc' );
# my $xc = $cgi->param( 'xc' );
# my $idno = $cgi->param( 'idno' );
# &TextClassUtils::StripAllRSetCruft( $sRef );
# ## Handling of PB tags and possible links to page images
# ## PB tags have a REF if there is a corresponding page image
# ## They are of the form:
# ##
# # build links for those PBs that have REFs
# my @pages = split ( />/, $$sRef );
# foreach my $pageTag ( @pages )
# {
# $pageTag =~ m,Page $page \n};
# }
# $$sRef = qq{ \n} . $newS;
# }
# ----------------------------------------------------------------------
# NAME : FilterResultsForText
# PURPOSE : register the SCOPED_HEADS processing instruction and run
#           it over the html page
#
# CALLED BY :
# CALLS : ProcIns->AddPI, ProcIns->ProcessPIs
# INPUT : reference to html page, CGI object, view name, $bbo
# RETURNS :
# GLOBALS :
# SIDE-EFFECTS : adds a PI to this object's ProcIns object; the page is
#                handed to ProcessPIs by reference
# NOTES : the idno is the part of the "node" CGI parameter before the
#         first colon; SCOPED_HEADS is handled by _ScopedResultsFilter
#         with the indent flag 0
# ----------------------------------------------------------------------
sub FilterResultsForText
{
    my ( $self, $htmlPageRef, $cgi, $view, $bbo ) = @_;

    # grab idno info from node
    ( my $idno = $cgi->param( 'node' ) ) =~ s,(^[^:]+).*,$1,;

    # set up ProcIns object
    my $pio = $self->{'pio'};
    my @headArgs = ( $self, $cgi, $view, 0, $bbo, $idno );
    $pio->AddPI( 'SCOPED_HEADS', \&_ScopedResultsFilter, [ @headArgs ] );
    # $pio->AddPI( 'TEXT', \&FilterTextForTextView, [ $self, $cgi, $view ] );

    # call generic PageHandling::ProcessPIs to do the work
    $pio->ProcessPIs( $htmlPageRef );
}
# ----------------------------------------------------------------------
# NAME : FilterResultsForNotes
# PURPOSE : register the SCOPEDNOTE processing instruction and run it
#           over the html page
#
# CALLED BY :
# CALLS : ProcIns->AddPI, ProcIns->ProcessPIs
# INPUT : reference to html page, CGI object, view name, $bbo
# RETURNS :
# GLOBALS :
# SIDE-EFFECTS : adds a PI to this object's ProcIns object; the page is
#                handed to ProcessPIs by reference
# NOTES : SCOPEDNOTE is handled by NotesResultsFilter
# ----------------------------------------------------------------------
sub FilterResultsForNotes
{
    my ( $self, $htmlPageRef, $cgi, $view, $bbo ) = @_;

    # set up ProcIns object
    my $pio = $self->{'pio'};
    my @noteArgs = ( $self, $cgi, $view, $bbo );
    $pio->AddPI( 'SCOPEDNOTE', \&NotesResultsFilter, [ @noteArgs ] );

    # call generic PageHandling::ProcessPIs to do the work
    $pio->ProcessPIs( $htmlPageRef );
}
# ----------------------------------------------------------------------
# NAME : FilterResultsForReslist
# PURPOSE : filter results for a reslist view
# CALLED BY : main::OutputResults
# CALLS : ProcIns->AddPI, ProcIns->ProcessPIs
# INPUT : reference to html page, CGI object, view name, lel value,
#         $bbo
# RETURNS :
# GLOBALS : NONE
# SIDE-EFFECTS : adds PIs to this object's ProcIns object; the page is
#                handed to ProcessPIs by reference
# NOTES : the RESULTS PI is handled by _ScopedResultsFilter when
#         lel >= 3 and by LowLelResultsFilter otherwise
# ----------------------------------------------------------------------
sub FilterResultsForReslist
{
    my ( $self, $htmlPageRef, $cgi, $view, $lel, $bbo ) = @_;

    my $idno = $cgi->param( 'idno' );
    my $sid = $cgi->param( 'sid' );    # read but not used below

    # set up ProcIns object
    my $pio = $self->{'pio'};

    $pio->AddPI( 'RESULTS_FRAME_HEADER', \&_ResultsFrameHeaderFilter, [ $self, $cgi, $view ] );
    # for fisheye and prev and next slice links
    $pio->AddPI( 'SLICE_NAVIGATION_LINKS', \&_BuildSliceNavigationLinks, [ $self, $cgi, $view ] );

    # pick the RESULTS handler and its argument list by lel
    my ( $resultsHandler, $handlerArgs ) =
        ( $lel >= 3 )
        ? ( \&_ScopedResultsFilter, [ $self, $cgi, $view, 1, $bbo, $idno ] )
        : ( \&LowLelResultsFilter, [ $self, $cgi, $view, $lel, $bbo ] );
    $pio->AddPI( 'RESULTS', $resultsHandler, $handlerArgs );

    # call generic PageHandling::ProcessPIs to do the work
    $pio->ProcessPIs( $htmlPageRef );
}
# ----------------------------------------------------------------------
# NAME : GetQueryFactory
# PURPOSE : retrieve reference to the QueryFactory object stored in
#           this TextClass object
#
# CALLED BY : the *Searches methods and _PagesWithHitsFilter
# CALLS :
# INPUT : collection id (accepted but ignored)
# RETURNS : reference to QF object, or undef when none has been set
#           (see AddQueryFactory)
# GLOBALS :
# SIDE-EFFECTS :
# NOTES :
# ----------------------------------------------------------------------
sub GetQueryFactory
{
    my ( $self, $collid ) = @_;
    return $self->{'queryfactory'};
}
# ----------------------------------------------------------------------
# NAME : GetQueries
# PURPOSE : return the queries stored in $self->{'queries'}
# CALLED BY : $self->HtmlDumpSearchSet
# CALLS :
# INPUT : collection id (accepted but ignored)
# RETURNS : a list whose first element is the bare 'mainsearch' query
#           (not wrapped in an array), followed by one [label, query]
#           array per stored query, sorted by label
# GLOBALS :
# SIDE-EFFECTS :
# NOTES : 'mainsearch' therefore shows up twice when it is stored:
#         once bare at the front and once as a [label, query] pair
# ----------------------------------------------------------------------
sub GetQueries
{
    my ( $self, $collid ) = @_;

    # the main search query leads the list
    my @queryList = ( $self->{'queries'}{'mainsearch'} );

    ## need to add second search here too
    push @queryList,
        map { [ $_, $self->{'queries'}{$_} ] }
        sort( keys %{ $self->{'queries'} } );

    return @queryList;
}
# ----------------------------------------------------------------------
# NAME : GetValueByKey
# PURPOSE : retrieve a value for a particular key, in this object
# CALLED BY : HtmlDumpCollsInfo, and several methods of this class
# CALLS : NONE
# INPUT : key string
# RETURNS : value stored under that key in the object hash (undef when
#           the key is absent)
# GLOBALS : NONE
# SIDE-EFFECTS : NONE
# NOTES :
# ----------------------------------------------------------------------
sub GetValueByKey
{
    my ( $self, $key ) = @_;
    return $self->{$key};
}
# ----------------------------------------------------------------------
# NAME : StartXPatProcess
# PURPOSE : start an XPat Process with this object's dd file, patexec
#           on this object's host and attach it to this object
# CALLED BY : main::HandleSearch
# CALLS : XPat->new, $self->GetXPatObject, XPat->GetStatus,
#         $self->SetStatus
# INPUT : requesting host
# RETURNS :
# GLOBALS :
# SIDE-EFFECTS : stores the XPat object in $self->{'xpat'}; sets this
#                object's status to 'OK' or 'XPAT_START_ERROR'
# NOTES : dies with 'XPAT_START_ERROR' when the XPat object's status
#         is anything other than 'OK'.
#         The constructor is called as XPat->new rather than the
#         indirect-object form "new XPat", because this package
#         defines its own sub new and the indirect form then depends
#         on parser heuristics.
# ----------------------------------------------------------------------
sub StartXPatProcess
{
    my ( $self, $requestingHost ) = @_;

    $self->{'xpat'} = XPat->new(
        $requestingHost,
        $self->{'host'},
        $self->{'dd'},
        $self->{'patexec'},
        $self->{'port'},
    );

    # if XPat had an error starting up, its status is something other
    # than 'OK'; if so, record the error on this TextClass and give up
    my $xpat = $self->GetXPatObject( );
    $self->SetStatus( 'OK' );
    my $xpatStatus = $xpat->GetStatus();
    if ( $xpatStatus ne 'OK' )
    {
        $self->SetStatus ( 'XPAT_START_ERROR' );
        die 'XPAT_START_ERROR' ;
    }
}
# ----------------------------------------------------------------------
# NAME : KillXPat
# PURPOSE : send the 'stop' command to this object's XPat object
#
# CALLED BY :
# CALLS : $self->GetXPatObject, XPat->SendCommand
# INPUT : NONE
# RETURNS :
# GLOBALS :
# SIDE-EFFECTS :
# NOTES : does nothing when no XPat object is attached
# ----------------------------------------------------------------------
sub KillXPat
{
    my ( $self ) = @_;
    my $xpat = $self->GetXPatObject( );
    $xpat->SendCommand( 'stop' ) if defined( $xpat );
}
# ----------------------------------------------------------------------
# NAME : SetStatus
# PURPOSE : record a status value on this object
#
# CALLED BY : $self->StartXPatProcess, $self->SubmitSearchSet
# CALLS :
# INPUT : status value; the values used in this file are 'OK',
#         'XPAT_START_ERROR' and 'XPAT_SEARCH_ERROR'
# RETURNS : the status just stored
# GLOBALS :
# SIDE-EFFECTS : overwrites $self->{'status'}
# NOTES :
# ----------------------------------------------------------------------
sub SetStatus
{
    my ( $self, $status ) = @_;
    return $self->{'status'} = $status;
}
# ----------------------------------------------------------------------
# NAME : GetStatus
# PURPOSE : retrieve status of this object
# CALLED BY :
# CALLS :
# INPUT : NONE
# RETURNS : whatever was last stored by SetStatus (a status string
#           such as 'OK'), or undef when no status has been set
# GLOBALS :
# SIDE-EFFECTS :
# NOTES :
# ----------------------------------------------------------------------
sub GetStatus
{
    my ( $self ) = @_;
    return $self->{'status'};
}
# ----------------------------------------------------------------------
# NAME : LowLelResultsFilter
# PURPOSE : filter a result set into a string, using only the results
#           whose label starts with 'mainheader'
#
# CALLED BY : registered as the RESULTS handler in
#             $self->FilterResultsForReslist when lel < 3
# CALLS : XPatResultSet->InitIterator, ->SniffNextResult,
#         ->GetNextResult, $self->FilterMainHeader
# INPUT : CGI object, result set name, lel value, $bbo
# RETURNS : string of filtered main headers
# GLOBALS :
# SIDE-EFFECTS : consumes the result set's iterator
# NOTES : results with any other label are read and skipped
# ----------------------------------------------------------------------
sub LowLelResultsFilter
{
    my ( $self, $cgi, $name, $lel, $bbo ) = @_;

    my $collid = $self->GetValueByKey( 'collid' );    # not used below
    my $rset = $self->GetXPatResultSet( $name );
    $rset->InitIterator();

    my $html = '';
    my $sid = $cgi->param( 'sid' );                   # not used below

    # SniffNextResult only peeks; GetNextResult then consumes the result
    while ( $rset->SniffNextResult( ) )
    {
        my ( $label, $textRef, $byte ) = $rset->GetNextResult( );

        # need to match beginnings of labels, not eq the entire label, because
        # the labels likely end with session ids
        next unless ( $label =~ m,^mainheader, );

        # FilterMainHeader is called in list context; only its first
        # return value is used
        my ( $mainHeaderHtml ) =
            $self->FilterMainHeader ( $textRef, $byte, $name, $cgi, $bbo );
        $html .= $mainHeaderHtml;
    }

    return $html;
}
# ----------------------------------------------------------------------
# NAME : NotesResultsFilter
# PURPOSE : filter the results of a note search into a string
# CALLED BY : registered as the SCOPEDNOTE handler in
#             $self->FilterResultsForNotes
# CALLS : XPatResultSet->InitIterator, ->SniffNextResult,
#         ->GetNextResult, $self->FilterNoteContentParentForPopup,
#         $self->FilterNoteContentForPopup, $self->TextFilter
# INPUT : CGI object, result set name, $bbo
# RETURNS : string of filtered note text
# GLOBALS :
# SIDE-EFFECTS : consumes the result set's iterator
# NOTES : The two following helper functions are abstract methods
#       : which are over-ridden by the subclass via $self->
#       : A 'notecontentparentrgn...' result REPLACES whatever has
#       : been collected so far; a 'notecontent...' result is appended.
#       : Any other label is fatal.
# ----------------------------------------------------------------------
sub FilterNoteContentParentForPopup { return; }
sub FilterNoteContentForPopup { return; }
sub NotesResultsFilter
{
    my ( $self, $cgi, $name, $bbo ) = @_;

    my $collid = $self->GetValueByKey( 'collid' );
    my $rset = $self->GetXPatResultSet( $name );
    $rset->InitIterator();

    my $html = '';
    my $sid = $cgi->param( 'sid' );    # not used below

    # SniffNextResult only peeks; GetNextResult then consumes the result
    while ( $rset->SniffNextResult( ) )
    {
        my ( $label, $textRef, $byte ) = $rset->GetNextResult( );

        # need to match beginnings of labels, not eq the entire label, because
        # the labels end with session ids; the longer prefix is tested first
        if ( $label =~ m,^notecontentparentrgn, )
        {
            my $parentText = $self->FilterNoteContentParentForPopup( $textRef );
            $html = "\n\n" . $self->TextFilter( \$parentText, $collid, undef, $cgi ) . "\n\n";
        }
        elsif ( $label =~ m,^notecontent, )
        {
            my $noteText = $self->FilterNoteContentForPopup( $textRef );
            $html .= "\n\n" . $self->TextFilter( \$noteText, $collid, undef, $cgi ) . "\n\n";
        }
        else
        {
            die "Bad search label: $label";
        }
    }

    return $html;
}
# ----------------------------------------------------------------------
# NAME : _ScopedResultsFilter
# PURPOSE : create a reslist view type filtering for results from
#           a simple type search
# CALLED BY : registered as a PI handler by the FilterResultsFor*
#             methods (HEADER_TOC, SCOPED_HEADS, RESULTS)
# CALLS : XPatResultSet->InitIterator, ->SniffNextResult,
#         ->GetNextResult, $self->HtmlForIndentLevel,
#         $self->FilterMainHeader, $self->FilterDivhead,
#         $self->FilterKwic, $self->TextFilter
# INPUT : CGI object, name of result set, boolean for indenting or
#         not, $bbo, idno
# RETURNS : string of filtered results
# GLOBALS :
# SIDE-EFFECTS : consumes the result set's iterator
# NOTES : each result is dispatched on the start of its label:
#         mainheader, divNhead, kwic or fullregion; any other label
#         only emits the 'FINISH' indent html. The 'FINISH' indent
#         html is also appended once after the last result.
# ----------------------------------------------------------------------
sub _ScopedResultsFilter
{
    my ( $self, $cgi, $name, $indent, $bbo, $idno ) = @_;

    my $collid = $self->GetValueByKey( 'collid' );
    my $rset = $self->GetXPatResultSet( $name );
    $rset->InitIterator();

    my $html = '';
    my $sid = $cgi->param( 'sid' );

    # SniffNextResult only peeks at the coming label; the indent html is
    # written for it before the result itself is consumed
    while ( my $nextLabel = $rset->SniffNextResult( ) )
    {
        $html .= $indent
            ? $self->HtmlForIndentLevel ( $nextLabel, $sid )
            : " \n";

        my ( $label, $textRef, $byte ) = $rset->GetNextResult( );

        # need to match beginnings of labels, not eq the entire label, because
        # the labels end with session ids
        if ( $label =~ m,^mainheader, )
        {
            # FilterMainHeader is called in list context; only its first
            # return value is used
            my ( $mainHeaderHtml ) =
                $self->FilterMainHeader ( $textRef, $byte, $name, $cgi, $bbo );
            $html .= $mainHeaderHtml;
        }
        elsif ( $label =~ m,^div(\d+)head, )
        {
            my $level = $1;
            my $headName = 'div' . $level . 'head';
            my $rgnName = 'div' . $level;
            my $requestedDiv = $cgi->param( 'rgn' );
            my @divheadArgs = (
                $textRef, $level, $headName, $collid,
                $rgnName, $requestedDiv, $cgi,
            );
            $html .= $self->FilterDivhead( @divheadArgs );
        }
        elsif ( $label =~ m,^kwic, )
        {
            $html .= $self->FilterKwic ( $textRef, $cgi, $name );
        }
        elsif ( $label =~ m,^fullregion, )
        {
            # $idno is sent along with $collid for building pageviewer links
            $html .= $self->TextFilter ( $textRef, $collid, $idno, $cgi );
        }
        else
        {
            $html .= $self->HtmlForIndentLevel ( 'FINISH', $sid );
        }
    }

    $html .= $self->HtmlForIndentLevel ( 'FINISH', $sid );
    return $html;
}
# ----------------------------------------------------------------------
# NAME : _ResultsFrameHeaderFilter
# PURPOSE : create the html at the beginning of the results frame
# CALLED BY : registered as the RESULTS_FRAME_HEADER handler in
#             $self->FilterResultsForReslist
# CALLS : XPatResultSet->GetTotalMatches, ->GetTotalRecords,
#         TextClassUtils::ChangeNavLink
# INPUT : cgi object, result set name
# RETURNS : string
# GLOBALS : $ENV{'SCRIPT_NAME'}
# SIDE-EFFECTS :
# NOTES : for type 'boolean' only the match count is reported; for
#         every other type the record count is reported as well
# ----------------------------------------------------------------------
sub _ResultsFrameHeaderFilter
{
    my ( $self, $cgi, $name ) = @_;

    my $collName = $self->GetValueByKey( 'collname' );
    my $rset = $self->GetXPatResultSet( $name );

    # --------------------------------------------------
    # this section creates the "your search resulted in X in Y records" string
    my $matches = $rset->GetTotalMatches();
    my $records = $rset->GetTotalRecords( );
    my $matchNoun = ( $matches == 1 ) ? qq{ match} : qq{ matches};
    my $recordNoun = ( $records == 1 ) ? qq{ record} : qq{ records};
    my $matchString = $matches . $matchNoun;
    my $recordString = $records . $recordNoun;

    my $html = qq{Search results for $collName\n
\n} . qq{\n};

    if ( $cgi->param( 'type' ) ne 'boolean' )
    {
        # simple or proximity
        $html .= qq{Your search in $collName resulted in $matchString in $recordString\n \n};
    }
    else
    {
        # boolean
        $html .= qq{Your search in $collName resulted in $matchString\n \n};
    }
    # --------------------------------------------------
    # --------------------------------------------------
    # refine search link ($link does not appear in the strings below)
    my $link = &TextClassUtils::ChangeNavLink ( $cgi, $ENV{'SCRIPT_NAME'}, 'page', 'boolean' );
    $html .= qq{ Refine this search} . qq{\n};

    return $html;
}
# ----------------------------------------------------------------------
# NAME : _BuildSliceNavigationLinks
# PURPOSE : build the fisheye string and the previous / next hits
#           links for a results page
#
# CALLED BY : registered as the SLICE_NAVIGATION_LINKS handler in
#             $self->FilterResultsForReslist
# CALLS : XPatResultSet->GetTotalMatches,
#         TextClassUtils::BuildFisheyeString,
#         TextClassUtils::BuildPrevNextHitsLink
# INPUT : cgi object, result set name
# RETURNS : string
# GLOBALS :
# SIDE-EFFECTS :
# NOTES : $matches was used here without ever being declared or
#         assigned, so BuildPrevNextHitsLink always received undef.
#         It is now taken from the result set's GetTotalMatches, the
#         same call _ResultsFrameHeaderFilter uses.
# ----------------------------------------------------------------------
sub _BuildSliceNavigationLinks
{
    my ( $self, $cgi, $name ) = @_;

    my $rset = $self->GetXPatResultSet( $name );
    my $matches = $rset->GetTotalMatches();
    my $s = '';

    # --------------------------------------------------
    my $fisheyeString = &TextClassUtils::BuildFisheyeString ( $cgi,
        $rset,
    );
    my $nextHitsLink = &TextClassUtils::BuildPrevNextHitsLink ( $cgi, $matches, 'next' );
    my $prevHitsLink = &TextClassUtils::BuildPrevNextHitsLink ( $cgi, $matches, 'prev' );

    # fisheye and slice info; the string pieces below are kept exactly
    # as they were, including their embedded line breaks
    $s .= qq{
\n
\n};
    $s .= qq{
$fisheyeString
\n};
    $s .= qq{
\n};
    $s .= qq{
} .
    qq{$prevHitsLink} . ' ' . qq{$nextHitsLink
\n};
    $s .= qq{
\n
\n};
    $s .= qq{\n};

    return $s;
}
# ----------------------------------------------------------------------
# NAME : BuildRefineLink
# PURPOSE : build a url that points back at the script with the
#           current query string, minus the navigation parameters,
#           plus the parameter/value pairs supplied by the caller
# CALLED BY :
# CALLS : CGI->new, and delete / param / query_string on the copy
# INPUT : cgi object, script name, flat list of parameter => value
#         pairs to set on the link
# RETURNS : script name . '?' . altered query string
# GLOBALS :
# SIDE-EFFECTS : none on the incoming cgi object; all changes are made
#                to a copy
# NOTES : plain function, not a method (there is no $self)
# ----------------------------------------------------------------------
sub BuildRefineLink
{
    my ( $cgi, $scriptName, @parmArray ) = @_;

    ## work on a clone so the caller's cgi object stays as it was
    my $tempCgi = CGI->new( $cgi );

    ## remove params that will be replaced or null
    ## (browse is being used to trigger stats gathering, but the nav bar
    ## links should always take you to the "start" of each type of search,
    ## and those should not be counted for stats purposes)
    foreach my $staleParm ( qw( type subtype page browse view idno node ) )
    {
        $tempCgi->delete( $staleParm );
    }

    ## now set the caller's pairs, two list items at a time
    while ( my ( $parm, $value ) = splice ( @parmArray, 0, 2 ) )
    {
        $tempCgi->param( $parm, $value );
    }

    my $queryString = $tempCgi->query_string;
    return ( $scriptName . '?' . $queryString );
}
# ----------------------------------------------------------------------
# NAME : HtmlForIndentLevel
# PURPOSE : generate the needed html bits (uls and lis) for getting
# to the right level of indentation when filtering results
# CALLED BY :
# CALLS : $self->GetCurrentIndentDepth, $self->GetLastIndentLabel,
# $self->GetHeadDepths, $self->GetMaxDepthItems
# INPUT : label (arrives with the session id as a suffix), session id
# RETURNS : presumably the html built up in $results -- TODO confirm,
# see the review note below
# GLOBALS :
# SIDE-EFFECTS :
# NOTES : $sid and $label are interpolated into patterns exactly as
# they arrive, so any regex metacharacters in them are live.
# NOTE(review): the statements that follow the elsif test work on
# $textRef and $buttons, which this sub never declares; they look like
# the tail of a different routine. Confirm the body against revision
# history before relying on it.
# ----------------------------------------------------------------------
sub HtmlForIndentLevel
{
my $self = shift;
my ( $label, $sid ) = @_;
# remove session id from end of label so we can compare the bare label
# with the hash of depths
$label =~ s,$sid$,,;
my $results = '';
# current depth, falling back to 0 when the getter returns a false value
my $currentDepth = $self->GetCurrentIndentDepth( ) || 0 ;
# presumably the label seen on the previous call -- compared with
# $label below to spot two like items in a row
my $lastLabel = $self->GetLastIndentLabel( );
# find level to indent to based on incoming label
my %headDepths = $self->GetHeadDepths( );
# undefined when the bare label has no entry in the depth hash
my $targetDepth = $headDepths{$label};
# if we get two similar items in a row, no need to indent
# or outdent or change last label;
# just close last item and start new one
if ( $lastLabel eq $label )
{
# $results = "\n
" ;
$results = "\n" ;
}
# otherwise, we are changing levels
# if this is a low level label (those that occur as deepest results items),
# and not the same the last one, we have to indent just one level
elsif ( grep ( /$label/, $self->GetMaxDepthItems( ) ) )
{
$results = "
]*>.*,Section$buttons \n,s;
$$textRef =~ s,,,g; # this will happen in ot60...
# DlpsUtils routine to do entity filtering
FilterCharEnts_All( $textRef );
$results .= $$textRef;
return $results;
}
# ----------------------------------------------------------------------
# NAME : MakePrevNextSectionButtons
# PURPOSE : build "previous" and "next" buttons leading to the sibling
#           DIVs (same level, same parent) of the section passed in
# CALLED BY :
# CALLS : $self->GetXPatObject, $self->GetValueByKey,
#         GetSimpleResultsFromQuery on the XPat object,
#         TextClassUtils::MakeButtonLinkFromDivT
# INPUT : $textRef - reference to the section text, which must hold a
#                    NODE="idno:n.n.n" attribute
#         $level   - DIV level of the section (1 for DIV1, ...)
# RETURNS : ' ' . previous button . ' ' . next button; a button is the
#           empty string when there is no such sibling, when the
#           search reports an error, or when no NODE value is found
# GLOBALS :
# SIDE-EFFECTS : runs up to two XPat queries
# NOTES : the sibling NODE value is the part of the div path above
#         the current level, followed by the neighbouring sequence
#         number (idno:3.5 at level 2 has siblings idno:3.4 and
#         idno:3.6)
# ----------------------------------------------------------------------
sub MakePrevNextSectionButtons
{
    my $self = shift;
    my ( $textRef , $level ) = @_;

    my ( $prevButton, $nextButton ) = ( '', '' );

    # find node info and get div hierarchy info from it; matching in
    # list context means a failed match yields nothing rather than
    # whatever an earlier, unrelated match left in $1 / $2
    my ( $node ) = $$textRef =~ m,NODE=\"([^\"]*?)\",;
    my ( $idno, $divTree );
    if ( defined $node )
    {
        ( $idno, $divTree ) = $node =~ m,([^:]*?):(.+),;
    }

    # without a usable NODE value there is nothing to search for
    if ( ! defined $divTree )
    {
        return qq{ } . $prevButton . qq{ } . $nextButton;
    }

    my $divRegion = "DIV" . $level;

    # get all the period separated numbers in the NODE value
    my @divs = split ( /\./, $divTree );

    # find the sequence number of the current DIV at this level (index
    # of array is one less than the divlevel)
    my $currentDiv = $divs[ $level - 1 ];
    my $prevDiv = $currentDiv - 1;
    my $nextDiv = $currentDiv + 1;

    # create string with the first part of the NODE string up to
    # the div level above where we currently are, i.e. the elements
    # before the current one, each followed by a period
    my $newDivTree = '';
    if ( ( $level - 1 ) > 0 )
    {
        $newDivTree = join ( ".", @divs[ 0 .. ( $level - 2 ) ] ) . '.';
    }

    # query for the DIV of this level whose NODE value ends in the
    # given sequence number
    my $siblingQuery = sub
    {
        my $divNumber = shift;
        return qq{pr.region."${divRegion}-T" (region "${divRegion}-T" incl "NODE=} .
            $idno . ':' . $newDivTree . $divNumber . qq{ ");};
    };

    # get ready to search for immediate sibling divs
    my $xpat = $self->GetXPatObject( );
    my $coll = $self->GetValueByKey( 'collid' );

    # only if there is a previous div, make a button
    if ( $prevDiv >= 1 )
    {
        my ( $prevError, $prevResult ) =
            $xpat->GetSimpleResultsFromQuery( $siblingQuery->( $prevDiv ) );
        # no error handling at the moment
        if ( ! $prevError && $prevResult )
        { $prevButton = &TextClassUtils::MakeButtonLinkFromDivT( $prevResult, 'prev', $coll ); }
    }

    # next button; whether a next div exists is only known from the
    # search result
    my ( $nextError, $nextResult ) =
        $xpat->GetSimpleResultsFromQuery( $siblingQuery->( $nextDiv ) );
    # no error handling at the moment
    if ( ! $nextError && $nextResult )
    { $nextButton = &TextClassUtils::MakeButtonLinkFromDivT( $nextResult, 'next', $coll ); }

    return qq{ } . $prevButton . qq{ } . $nextButton;
}
# ----------------------------------------------------------------------
# NAME : FilterKwic
# PURPOSE : prepare one keyword-in-context result for display: clean
#           up tags, highlight the hit, filter character entities
# CALLED BY :
# CALLS : $self->GetLocale, CleanResidualTags, SimpleHtmlFilter,
#         HighlightHit, FilterCharEnts_All (DlpsUtils)
# INPUT : $textRef - reference to the text of the result
#         $cgi     - cgi object (handed on to HighlightHit)
#         $name    - result set name (not used by the filtering)
# RETURNS : the filtered text ('' when the referenced text is
#           undefined)
# GLOBALS :
# SIDE-EFFECTS : the text behind $textRef is filtered in place
# NOTES : the entity filter used to be applied to the still-empty
#         results string, so the text itself was never entity
#         filtered; it now gets $textRef, as in the section filter
# ----------------------------------------------------------------------
sub FilterKwic
{
    my $self = shift;
    my ( $textRef, $cgi, $name ) = @_;

    my $locale = $self->GetLocale();

    &CleanResidualTags ( $textRef );
    &SimpleHtmlFilter ( $textRef );
    &HighlightHit ( $textRef, $cgi, $locale ); # carry hit along in $self????

    # DlpsUtils routine to do entity filtering
    FilterCharEnts_All( $textRef );

    my $results = '';
    $results .= $$textRef if defined $$textRef;
    return $results;
}
# ----------------------------------------------------------------------
# NAME :
# PURPOSE :
#
# CALLED BY :
# CALLS :
# INPUT :
# RETURNS :
# GLOBALS :
# SIDE-EFFECTS :
# NOTES :
# ----------------------------------------------------------------------
# sub ProcessMainHeader
# {
# my $self = shift;
# my $rset = $self->GetXPatResultSet();
# my $results = '';
# # start html list
# $results .= qq{
\n};
# my $label;
# SNIFFMAINLOOP:
# while ( $label = $rset->SniffNextResult( ) )
# {
# if ( $label eq 'mainheader' )
# {
# my ( $label, $textRef ) = $rset->GetNextResult() ;
# $results .= qq{
,gs;
$profile =~ s,]*TYPE="[Pp][Uu][Bb][Ll]"[^>]*>(.*?),$1 ,gs;
$profile =~ s,]*TYPE="[Pp][Ee][Rr][Ff]"[^>]*>(.*?),$1 ,gs;
$profile =~ s,]*TYPE="edition"[^>]*>(.*?),$1 edition ,gs;
$profile =~ s,]*TYPE="[Gg][Ee][Nn][Rr][Ee]"[^>]*>(.*?),Genre: $1 ,gs;
$profile =~ s,]*TYPE="[Pp][Ee][Rr][Ii][Oo][Dd]"[^>]*>(.*?),$1 ,gs;
$$headRef =~ s,,$profile,;
}
$$headRef =~ s,]*>,,gs;
$$headRef =~ s,,,gs;
$$headRef =~ s,]*>(.*?),$1,gs;
$$headRef =~ s,]*>,,gs;
$$headRef =~ s,,,gs;
$$headRef =~ s,]*>(.*?),$1,gs;
$$headRef =~ s,]*>,,gs;
$$headRef =~ s,,,gs;
$$headRef =~ s,]*>(.*?),$1,gs;
# DlpsUtils routine to do entity filtering
FilterCharEnts_All( $headRef );
return $$headRef;
}
# ----------------------------------------------------------------------
# NAME : _SearchWithinLinks
# PURPOSE : Take in an id, return a scalar that contains
# text and links for searches restricted to that text
# CALLED BY :
# CALLS : $self->GetTermMapper, $self->GetXPatObject,
#         $self->GetValueByKey, map on the term mapper,
#         GetSimpleResultsFromQuery on the XPat object,
#         escapeHTML (CGI)
# INPUT : cgi object; the id is read from its 'idno' parameter
# RETURNS : the search links followed by one of: the "view the entire
#           text" line with an approximate size, an error line, or a
#           "nothing found" line
# GLOBALS : $ENV{'DEBUG'}
# SIDE-EFFECTS : runs one XPat query; prints when $ENV{'DEBUG'} is set
# NOTES : Was part of __thisTextToolbar in gums-idx.dev
# Needs work. Just stubbed in right now.
# The id and the XPat error text are html-escaped before they are
# put into the returned string, since the id comes straight from the
# request.
# ----------------------------------------------------------------------
sub _SearchWithinLinks
{
    my $self = shift;
    my $cgi = shift;

    my $idno = $cgi->param( 'idno' );
    my $tm = $self->GetTermMapper( );
    my $xpat = $self->GetXPatObject( );
    my $coll = $self->GetValueByKey( 'collid' );

    my $searchLinks =
        join ( "\n",
               qq{simple search}, ',',
               qq{proximity search}, 'or',
               qq{boolean search}
             );

    # do pat search for main region
    # NOTE(review): $idno goes into the query text as received; a
    # double quote in it would end the quoted term early. Confirm how
    # XPat expects such values to be quoted before changing this.
    my ( $idRgn ) = $tm->map( 'IDNO', 'synthetic', 'native' );
    my ( $error, $startEnd ) =
        $xpat->GetSimpleResultsFromQuery(qq{pr.region.main (region main incl ($idRgn incl "$idno")); });

    if ( $ENV{'DEBUG'} )
    { print(qq{}); }

    # request-supplied text must not reach the page unescaped
    my $safeIdno = defined $idno ? escapeHTML( $idno ) : '';

    my $htmlLink;
    if ( $error )
    {
        $htmlLink = qq{Error returned from XPat search: } . escapeHTML( $error ) . qq{\n};
    }
    elsif ( $startEnd =~ m,(\d+)(\d+), )
    {
        # size of the region, shown in the largest unit that gives a
        # value above 1, to one decimal place
        my $byteCount = $2 - $1;
        my $megabytes = $byteCount / ( 1024 * 1024 );
        my $kilobytes = $byteCount / 1024;

        my $htmlSize;
        if ( $megabytes > 1 )
        { $htmlSize = ' (~' . ( int ( 10 * $megabytes ) / 10 ) . ' megabytes)'; }
        elsif ( $kilobytes > 1 )
        { $htmlSize = ' (~' . ( int ( 10 * $kilobytes ) / 10 ) . ' kilobytes)'; }
        else
        { $htmlSize = " ($byteCount bytes)"; }

        $htmlLink = qq{\nView the entire text$htmlSize.\n};
    }
    else
    {
        $htmlLink = qq{Nothing found with id number $safeIdno in collection $coll\n};
    }

    return ( $searchLinks . $htmlLink );
}
# ----------------------------------------------------------------------
# NAME : TextFilter
# PURPOSE :
#
# CALLED BY :
# CALLS :
# INPUT :
# RETURNS :
# GLOBALS :
# SIDE-EFFECTS :
# NOTES : was separate gumsfilt.pl file
# ----------------------------------------------------------------------
sub TextFilter
{
my $self = shift;
my ( $sRef, $collid, $idno, $cgi ) = @_;
my $results = '';
## GUMS SGML filter. expects command line options for certain optional
## text-processing variables, and then text from STDIN, to be returned
## on STDOUT. tries to do some intelligent chunking of input so that
## things dont block.
## will process a HEADER in a no-frills way if present, otherwise
## just concentrates on the TEXT. this processes text such that it
## will be placed in an HTML BODY element, there are some things that
## the caller is responsible for (things in the HTML HEAD element,
## say)
## this filter might need to know:
## -O # optional requires for special
# routines (how to make EMBEDs or
# special hyperlinks)
## at the top of each loop through input line(s) there is a check to
## see if optional requires were made, and if so then
## &main::gumsFiltOptionals(\$i) is called (so the required file must
## define this, it must accept a reference to a scalar as the first
## and only argument).
## -F # url fragments for external entities
# like images or other auxiliary files
## -A # a path that is the system path represented
# by the -F argument, so this script can
# check to see whats there that fits
# the entity.
## this value, if present, gets used in the generic graphic/image
## filtering, assuming the value of the given entity attributes means
## a filename of some kind.
my @EntityExtensionsToCheck = (
'.gif',
'.jpeg',
'.jpg',
'.tif',
'.tiff',
'.mov',
'.GIF',
'.JPEG',
'.JPG',
'.TIF',
'.TIFF',
'.MOV',
);
my %HIstarts = (
'italic' => ' ',
'ital' => ' ',
'italics' => ' ',
'italics, underlined' => ' ',
'italics?' => ' ',
'smcap' => ' ',
'sc' => ' ',
'scital' => ' ',
'underlined' => ' ',
'und' => ' ',
'gothic' => ' ',
'underlined 2x' => ' ',
'underlined 3x' => ' ',
'indented' => ' ',
);
my %HIends = (
'italic' => ' ',
'ital' => ' ',
'italics' => ' ',
'italics, underlined' => ' ',
'italics?' => ' ',
'smcap' => ' ',
'sc' => ' ',
'scital' => ' ',
'underlined' => ' ',
'und' => ' ',
'gothic' => ' ',
'underlined 2x' => ' ',
'underlined 3x' => ' ',
'indented' => '',
);
if ($$sRef =~ s,(]*>.*?),,s)
{
my $header = $1;
$header = &_HeaderFilter( \$header );
$$sRef =~ s,,
$header
,;
}
## body below
## DIVs
while ($$sRef =~ s,
]*>(.*?)
,
$2
,gs) { }
while ($$sRef =~ s,
]*>,\n,gs) { }
## DIV-ish things that are not named DIV\d+
$$sRef =~ s,<(ARGUMENT|CLOSER|DEDICAT|EPIGRAPH|PREFACE|SIGNED|TRAILER)[^>]*>,
]*>(.*?),$1,gs;
$$sRef =~ s,]*>(.*?),$1,gs;
$$sRef =~ s,]*>,,gs;
$$sRef =~ s,,,gs;
s,]*>(.*?),$1,gs;
$$sRef =~ s,]*>,,gs;
$$sRef =~ s,, ,gs;
# this is a nasty non-gumsian hack to get around a need to show the
# genre and publication info for Yeats, which is not in the header.
# it's the best I can do with my feeble skills. sooty 10-25-99
$$sRef =~ s|(.*?)(.*?)([0-9]+)(.*?)|
About this work:
Author: $1
Gender: $5
Genre: $2
First published: $4
|gs;
# if I were a better programmer, I wouldn't have to do this for ones without genre. sooty 01-14-00
$$sRef =~ s|(.*?)([0-9]+)(.*?)|
About this work:
Author: $1
Gender: $4
This edition published: $2
|gs;
$$sRef =~ s,]*>(.*?),,gs;
## Hack for Pound page images
while ($$sRef =~ s,