#!/usr/bin/perl -w
############################################################################
#This will be my second attempt at writing a .html text
#database search script. Much for the following is owed to
#the authors and contributors to the Webpage search protocol
#Webglimpse, by Michael Smith, which was used as a starting 
#point for this program. Without it I don't think I could have
#ever gotten this thing to work (these lines said in hopeful
#anticipation).
#
#This program is brought to you by the letters G and L and I and M and P
#and S and E. =)
############################################################################ 
#/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\#
############################################################################
#		**** Global Varriables ****
#		**** Default Settings ****
############################################################################
#               **** System Configuration *****
############################################################################
#Tells the search where everything is at.
$glimpse_loc = "/usr/local/bin/glimpse";
$index_loc = "/home/hep/drschuet/public_html";

$cgi_url = "http://hep.uchicago.edu/cgi-bin"; 
$web_base = "http://hep.uchicago.edu/~drschuet/public_html";
$dir_base = "/home/hep/drschuet/public_html";

############################################################################
#               **** No More Configuration Needed ****
#               **** Below this line ****
############################################################################
#According to those who know greater things glimpse will sometimes need 
#to evoke system programs such as cat, sort, mv, and what not. Hence we
#need to set up a path so it can find them.
$ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin';

#Here we set the defaults values for the user inputs (yes I am borrowing a 
#lot here from Michael Smith's webglimpse).
$query = ' ';
$qs_case = ' '; 		# Case sensitive when 'on'.
$qs_errors = ' ';		# Number of spelling errors allowed in a match.
$qs_query = ' ';		# The actual search querry.
$qs_whole = ' ';		# Whole or partial word search.

$opt_case = "-i";
$opt_errors = ' ';
$opt_whole = ' ';

#It appears that as glimpse is indexing a direcotry it creates a file called
#indexing in process. Since a query cannot be run while an index is being
#constructed we need a way to lock out query attempts during indexing.
$lockfile = "indexing-in-progress";

$rslts_a = ' ';
$rslts_b = "\n";

############################################################################
#/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\#
############################################################################
#		**** The Program ****
############################################################################
$| = 1;

#Initial response to server.
print "Content-type: text/html\n\n";
print "<HTML>\n";
print "<HEAD>\n";

#Unsuprisingly we shall take the same approach as webglimpse and use this#
#cgi to construct a page for retrieval by the client using the GET method.#
#Thus we first need to results of the clients querry. Since the method we#
#are using is GET these results we recieve are sent in the QUERY_STRING#
#portion of the ENV our cgi recieves from the server. Here we get the#
#results if they exist or run the error no query subroutine if they do not.#
($query = $ENV{'QUERY_STRING'}) or &err_noquery ;

#These results are currently encoded in a nasty URL encoding. query is now#
#a set of namevalue pairs in string form each pair seperated from the#
#others by an  and all non alphaneumeric characters have been replaced#
#by their hexadecimal equivilnts. So we need to break up the $query string# 
#into namevalue pairs and the divide these namevalue pairs into the name#
#value elements appending a qs_ to the name element to make referencing#
#easier decode the code then set the name and varriable again equal to#
#each other though this time not as a string.#

foreach $bitowork (split (m/\&/, $query)) {
	$bitoname = '';
	$bitovalue = '';
	($bitoname, $bitovalue) = (split( m/=/, $bitowork));
	#Run a subroutine to decode the URL encoding.#
	$bitovalue = url_decoding($bitovalue);
	$bitoname = url_decoding($bitoname);
	#Anouther bit of code whose purpose I am a little unsure of.#
	#I belive that it ensures that the line contains only#
	#certian acceptible characters, some sort of security.#
	if ($bitoname =~ /^[a-zA-Z0-9_]*$/ ) {
		$varname = "qs_$bitoname";
		$$varname = "$bitovalue";
	}
}

#Make sure that glimpse isn't running an index.
if (-e "$index_loc/$lockfile"){
	&err_locked;
}

#Check to make sure that glimpse is on the machine.#
-x $glimpse_loc || &err_noglimpse($glimpse_loc);

#Check to make sure that the index is available.
#-r "$index_loc/.glimpse_index" || &err_noindex($index_loc);

#Now we use these new variables to construct the users options.
$opt_errors = "-$qs_errors" if $qs_errors =~ /^[0-8]$/;
$opt_case = " " if $qs_case =~ /^on$/;
$opt_whole = "-w" unless $qs_whole =~ /^on$/;

#Here is where we prepare the query before we give it to glimpse. We need
#to remove the everything which is not a word and replace it with a |; 
#except where the query begins with something which is not a word then we 
#must remove but not replace that non-word.
$prepared_query = prepare_query($qs_query);

#Check to make sure that the query is not empty. 
if (!(defined($prepared_query))) {
    &err_badquery;
}

#$prepared_query = '\b(' . $prepared_query . ')\b' if opt_whole;

#Here's where the real work occurs. We define the command for glimpse 
#and then hopefully get glimpse to run. Yet anouther result of 
#Webglimpses inspirational tutoring.
$cmd = "glimpse -l -O -y -H $index_loc $opt_case $opt_whole $opt_errors"
	. " \'$prepared_query\' > tmp_srch_rslts";
`$cmd`;

#print "<HR><CENTER>-*- Glimpse command: $cmd -*-</CENTER><HR>\n";

#Open the termprary results search file as OUTLAND 
if (!(open OUTLAND, "tmp_srch_rslts")) {
    print "<H3 ALIGN=center><FONT COLOR=yellow>Danger, Will Robinson. Danger!</FONT></H3>";
    &err_noglimpse;
}

#Check the return code
$rc = $? >> 8;
if ($rc!=0) {
	&err_badglimpse(<OUTLAND>);
}

#If it gets to this point it should be working. . .

#We begin our response now.
$rslts_a .= "<TITLE>Results for query \"$qs_query\"</TITLE>\n";
$rslts_a .= "<BASE HREF=\"$web_base\">\n";
$rslts_a .= "</HEAD>\n\n";
$rslts_a .= "<BODY BACKGROUND=\"backgrounds/bg1944.jpg\">\n";
$rslts_a .= "<H3 ALIGN=center><HR>\n";
$rslts_a .= "<FONT COLOR=blue>Results for query \`\`$qs_query\'\'</FONT>\n";
$rslts_a .= "<HR></H3>\n\n";

#Now we want to take this and use it to construct the final part of our
#web.

$rslts_b .= "<FORM ACTION=\"$cgi_url/mh_gamma\" METHOD=\"GET\">\n";
$rslts_b .= "<INPUT TYPE=\"hidden\" NAME=\"query_name\" VALUE=\"$qs_query\">\n";
$rslts_b .= "<CENTER> \n <TABLE BORDER>\n";

#Here we process the results of the search. Each line of the glimpse
#output is processed individually. First it is broken up into an array
#then we save the last two values in the array (the file iteself and the
#directory which immediatly proceeds it. Here I make the assumption 
#that your file structure is one and only one directory deep, i.e. that
#the database is stored in a directory immediatly attached to 
foreach $file (<OUTLAND>) {
        $f_ref = relref($file);
	@norc = split ("/", $f_ref);		
	$file = pop @norc;
#	$file_noext = rm_end($file);
	
	$rslts_b .= "<TR>\n";
	$rslts_b .= "<TD><A HREF=\"$f_ref\">$file</A></TD>\n";
	$rslts_b .= "<TD>\n";
	$rslts_b .= "<INPUT TYPE=\"hidden\" NAME=\"loc_$file\" VALUE=\"$f_ref\">\n";
	$rslts_b .= "<INPUT TYPE=\"hidden\" NAME=\"old_$file\" VALUE=\"LaTeX Source\">\n";
	$rslts_b .= "<SELECT NAME=\"new_$file\">\n";
        $rslts_b .= "<OPTION SELECTED>LaTeX Source<OPTION>\n";
	$rslts_b .= "<OPTION>LaTeX \.dvi<OPTION>\n";
	$rslts_b .= "<OPTION>Post Script<OPTION>\n";
	$rslts_b .= "</SELECT>\n";
	$rslts_b .= "</TD>\n";
	$rslts_b .= "<TD>\n";
	$rslts_b .= "<INPUT TYPE=\"submit\">\n";
	$rslts_b .= "</TD>\n";
	$rslts_b .= "<TD>\n";
	$rslts_b .= "<INPUT TYPE=\"reset\">\n";
	$rslts_b .= "</TD>\n";
        $rslts_b .= "</TR>\n";
}


#Attach an end note.
$rslts_b .=  "\n </TABLE> \n </CENTER> \n </FORM> \n </BODY> \n </HTML> \n";

#Publish a.
print "$rslts_a";

#Publish b.
print "$rslts_b";

#Put away all our toys.
close(OUTLAND);
`rm tmp_srch_rslts`;

 
############################################################################
#/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\#
############################################################################
#		<--> Subroutines <-->
############################################################################
#The script was called without a query.
sub err_noquery {
    print "<TITLE>No Querry</TITLE>\n";
    print "</HEAD>\n\n";
    print "<BODY>\n";
    print "<H3 ALIGN=center>No Query</H3>\n";
    print "<P>The script was called withour a query.</P>\n\n";
    print "</BODY>\n\n</HTML>";
    exit -1;
}

############################################################################
#Glimpse could not be found.
sub err_noglimpse {
    print "<TITLE>No Glimpse</TITLE>\n";
    print "</HEAD>\n\n";
    print "<BODY>\n";
    print "<H3 ALIGN=center>Glimpse was not found</H3>\n";
    print "<CENTER>Using: $glimpse_loc</CENTER>\n\n";
    print "</BODY>\n\n</HTML>";
    exit -1;
}

############################################################################
#Glimpse has an error.
sub err_badglimpse {
    my(@glines) = @_;
    print "<TITLE>Bad Glimpse</TITLE>\n";
    print "</HEAD>\n\n";
    print "<BODY>\n";
    print "<H3 ALIGN=center>An error has occured with glimpse.</H3>\n";
    print "<P>The search papameters resulted in an error with glimpse.";
    print "Please try your search again with diffrent parameters.</P>\n";
    print "<HR>\n";
    print "<P>Output from glimpse:\n";
    print "<PRE>@glines</PRE>\n";
    print "<HR>\n\n";
    print "</BODY>\n\n</HTML>";
    exit -1;
}

############################################################################
#Glimpse index could not be found.
sub err_noindex {
    local($index_loc) = @_;
    print "<TITLE>Glimpse Index Not Found</TITLE>\n";
    print "</HEAD>\n\n";
    print "<BODY>\n";
    print "<H3 ALIGN=center>Glimpse Index Not Found</H3>\n";
    print "<P>The glimpse index was either not found or it could not be accessed.</P>\n";
    print "<CENTER>Using: $index_loc</P><CENTER>\n\n";
    print "</BODY>\n\n</HTML>";
    exit -1;
}

############################################################################
#Glimpse configuration file missing.
sub err_conf {
    print "<TITLE>Glimpse configuration file missing</TITLE>\n";
    print "</HEAD>\n\n";
    print "<BODY>\n";
    print "<H3 ALIGN=center>Glimpse configuration file missing</H3>\n\n";
    print "</BODY>\n\n</HTML>";
    exit -1;
}

############################################################################
sub err_badquery {
    print "<TITLE>Bad query</TITLE>\n";
    print "</HEAD>\n\n";
    print "<BODY>\n";
    print "<H3 ALIGN=center>Bad Query</H3>\n";
    print "<P>The query contains no word and will thus take to long.";
    print "Please try your search again with diffrent parameters.</P>\n\n";
    print "</BODY>\n\n</HTML>";
    exit -1;
}

############################################################################
#Glimpse is indexing
sub err_locked {
    print "<TITLE>Indexing in progress</TITLE>\n";
    print "</HEAD>\n\n";
    print "<BODY>\n";
    print "<H3 ALIGN=center>Indexing in progress</H3>\n";
    print "<P>Please try query again later.</P>\n\n";
    print "</BODY>\n\n</HTML>";
    exit -1;
}

############################################################################
sub url_decoding {
    #This bit of decoding is borrowed from a contribution to 
    #Webglimpse which I believe as writen by Peter Bigot, although
    #the actual code appears to have been added by someone else.
    local($_) = @_;
    s/\+/ /g;
    s/%([A-Fa-f0-9]{2})/pack("c",hex($1))/ge;
    return $_
}

############################################################################
sub prepare_query {
    local($_) = @_;
    s/^\W+|\W+$//;
    s/\W+/\;/g;
    return $_;
}

############################################################################
sub rm_end {
	local($_) = @_;
	s/\.(...)//;
	return $_;
}

############################################################################
sub relref {
        local($_) = @_;
	s/^$dir_base\///;
	return $_;
}









