/* @(#)html.c 1.9 95/09/16 */

/*
 * Copyright (c) 1994, 1995 by Wayne C. Gramlich.  All rights reserved.
 *
 * Permission to use, copy, modify, distribute, and sell this software
 * for any purpose is hereby granted without fee provided that the above
 * copyright notice and this permission are retained.  The author makes
 * no representations about the suitability of this software for any purpose.
 * It is provided "as is" without express or implied warranty.
 */

/* LINTLIBRARY */

#include "assert.h"
#include "config_extern.h"
#include "chr_extern.h"
#include "error_extern.h"
#include "html_extern.h"
#include "http_extern.h"
#include "link_extern.h"
#include "memory_extern.h"
#include "remote_extern.h"
#include "str_extern.h"
#include "text_extern.h"
#include "vector_extern.h"
#include "url_extern.h"

/*
 * ATTRIBUTES_LOOP(variable, attributes)
 *	Loop header used in this file as
 *	"ATTRIBUTES_LOOP(attribute, attributes) { ... }".  It expands to
 *	VECTOR_LOOP() with {Attribute} as the element type; {variable}
 *	is the loop variable and {attributes} the vector that is walked.
 */
#define ATTRIBUTES_LOOP(variable, attributes) \
    VECTOR_LOOP(Attribute, variable, attributes)

/*
 * Character that html_search() treats specially inside a pattern: it
 * is not compared against the document; instead the place in the
 * document that has been reached when it is seen is recorded as the
 * marker position of the resulting {Match}.
 */
const Chr marker_chr = (Chr)1;		

/*
 * One attribute of a start tag as built by attributes_parse().  The
 * pieces of the original source are kept separately so that
 * attributes_write() can write the attribute back out.
 */
struct attribute_struct {
    Attribute_type type;	/* How the value was written (bare or quoted) */
    Text white_space;		/* White space before attribute name/value */
    Text name;			/* Attribute name (or 0 if not present) */
    Str upper_case_name;	/* Name as an upper case string (0 if no name) */
    Text equals;		/* The "=" with all surrounding white space */
				/* 0 means no "=" value is present */
    Text value;			/* The attribute value with any quotes removed; */
				/* attributes_write() puts the quotes back */
};

/*
 * A parsed document.  {index} is updated by html_write_to_body_start()
 * and html_write_to_body_end() to the tag they stopped at, and is where
 * html_write_to_body_end() and html_write_remaining() resume.
 */
struct html_struct {
    unsigned index;		/* Current index into {tags} */
    Tags tags;			/* Tags making up entire document */
};

/*
 * The result of a successful html_search().  Each position is a pair:
 * an index into the {tags} vector of {html} plus a character offset
 * into the {preceeding_text} of that tag.
 */
struct match_struct {
    unsigned end_index;		/* Index into {tags} vector of {html} */
    unsigned end_offset;	/* Offset just past the last matched character */
    Tag end_tag;		/* The {tag} containing the pattern end */
    Html html;			/* {Html} object match was found in */
    unsigned marker_index;	/* Index into {tags} vector of {html} */
    unsigned marker_offset;	/* Offset of the marker; when the pattern has */
				/* no marker this is the end position */
    Tag marker_tag;		/* The {tag} containing the pattern marker */
    Str pattern;		/* Pattern that matched */
    Remote remote;		/* Remote object associated with match */
    unsigned start_index;	/* Index into {tags} vector of {html} */
    unsigned start_offset;	/* Offset of the first matched character */
    Tag start_tag;		/* The {tag} containing the pattern start */
};

/*
 * One element of a parsed document: a run of text followed by the
 * markup tag that ends it.  html_parse() keeps the pieces of the tag's
 * source separately so that tag_write() can write the tag back out.
 */
struct tag_struct {
    Tag_type type;		/* Kind of tag (start, end, comment, end of file) */
    Text preceeding_text;	/* Text and white-space preceding "<" */
    Text open_bracket;		/* Characters from "<" to next token */
    Text name;			/* Tag name (excluding any preceding "/") */
    Str upper_case_name;	/* Upper case tag name as a string; "" when */
				/* the tag has no name */
    Attributes attributes;	/* Attribute list (0 unless a start tag) */
    Text close_bracket;		/* End of last attribute up to closing ">" */
};

/*
 * Forward declarations of the routines that are private to this file.
 * Every declaration spells out its return type; relying on an implicit
 * "int" is not valid in C99 and later.
 */
static void attributes_append(Attributes, Attribute);
static Attributes attributes_create(void);
static Attributes attributes_parse(Text);
static void attributes_write(Attributes, FILE *);

static Html html_create(void);

static int match_compare(Match, Match, void *);

static Match matches_fetch(Matches, unsigned);
static unsigned matches_size(Matches);

static Tag tag_create(void);
static int tag_is_head(Tag);
static void tag_write(Tag, FILE *);

static void tags_append(Tags, Tag);
static Tags tags_create(void);
static Tag tags_fetch(Tags, unsigned);
static unsigned tags_size(Tags);

/*
 * attributes_append(attributes, attribute)
 *	This routine will append {attribute} to the end of {attributes}.
 *	It is private to this file, so the definition repeats the
 *	"static" of its forward declaration.
 */
static void
attributes_append(
    Attributes attributes,
    Attribute attribute)
{
    vector_append((Vector)attributes, (Pointer)attribute);
}

/*
 * attributes_create()
 *	This routine will create and return an empty {Attributes} object.
 */
static Attributes
attributes_create(void)
{
    Attributes attributes;

    attributes = (Attributes)vector_create();
    return attributes;
}

/*
 * attributes_write(attributes, out_file)
 *	This routine will write each attribute in {attributes} to
 *	{out_file}: its leading white space, then the name and "=" when
 *	a name is present, then the value.  The value is wrapped in
 *	single or double quotes when the attribute type says it was
 *	quoted.
 */
static void
attributes_write(
    Attributes attributes,
    FILE *out_file)
{
    Attribute current;

    ATTRIBUTES_LOOP(current, attributes) {
	Attribute_type kind;

	/* Leading white space, then "name=" when there is a name: */
	text_write(current->white_space, out_file);
	if (current->name != (Text)0) {
	    text_write(current->name, out_file);
	    assert(current->equals != (Text)0);
	    text_write(current->equals, out_file);
	}

	/* The value, quoted the way it was found: */
	kind = current->type;
	if (kind == Attribute_name_value) {
	    text_write(current->value, out_file);
	} else if (kind == Attribute_single_quote) {
	    (void)fprintf(out_file, "'");
	    text_write(current->value, out_file);
	    (void)fprintf(out_file, "'");
	} else if (kind == Attribute_double_quote) {
	    (void)fprintf(out_file, "\"");
	    text_write(current->value, out_file);
	    (void)fprintf(out_file, "\"");
	} else {
	    /* Unknown attribute type: */
	    assert(0);
	}
    }
}

/*
 * attributes_parse(text)
 *	This routine will parse as many attributes off of the front of
 *	{text} as possible and return them.  Each parsed attribute is
 *	lopped off of {text}, so on return {text} starts at whatever
 *	followed the last attribute.
 *
 *	Parsing stops quietly at the first thing that is not an
 *	attribute.  It also stops at a value that starts with a quote
 *	but has no name, and at a quoted value that reaches a new-line
 *	or the end of {text} before its closing quote; the attributes
 *	parsed up to that point are still returned.
 *
 *	Both value scans are bounded by text_size() so that they cannot
 *	run past the end of {text}.
 */
static Attributes
attributes_parse(
    Text text)
{
    Attribute attribute;
    Attributes attributes;
    Chr chr;
    unsigned index;

    attributes = attributes_create();

    /* Slurp up each attribute: */
    index = 0;
    for (;;) {
	while (chr_is_white_space(text_chr_fetch(text, index))) {
	    index++;
	}
	chr = text_chr_fetch(text, index);
	if ((chr == '\'') || (chr == '"')) {
	    /*
	     * SGML permits the attribute name to be dropped when
	     * short tags are enabled as long as there is no ambiguity.
	     * Nobody really writes HTML this way though.
	     */
	    goto messed_up;
	}
	if (!chr_is_name_start(chr)) {
	    break;
	}

	/* Allocate the {Attribute} object: */
	attribute = (Attribute)memory_allocate_zeroed(sizeof *attribute);
	attribute->white_space = text_lop_shallow(text, index);

	/* Get the attribute name: */
	attribute->name = text_lop_name_shallow(text);
	attribute->upper_case_name = text_str_upper_case(attribute->name);
	index = 0;

	/* Go searching for the '=': */
	while (chr_is_white_space(text_chr_fetch(text, index))) {
	    index++;
	}
	if (text_chr_fetch(text, index) == '=') {
	    Chr quote;

	    /* Found it: */
	    index++;
	    attribute->equals = text_lop_white_space_shallow(text, index);
	    index = 0;

	    /* Slurp in the value: */
	    chr = text_chr_fetch(text, index);
	    if ((chr == '\'') || (chr == '"')) {
		/*
		 * Quoted literal string.  This is a really sloppy string
		 * parser.  A careful reading of the SGML grammar shows
		 * that we should only allow "replaceable character data"
		 * inside a string.  Since nobody who writes HTML has
		 * the foggiest notion of what "replaceable character data"
		 * is we won't commit the sin of being that careful.
		 * Instead, we'll do what all the other HTML parsers
		 * do -- scan until a closing quote is found.
		 */
		quote = chr;
		/* Drop quote on floor */
		text_free_shallow(text_lop_shallow(text, 1));
		index = 0;
		for (;;) {
		    if (index >= text_size(text)) {
			/* Ran out of text before the closing quote: */
			goto messed_up;
		    }
		    chr = text_chr_fetch(text, index);
		    if (chr == '\n') {
			/* Unterminated string: */
			goto messed_up;
		    }
		    if (chr == quote) {
			break;
		    }
		    index++;
		}
		attribute->value = text_lop_shallow(text, index);
		attribute->type = (quote == '"') ?
		  Attribute_double_quote : Attribute_single_quote;
		index = 0;
		/* Drop quote on floor */
		text_free_shallow(text_lop_shallow(text, 1));
	    } else {
		/*
		 * "True" SGML only permits a name here.  However, all
		 * HTML parsers are so sloppy that they allow anything
		 * excluding a white-space here.  Since we are not on
		 * a religious SGML crusade, we let the slop through.
		 */
		for (;;) {
		    if (index >= text_size(text)) {
			/* The value runs to the end of the text: */
			break;
		    }
		    chr = text_chr_fetch(text, index);
		    if ((chr == '>') || chr_is_white_space(chr)) {
			break;
		    }
		    index++;
		}
		attribute->value = text_lop_shallow(text, index);
		attribute->type = Attribute_name_value;
	    }
	    index = 0;
	} else {
	    /*
	     * The attribute name is really its value.  This is only
	     * valid when short tags are enabled and there is no
	     * ambiguity.  Nobody writes HTML this way though.
	     */
	    attribute->type = Attribute_name_value;
	    attribute->equals = (Text)0;
	    attribute->value = attribute->name;
	    attribute->name = (Text)0;
	    attribute->upper_case_name = (Str)0;
	}
	attributes_append(attributes, attribute);
    }
    return attributes;

  messed_up:
    /*
     * NOTE(review): when we get here from inside a quoted value the
     * half-built {attribute} has not been appended and is not released.
     */
    return attributes;
}

/*
 * attributes_scan(attributes, remote, config)
 *	This routine will look for an HREF attribute in {attributes}.
 *	When there is one, and url_is_local() says (using {config}) that
 *	it refers to a local document, a new {Link} is created from the
 *	HREF together with the NAME and DESCRIBE attributes and is
 *	appended to the {links} list of {remote}.  Otherwise nothing
 *	happens.
 */
void
attributes_scan(
    Attributes attributes,
    Remote remote,
    Config config)
{
    Str describe_value;
    Str name_value;
    Link new_link;
    Str target;
    Url target_url;

    /* No HREF, no link: */
    target = attributes_search(attributes, (Str)"HREF");
    if (str_is_empty(target)) {
	return;
    }

    /* Only references to documents on this machine are recorded: */
    target_url = url_parse(target);
    if (!url_is_local(target_url, config)) {
	return;
    }

    describe_value = attributes_search(attributes, (Str)"DESCRIBE");
    name_value = attributes_search(attributes, (Str)"NAME");
    new_link = link_create(remote, target_url, name_value, describe_value);
    links_append(remote_links(remote), new_link);
}

/*
 * attributes_search(attributes, name)
 *	This routine will return a copy of the value of the first
 *	attribute in {attributes} whose upper-case name equals {name}.
 *	{name} must be upper-case.  If {name} is not in {attributes},
 *	{(Str)""} is returned.
 *
 *	Attributes that were written without a name have a null
 *	{upper_case_name}; they are skipped rather than being handed
 *	to str_equal().
 */
Str
attributes_search(
    Attributes attributes,
    Str name)
{
    Attribute attribute;

    ATTRIBUTES_LOOP(attribute, attributes) {
	Str candidate;

	candidate = attribute->upper_case_name;
	if ((candidate != (Str)0) && str_equal(candidate, name)) {
	    return text_str_copy(attribute->value);
	}
    }
    return (Str)"";
}

/*
 * html_create()
 *	This routine will create and return an empty Html object.
 */
static Html
html_create(void)
{
    Html html;

    html = (Html)memory_allocate_zeroed(sizeof *html);
    return html;
}

/*
 * html_make_absolute(html, base_url)
 *	This routine will go through every tag of {html} and replace
 *	the value of each HREF and SRC attribute with the result of
 *	url_make_absolute() applied to that value and {base_url}.
 *
 *	Attributes that were written without a name have a null
 *	{upper_case_name}; they are skipped rather than being handed
 *	to str_equal().
 */
void
html_make_absolute(
    Html html,
    Url base_url)
{
    unsigned tag_index;
    Tags tags;
    unsigned size;

    tags = html->tags;
    size = tags_size(tags);
    for (tag_index = 0; tag_index < size; tag_index++) {
	Attribute attribute;
	Attributes attributes;

	/* Only start tags carry an attribute list: */
	attributes = tags_fetch(tags, tag_index)->attributes;
	if (attributes == (Attributes)0) {
	    continue;
	}
	ATTRIBUTES_LOOP(attribute, attributes) {
	    Str upper_case_name;

	    upper_case_name = attribute->upper_case_name;
	    if ((upper_case_name != (Str)0) &&
	      (str_equal(upper_case_name, (Str)"HREF") ||
	      str_equal(upper_case_name, (Str)"SRC"))) {
		Url new_url;
		Str new_url_str;
		Url original_url;
		Str original_url_str;

		/* Text -> Str -> Url, make absolute, then back again: */
		original_url_str = text_str_copy(attribute->value);
		original_url = url_parse(original_url_str);
		new_url = url_make_absolute(original_url, base_url);
		new_url_str = url_str(new_url);
		attribute->value =
		  text_create(new_url_str, str_size(new_url_str));
	    }
	}
    }
}

/*
 * html_parse(text)
 *	This routine will parse {text} into an {Html} object holding
 *	one {Tag} per markup tag, each with the text that preceded it,
 *	followed by a final end-of-file tag that holds any trailing
 *	text.  {text} itself is not consumed; a shallow copy is.
 *
 *	(Html)0 is returned when the markup can not be parsed: a "<"
 *	as the last character, "<?", "<>", a tag name that does not
 *	start with a name character, or a tag with no closing ">".
 *	NOTE(review): nothing built so far is released on that path.
 */
Html
html_parse(
    Text text)
{
    unsigned index;
    Html html;
    Tags tags;

    html = html_create();
    tags = tags_create();
    html->tags = tags;

    text = text_copy_shallow(text);
    for (;;) {
	Chr chr;
	int offset;
	Tag tag;

	tag = tag_create();
	tag->attributes = (Attributes)0;
	tag->upper_case_name = (Str)"";
	offset = text_chr_search(text, '<');
	if (offset < 0) {
	    /* No more tags.  Fill in the last {tag} object and leave: */
	    tag->type = Tag_end_of_file;
	    tag->preceeding_text = text;
	    tags_append(tags, tag);
	    break;
	}

	tag->preceeding_text = text_lop_shallow(text, (unsigned)offset);
	if (text_size(text) <= 1) {
	    goto messed_up;
	}

	index = 1;
	while (chr_is_white_space(text_chr_fetch(text, index))) {
	    /*
	     * White-space after '<' is not permitted in "true"
	     * SGML.  Most people who write HTML neither know nor care
	     * about correct SGML syntax.  Hence, we will silently
	     * accept the white-space.
	     */
	    index++;
	}

	chr = text_chr_fetch(text, index);
	switch (chr) {
	    case '!':
	      {
		/*
		 * The only SGML declarations allowed in HTML should
		 * be comments.
		 */
		tag->type = Tag_comment;
		tag->open_bracket = text_lop_shallow(text, index);

		/*
		 * {text} now starts at the '!', so the old {index} no
		 * longer refers to the same character.  Start over so
		 * that the code below finds the first '>'.
		 */
		index = 0;

		/* Sloppy!  Should look for "--" */
		break;
	      }
	    case '?':
		/*
		 * While "true" SGML permits "<? ...>" markup, no
		 * HTML should ever have it.
		 */
		goto messed_up;
	    case '/':
		/* End-tag: */
		/* Again, silently accept any bogus white space: */
		tag->type = Tag_end;
		index++;
		tag->open_bracket = text_lop_white_space_shallow(text, index);

		/* Get the tag name: */
		tag->name = text_lop_name_shallow(text);
		tag->upper_case_name = text_str_upper_case(tag->name);
		index = 0;
		break;
	    case '>':
		/*
		 * Empty Start-tag:
		 */
		goto messed_up;
	    default:
	      {
		if (!chr_is_name_start(chr)) {
		    /* Messed up tag: */
		    goto messed_up;
		}
		tag->type = Tag_start;
		tag->open_bracket = text_lop_shallow(text, index);

		/* Get the tag name: */
		tag->name = text_lop_name_shallow(text);
		tag->upper_case_name = text_str_upper_case(tag->name);
		index = 0;

		tag->attributes = attributes_parse(text);
	      }
	}

	/* Slurp up everything to the closing '>': */
	while (chr_is_white_space(text_chr_fetch(text, index))) {
	    index++;
	}
	if (text_chr_fetch(text, index) == '>') {
	    index++;
	    tag->close_bracket = text_lop_shallow(text, index);
	} else {
	    /* For now, slurp everything else up: */
	    offset = text_chr_search(text, '>');
	    if (offset < 0) {
		goto messed_up;
	    }
	    tag->close_bracket =
	      text_lop_shallow(text, (unsigned)offset + 1);
	}
	assert(tag != (Tag)0);
	tags_append(tags, tag);
    }
    return html;

  messed_up:
    return (Html)0;
}

/*
 * html_read(url, errors)
 *	This routine will fetch the document named by {url} with
 *	http_url_read() and return the result of parsing it.
 *	(Html)0 is returned when the document can not be fetched;
 *	otherwise whatever html_parse() returns is passed back.
 */
Html
html_read(
    Url url,
    Errors errors)
{
    Text document;

    document = http_url_read(url, errors);
    return (document == (Text)0) ? (Html)0 : html_parse(document);
}

/*
 * html_search(html, pattern, remote)
 *	This routine will find the first place {pattern} occurs in the
 *	text of {html} and return an associated {Match} object;
 *	otherwise, (Match)0 is returned.  {pattern} must be one or more
 *	words separated by single spaces.  {remote} is saved in the
 *	resulting {Match} object.
 *
 *	The comparison ignores case.  A space in {pattern} matches a
 *	whole run of white space in the document, and the run may
 *	continue across tags.  Markup is not part of the searched text,
 *	so a match may start in the text before one tag and end in the
 *	text before a later one.  Text that precedes a header tag (see
 *	tag_is_head()) is never used as the start of a match.
 *
 *	{pattern} may contain {marker_chr}.  It matches nothing; the
 *	place reached in the document when it is seen is stored as the
 *	marker position of the {Match}.  Without a marker the marker
 *	position is the end of the match.
 *
 *	An empty {pattern} never matches.
 */
Match
html_search(
    Html html,
    Str pattern,
    Remote remote)
{
    /* The variable names are the same as the {Match} structure. */
    Chr pattern_first;
    unsigned pattern_size;
    unsigned start_index;
    unsigned size;
    Tags tags;

    pattern_size = str_size(pattern);
    if (pattern_size == 0) {
	/* Nothing to look for: */
	return (Match)0;
    }

    /*
     * {pattern_first} is the first character that must actually be
     * found in the document, so a leading marker is stepped over.
     */
    pattern_first = chr_to_upper(pattern[0]);
    if (pattern_first == marker_chr) {
	pattern_first = chr_to_upper(pattern[1]);
    }

    tags = html->tags;
    size = tags_size(tags);
    for (start_index = 0; start_index < size; start_index++) {
	unsigned start_offset;
	Tag start_tag;
	Text start_text;
	unsigned start_text_size;

	start_tag = tags_fetch(tags, start_index);
	if (tag_is_head(start_tag)) {
	    continue;
	}
	start_text = start_tag->preceeding_text;
	start_text_size = text_size(start_text);
	for (start_offset = 0;
	  start_offset < start_text_size; start_offset++) {
	    unsigned end_index;
	    int matched_white_space;
	    unsigned pattern_offset;
	    unsigned marker_index;
	    unsigned marker_offset;
	    Tag marker_tag;

	    if (chr_to_upper(text_chr_fetch(start_text, start_offset)) !=
	      pattern_first) {
		continue;
	    }

	    /* {marker_tag} of 0 means no marker has been seen yet: */
	    marker_tag = (Tag)0;
	    marker_index = 0;
	    marker_offset = 0;

	    /* First character matches! Try to match remaining ones: */
	    pattern_offset = 0;
	    matched_white_space = 0;
	    for (end_index = start_index; end_index < size; end_index++) {
		unsigned end_offset;
		unsigned end_offset_start;
		Tag end_tag;
		Text end_text;
		unsigned end_text_size;

		end_tag = tags_fetch(tags, end_index);
		end_text = end_tag->preceeding_text;
		end_text_size = text_size(end_text);
		end_offset_start =
		  (start_index == end_index) ? start_offset : 0;
		for (end_offset = end_offset_start;
		  end_offset < end_text_size; end_offset++) {
		    Chr end_chr;
		    Chr pattern_chr;

		    end_chr =
		      chr_to_upper(text_chr_fetch(end_text, end_offset));
		    pattern_chr = chr_to_upper(pattern[pattern_offset]);
		    if (pattern_chr == marker_chr) {
			/* Remember where we are and step over the marker: */
			pattern_offset += 1;
			marker_index = end_index;
			marker_tag = end_tag;
			marker_offset = end_offset;
			pattern_chr = chr_to_upper(pattern[pattern_offset]);
		    }
		    if (chr_is_white_space(end_chr)) {
			if (matched_white_space) {
			    /* Already matched white space in pattern: */
			    continue;
			}
			end_chr = (Chr)' ';
		    }
		    if (end_chr != pattern_chr) {
			goto no_match;
		    }

		    /*
		     * A matched space lets the rest of this run of white
		     * space be skipped.  A matched non-space ends the
		     * run, so that the next space in {pattern} has to be
		     * matched by the document again.
		     */
		    matched_white_space = (pattern_chr == (Chr)' ');

		    pattern_offset += 1;
		    if (pattern_offset == pattern_size) {
			/* We have a complete match! */
			Match match;

			end_offset += 1;
			if (marker_tag == (Tag)0) {
			    marker_index = end_index;
			    marker_tag = end_tag;
			    marker_offset = end_offset;
			}
			match = (Match)memory_allocate(sizeof *match);
			match->end_index = end_index;
			match->end_tag = end_tag;
			match->end_offset = end_offset;
			match->html = html;
			match->marker_index = marker_index;
			match->marker_tag = marker_tag;
			match->marker_offset = marker_offset;
			match->pattern = pattern;
			match->remote = remote;
			match->start_index = start_index;
			match->start_tag = start_tag;
			match->start_offset = start_offset;
			return match;
		    }
		}
	    }
	  no_match:;
	}
    }
    /* No match: */
    return (Match)0;
}

/*
 * html_tags(html)
 *	This routine will return the {Tags} vector stored in {html}.
 */
Tags
html_tags(
    Html html)
{
    Tags tags;

    tags = html->tags;
    return tags;
}

/*
 * html_write_public_annotation(html, out_file, url_name, name_value)
 *	This routine will write all of {html} to {out_file}, inserting
 *	a "Public annotation for" hypertext link right after the first
 *	<PRE> start tag.  The link refers to {url_name} and its Name
 *	attribute is {name_value}.
 *
 *	When the <PRE> tag is already followed by such a link (the tag
 *	sequence </PRE>, <A>, </A>, <P> with the text "Public
 *	annotation for" before the <A>), the old link is left out of
 *	the output so that the new one replaces it.  In that case the
 *	first character of the text that followed the old link is
 *	removed from {html} as well.
 */
void
html_write_public_annotation(
    Html html,
    FILE *out_file,
    Str url_name,
    Str name_value)
{
    unsigned index;
    int insertion_point_found;
    unsigned size;
    Tag tag;
    Tags tags;

    /*
     * The text of an annotation starts immediately after the <PRE> tag.
     */
    insertion_point_found = 0;
    tags = html->tags;
    size = tags_size(tags);
    for (index = 0; index < size; index++) {
	Str upper_case_name;

	tag = tags_fetch(tags, index);
	upper_case_name = tag->upper_case_name;
	text_write(tag->preceeding_text, out_file);
	tag_write(tag, out_file);
	if (!insertion_point_found &&
	  (tag->type == Tag_start) &&
	  str_equal(upper_case_name, (Str)"PRE")) {
	    /* Found the insertion point: */
	    insertion_point_found = 1;

	    /* Delete any previous hypertext link: */
	    if (index + 5 < size) {
		Tag tag1;
		Tag tag2;
		Tag tag3;
		Tag tag4;
		Tag tag5;

		tag1 = tags_fetch(tags, index + 1);
		tag2 = tags_fetch(tags, index + 2);
		tag3 = tags_fetch(tags, index + 3);
		tag4 = tags_fetch(tags, index + 4);
		tag5 = tags_fetch(tags, index + 5);
		if ((tag1->type == Tag_end) &&
		  str_equal(tag1->upper_case_name, (Str)"PRE") &&
		  (tag2->type == Tag_start) &&
		  str_equal(tag2->upper_case_name, (Str)"A") &&
		  str_equal(text_str_copy(tag2->preceeding_text),
		  (Str)"\nPublic annotation for\n") &&
		  (tag3->type == Tag_end) &&
		  str_equal(tag3->upper_case_name, (Str)"A") &&
		  (tag4->type == Tag_start) &&
		  str_equal(tag4->upper_case_name, (Str)"P")) {
		    /* Delete next 4 tags */
		    index += 4;

		    /*
		     * Delete new-line at beginning of {tag5}.  The
		     * lopped-off piece is not wanted, so release it.
		     */
		    text_free_shallow(
		      text_lop_shallow(tag5->preceeding_text, 1));
		}
	    }

	    /* Output the new hypertext link: */
	    (void)fprintf(out_file,
	      "</pre>\n"
	      "Public annotation for\n"
	      "<A HRef=\"%s\"\n"
	      "Name=\"%s\">\n"
	      "%s</A>.<P>\n",
	      url_name, name_value, url_name);
	}
    }
}

/*
 * html_write_remaining(html, out_file, with_markup)
 *	This routine will write the rest of {html} to {out_file},
 *	starting at the tag that {html} currently points at.  For that
 *	first tag only the markup is written, not the text before it;
 *	every later tag is written together with the text before it.
 *	If {with_markup} is 1, all SGML markup will be output;
 *	otherwise, all SGML markup will be omitted.
 */
void
html_write_remaining(
    Html html,
    FILE *out_file,
    int with_markup)
{
    Tag current;
    unsigned position;
    Tags tags;
    unsigned total;

    tags = html->tags;
    total = tags_size(tags);
    position = html->index;

    /* The tag we stopped at: markup only. */
    if (position < total) {
	current = tags_fetch(tags, html->index);
	if (with_markup) {
	    tag_write(current, out_file);
	}
    }

    /* Everything after it: text, then markup. */
    position += 1;
    while (position < total) {
	current = tags_fetch(tags, position);
	text_write(current->preceeding_text, out_file);
	if (with_markup) {
	    tag_write(current, out_file);
	}
	position++;
    }
}

/*
 * html_write_to_body_end(html, out_file, matches, with_markup)
 *	This routine will write {html} to {out_file}, resuming at the
 *	tag that {html} currently points at, until either a </BODY> or
 *	</HTML> tag is encountered.  If {with_markup} is 1, all SGML
 *	markup will be output; otherwise, all SGML markup will be
 *	omitted.
 *
 *	The matches in {matches} are taken in order.  The text of each
 *	one is bracketed with <B><I><A Name="WAIS_FETCH"> and
 *	</A></I></B>; these are written whatever {with_markup} is.
 *
 *	Nothing is returned.  On return {html} points at the tag the
 *	routine stopped at (one past the last tag when no </BODY> or
 *	</HTML> was found).  Neither that tag nor the text before it
 *	has been written.
 */
void
html_write_to_body_end(
    Html html,
    FILE *out_file,
    Matches matches,
    int with_markup)
{
    Tag tag;
    Tags tags;
    unsigned index;
    Match match;
    int start_match;
    unsigned matches_index;
    unsigned matches_total;
    unsigned size;

    /* Fetch the first match object: */
    match = (Match)0;
    start_match = 1;
    matches_index = 0;
    matches_total = matches_size(matches);
    if (matches_total != 0) {
	match = matches_fetch(matches, 0);
    }

    /* The tag we resume at: markup only, and only if there is one. */
    tags = html->tags;
    size = tags_size(tags);
    if (html->index < size) {
	tag = tags_fetch(tags, html->index);
	if (with_markup) {
	    tag_write(tag, out_file);
	}
    }

    for (index = html->index + 1; index < size; index++) {
	Str upper_case_name;
	Text text;
	unsigned text_offset;
	unsigned text_length;

	tag = tags_fetch(tags, index);
	upper_case_name = tag->upper_case_name;
	if ((tag->type == Tag_end) &&
	  (str_equal(upper_case_name, (Str)"BODY") ||
	  str_equal(upper_case_name, (Str)"HTML"))) {
	    break;
	}
	text = tag->preceeding_text;
	text_length = text_size(text);
	text_offset = 0;

	/*
	 * {start_match} says which end of {match} we are waiting for.
	 * Keep going for as long as that end lies in this text.
	 */
	while ((match != (Match)0) &&
	  ((start_match && (match->start_index == index)) ||
	  (!start_match && (match->end_index == index)))) {

	    if (start_match) {
		unsigned start_offset;

		start_match = 0;

		/* Output any intervening text: */
		start_offset = match->start_offset;
		text_write_range(text, out_file,
		  text_offset, start_offset - text_offset);
		text_offset = start_offset;

		/* Output the <B>, <I> and the anchor name: */
		(void)fprintf(out_file,
		  "<B><I><A Name=\"WAIS_FETCH\">");
		(void)fflush(out_file);
	    } else {
		/* End Match */
		unsigned end_offset;

		start_match = 1;

		/* Output intervening text: */
		end_offset = match->end_offset;
		text_write_range(text, out_file,
		  text_offset, end_offset - text_offset);
		text_offset = end_offset;

		/* Output the </A>, </I> and </B>: */
		(void)fprintf(out_file, "</A></I></B>");
		(void)fflush(out_file);

		/* Advance to next match: */
		matches_index += 1;
		if (matches_index < matches_total) {
		    match = matches_fetch(matches, matches_index);
		} else {
		    match = (Match)0;
		}
	    }
	}

	/* Whatever text is left, then the tag itself: */
	text_write_range(text, out_file,
	  text_offset, text_length - text_offset);
	if (with_markup) {
	    tag_write(tag, out_file);
	}
    }
    html->index = index;
}

/*
 * html_write_to_body_start(html, out_file, with_markup)
 *	This routine will write the leading header tags of {html}, each
 *	with the text before it, to {out_file}.  It stops at the first
 *	tag for which tag_is_head() is false and leaves {html} pointing
 *	at that tag; neither that tag nor the text before it is
 *	written.  If {with_markup} is 1, all SGML markup will be
 *	output; otherwise, all SGML markup will be omitted.
 */
void
html_write_to_body_start(
    Html html,
    FILE *out_file,
    int with_markup)
{
    Tag current;
    unsigned position;
    Tags tags;
    unsigned total;

    /*
     * The following tags occur before the body -- <HTML>, <HEAD>,
     * <TITLE>, </TITLE>, <ISINDEX>, </HEAD>, <BODY>, and I think
     * <KEYWORDS>.  As a hack, <NCSA-ANNOTATION-FORMAT-1> is treated
     * as part of the header too.
     */
    tags = html->tags;
    total = tags_size(tags);
    position = 0;
    while (position < total) {
	current = tags_fetch(tags, position);
	if (!tag_is_head(current)) {
	    break;
	}
	text_write(current->preceeding_text, out_file);
	if (with_markup) {
	    tag_write(current, out_file);
	}
	position++;
    }
    html->index = position;
}

/*
 * match_compare(match1, match2, handle)
 *	This routine will return {-1, 0, 1} depending upon whether the
 *	marker of {match1} is before, at, or after the marker of
 *	{match2}.  Markers are ordered by tag index first and by
 *	character offset second.  Both matches must be from the same
 *	{Html} object.  {handle} is completely ignored, but is needed
 *	for the {vector_sort}() routine.
 */
/* ARGSUSED */
static int
match_compare(
    Match match1,
    Match match2,
    void *handle)
{
    (void)handle;
    assert(match1->html == match2->html);

    /* Different tags: the tag index decides. */
    if (match1->marker_index != match2->marker_index) {
	return (match1->marker_index < match2->marker_index) ? -1 : 1;
    }

    /* Same tag: the offset within the preceding text decides. */
    if (match1->marker_offset != match2->marker_offset) {
	return (match1->marker_offset < match2->marker_offset) ? -1 : 1;
    }
    return 0;
}

/*
 * matches_append(matches, match)
 *	This routine will add {match} to the end of {matches}.
 */
void
matches_append(
    Matches matches,
    Match match)
{
    Vector vector;

    vector = (Vector)matches;
    vector_append(vector, (Pointer)match);
}

/*
 * matches_create()
 *	This routine will create and return an empty vector of {Match} objects.
 */
Matches
matches_create(void)
{
    Matches matches;

    matches = (Matches)vector_create();
    return matches;
}

/*
 * matches_fetch(matches, offset)
 *	This routine will return the match object stored at position
 *	{offset} of {matches}.
 */
static Match
matches_fetch(
    Matches matches,
    unsigned offset)
{
    Match match;

    match = (Match)vector_fetch((Vector)matches, offset);
    return match;
}

/*
 * matches_sort(matches)
 *	This routine will sort {matches} with vector_sort(), using
 *	match_compare() as the ordering.
 */
void
matches_sort(
    Matches matches)
{
    Vector_compare_element compare;
    Vector vector;

    vector = (Vector)matches;
    compare = (Vector_compare_element)match_compare;
    vector_sort(vector, compare, (void *)0);
}

/*
 * matches_size(matches)
 *	This routine will return the number of matches in {matches}.
 */
static unsigned
matches_size(
    Matches matches)
{
    unsigned size;

    size = vector_size((Vector)matches);
    return size;
}

/*
 * tag_attributes(tag)
 *	This routine will return the {Attributes} object stored in
 *	{tag}.
 */
Attributes
tag_attributes(
    Tag tag)
{
    Attributes attributes;

    attributes = tag->attributes;
    return attributes;
}

/*
 * tag_create()
 *	This routine will create and return an empty {Tag} object.
 */
static Tag
tag_create(void)
{
    Tag tag;

    tag = (Tag)memory_allocate(sizeof *tag);
    return tag;
}

/*
 * tag_is_end(tag)
 *	This routine will return 1 if {tag} is an end tag and 0
 *	otherwise.
 */
int
tag_is_end(
    Tag tag)
{
    return (tag->type == Tag_end) ? 1 : 0;
}

/*
 * tag_is_head(tag)
 *	This routine will return 1 if {tag} is an HTML header tag and
 *	0 otherwise.  The start tags and the end tags that count as
 *	header tags are listed in the two tables below; a tag that is
 *	neither a start tag nor an end tag is never a header tag.
 */
static int
tag_is_head(
    Tag tag)
{
    static const char *const start_names[] = {
	"HTML",
	"HEAD",
	"TITLE",
	"ISINDEX",
	"KEYWORDS",
	"BODY",
	"LINK",
	"NCSA-ANNOTATION-FORMAT-1"
    };
    static const char *const end_names[] = {
	"TITLE",
	"HEAD"
    };
    unsigned count;
    const char *const *names;
    unsigned which;

    /* Pick the table that goes with this kind of tag: */
    if (tag->type == Tag_start) {
	names = start_names;
	count = sizeof start_names / sizeof start_names[0];
    } else if (tag->type == Tag_end) {
	names = end_names;
	count = sizeof end_names / sizeof end_names[0];
    } else {
	return 0;
    }

    for (which = 0; which < count; which++) {
	if (str_equal(tag->upper_case_name, (Str)names[which])) {
	    return 1;
	}
    }
    return 0;
}

/*
 * tag_is_name(tag, name)
 *	This routine will return the result of comparing {name} with
 *	the upper-case name of {tag} using str_equal().  {name} must
 *	be upper-case.
 */
int
tag_is_name(
    Tag tag,
    Str name)
{
    Str tag_name;

    tag_name = tag->upper_case_name;
    return str_equal(tag_name, name);
}

/*
 * tag_is_start(tag)
 *	This routine will return 1 if {tag} is a start tag and 0
 *	otherwise.
 */
int
tag_is_start(
    Tag tag)
{
    return (tag->type == Tag_start) ? 1 : 0;
}

/*
 * tag_preceeding_text(tag)
 *	This routine will return the text that came before {tag} in
 *	the document.
 */
Text
tag_preceeding_text(
    Tag tag)
{
    Text text;

    text = tag->preceeding_text;
    return text;
}

/*
 * tag_write(tag, out_file)
 *	This routine will write the markup of {tag} to {out_file}; the
 *	text that precedes the tag is not written.  Start tags are
 *	written with their attributes, end tags and comments without,
 *	and nothing is written for the end-of-file tag.  Any other
 *	type of tag is an error.
 */
static void
tag_write(
    Tag tag,
    FILE *out_file)
{
    Tag_type kind;

    kind = tag->type;
    if (kind == Tag_end_of_file) {
	/* Nothing to write: */
	return;
    }
    if ((kind == Tag_start) || (kind == Tag_end)) {
	text_write(tag->open_bracket, out_file);
	text_write(tag->name, out_file);
	if (kind == Tag_start) {
	    attributes_write(tag->attributes, out_file);
	}
	text_write(tag->close_bracket, out_file);
	return;
    }
    if (kind == Tag_comment) {
	text_write(tag->open_bracket, out_file);
	text_write(tag->close_bracket, out_file);
	return;
    }

    /* Tag_undefined or something unknown: */
    assert(0);
}

/*
 * tags_append(tags, tag)
 *	This routine will add {tag} to the end of {tags}.
 */
static void
tags_append(
    Tags tags,
    Tag tag)
{
    Vector vector;

    vector = (Vector)tags;
    vector_append(vector, (Pointer)tag);
}

/*
 * tags_create()
 *	This routine will create and return an empty {Tags} object.
 */
static Tags
tags_create(void)
{
    Tags tags;

    tags = (Tags)vector_create();
    return tags;
}

/*
 * tags_fetch(tags, index)
 *	This routine will return the tag stored at position {index}
 *	of {tags}.
 */
static Tag
tags_fetch(
    Tags tags,
    unsigned index)
{
    return (Tag)vector_fetch((Vector)tags, index);
}

/*
 * tags_size(tags)
 *	This routine will return how many tags {tags} holds.
 */
static unsigned
tags_size(
    Tags tags)
{
    return vector_size((Vector)tags);
}

