#!/usr/bin/env perl
# $Id$
# Public domain.  Originally written by Andreas Scherer, 2023.

use strict;
use warnings;

use File::Basename;
use Getopt::Long qw(:config no_ignore_case bundling);
use Pod::Usage;

my $progname;
my $collator;

BEGIN {
  $progname = basename($0);
  # Load Unicode::Collate on a best-effort basis: it may be unavailable
  # in some Perl installations.  If loading the module or creating the
  # collator fails, eval yields undef and $collator stays undefined.
  $collator = eval {
    require Unicode::Collate;
    Unicode::Collate->new();
  };
}

# One-line synopsis, shown when the command line cannot be parsed.
my $usage = sprintf "Usage: %s < input_file > output_file\n", $progname;

# Standard options for TeX Live.
my @option_spec = (
  'help|?'  => \&help_handler,                    # page the manual, exit
  'version' => sub { print version(); exit 0; },  # print banner, exit
);
Getopt::Long::GetOptions(@option_spec) or die $usage;
## help_handler()
# Show the POD sections SHORT DESCRIPTION, EXAMPLE and COPYRIGHT, preceded
# by the version banner, through the user's pager ($ENV{PAGER}, or 'less -e'
# when that is unset or empty), then exit with status 0.
# If the pager cannot be started, the text is written to STDOUT instead.
sub help_handler {
  my $pager = $ENV{PAGER} || 'less -e';
  my $out;
  unless (open($out, '|-', $pager)) {
    warn "$progname: cannot run pager '$pager': $!\n";
    $out = \*STDOUT; # still show the help text
  }
  # 'NOEXIT' makes pod2usage() return to us.  Without it, pod2usage()
  # terminates the program itself with a non-zero status (verbose level
  # is not 0), so the close and the 'exit 0' below would never be reached.
  pod2usage(-message => version(), -output => $out, -exitval => 'NOEXIT',
    -verbose => 99, -sections => "SHORT DESCRIPTION|EXAMPLE|COPYRIGHT");
  close $out; # for a pipe this also waits for the pager to finish
  exit 0;
}
## version()
# Return the one-line version banner: the program name followed by the
# revision and date keyword fields, terminated by a newline.
sub version {
  # The keyword fields are single-quoted so that nothing is interpolated.
  return join '', $progname, ' $Revision$ $Date$', "\n";
}

# Read the input from STDIN and look for blocks like
# \mini
# [LOCATION KEY TEXPART]*
# }\FI
# and sort their entries alphabetically by the KEYs.
#
# Every KEY contains a single C identifier (variable, type, function, etc.),
# either as a single brace-enclosed argument of a TeX control sequence,
# or as a single identifier in $math_mode$; the latter come from '@f'ormat
# definitions. All internal backslashes are removed from the KEY before it
# is used in the storage.
#
# All other lines, including the '\mini' and '}\FI' delimiters themselves,
# are copied to STDOUT unchanged.
my %mini_index; # storage for index entries of a section
my $print_index = 0; # do we collect and print a mini-index?

## sorted_mini_index()
# Return the entries collected in %mini_index in their output order:
# (I) grouped by plain key (second column), using Unicode::Collate when it
# is available and a case-insensitive string comparison otherwise;
# (II) within one key, by the third column and then by the first column.
sub sorted_mini_index {
	my @keys;
	if ($collator) {
		@keys = $collator->sort(keys %mini_index);
	} else {
		# Keys that differ only in case are ordered by a plain
		# comparison, so the result never depends on hash order.
		@keys = sort { lc($a) cmp lc($b) || $a cmp $b } keys %mini_index;
	}

	my @sorted;
	foreach my $key (@keys) {
		push @sorted, sort {
			my ($a_loc,undef,$a_id) = split / /, $a, 3;
			my ($b_loc,undef,$b_id) = split / /, $b, 3;
			$a_id cmp $b_id || $a_loc cmp $b_loc
		} @{ $mini_index{$key} };
	}
	return @sorted;
}

# The list-context <STDIN> reads all input before the first line is
# printed; the documented invocation writes the output back onto the
# input file, so this is kept on purpose.
foreach my $line (<STDIN>)
{
	if ("\\mini\n" eq $line) { # start of mini-index
		$print_index = 1;
		%mini_index = (); # reset mini-index storage
	} elsif ("}\\FI\n" eq $line) { # end of mini-index
		print sorted_mini_index();
		%mini_index = (); # a stray '}\FI' must not print them again
		$print_index = 0;
	} elsif ($print_index) { # mini-index entry
		my (undef,$key) = split / /, $line; # 2nd column is the key
		$key = '' unless defined $key; # entry without a 2nd column
		$key =~ s/\\//g; # strip internal TeX escape(s)
		# Take the captures from the match results themselves instead
		# of from $1, which a failed match leaves untouched.
		my ($id) = $key =~ m/\w*\{(\w+)\}/; # plain key from control seq.
		($id) = $key =~ m/\$(\w+)\$/ unless defined $id; # @f TeX key
		unless (defined $id) {
			warn "$progname: no key found in mini-index entry: $line";
			$id = ''; # keep the entry; it sorts under the empty key
		}
		push @{ $mini_index{$id} }, $line; # store index entry
		next; # print later
	}

	print $line;
}

if ($print_index) { # input ended inside a mini-index
	warn "$progname: mini-index not terminated at end of input\n";
	print sorted_mini_index(); # do not lose the collected entries
}

exit 0;

=pod

=encoding utf8

=head1 NAME

ctwill-proofsort - Sort mini-indexes alphabetically

=head1 SHORT DESCRIPTION

This small Perl script 'ctwill-proofsort' sorts the mini-indexes for each
section in the TeX file created by 'ctwill +P', i.e., prior to
formatting with the 'ctproofmac.tex' macros.
It reads its input from <STDIN> and writes its output to <STDOUT>.

=over

=item Run CTWILL twice on your 'input_file'.w, creating 'input_file'.tex

=item Invoke "( ctwill-proofsort < 'input_file'.tex ) 1<> 'input_file'.tex"

=item Run TeX on 'input_file'.tex once

=back

See also "man ctwill" for more information on how to use the CTWILL system,
both with and without the '+P' option.

=head1 EXAMPLE

Example from MMIX (https://github.com/ascherer/mmix, 'local' branch):

=over

=item $ tie -c mmotype-twill.ch mmotype.{w,ch} mmotype-mini.ch

=item $ ctwill +P -x mmotype mmotype-twill (run this two times)

=item $ ( ctwill-proofsort < mmotype.tex ) 1<> mmotype.tex

=item $ tex mmotype

=back

=head1 COPYRIGHT

Public domain.  Originally written by Andreas Scherer, 2023.

Contemporary development on https://github.com/ascherer/cwebbin.

=cut
