#!/usr/bin/perl

# Standalone compiler for CLC-INTERCAL

# This file is part of CLC-INTERCAL.

# Copyright (C) 1999 Claudio Calvelli <lunatic@assurdo.com>, all rights reserved

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either 2 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

require 5.002;

$0 =~ / / or die "The compiler only work if the command name contains spaces\n";

use vars qw($VERSION);

$VERSION = '0.03';

use Getopt::Long;
use Charset::Baudot;
use Charset::EBCDIC;
use Language::INTERCAL;

my $just_compile = 0;
my $dont_compile = 0;
my $input_alphabet = 2;
my $output_alphabet = -1;
my $optimise = 0;
my $output_name = '';
my $backend = 'Perl';
my $dont_run = 0;
my $bug = '';
my $ubug = '';
my $obsolete = 0;

my @to_do = ();
my @to_link = ();
my $link_name = '';

Getopt::Long::config qw(no_ignore_case auto_abbrev permute bundling);

GetOptions('compile'   => \$just_compile,
	   'c'         => \$just_compile,
           'C'         => \$dont_compile,
           'nocompile' => \$dont_compile,
	   'bug:i'     => \$bug,
	   'ubug:i'    => \$ubug,
	   'obsolete'  => \$obsolete,
	   'a'         => sub { $input_alphabet = 0 },
	   'ascii'     => sub { $input_alphabet = 0 },
	   'b'         => sub { $input_alphabet = 1 },
	   'baudot'    => sub { $input_alphabet = 1 },
	   'e'         => sub { $input_alphabet = 2 },
	   'ebcdic'    => sub { $input_alphabet = 2 },
	   'A'         => sub { $output_alphabet = 0 },
	   'ASCII'     => sub { $output_alphabet = 0 },
	   'B'         => sub { $output_alphabet = 1 },
	   'BAUDOT'    => sub { $output_alphabet = 1 },
	   'E'         => sub { $output_alphabet = 2 },
	   'EBCDIC'    => sub { $output_alphabet = 2 },
	   '0'         => sub { $output_alphabet = -1 },
	   'optimise'  => \$optimise,
	   'O'         => \$optimise,
	   'o=s'       => \$output_name,
	   'output=s'  => \$output_name,
	   'backend=s' => \$backend,
	   'l=s'       => \$backend,
	   'r'         => \$dont_run,
	   'norun'     => \$dont_run,
	   '<>'        => \&to_do) or usage();

fiddle Language::INTERCAL "bug=$bug" if $bug ne '';
fiddle Language::INTERCAL "ubug=$ubug" if $ubug ne '';
fiddle Language::INTERCAL 'next' if $obsolete;

my $to_do;
for $to_do (@to_do) {
    my ($op, @op) = @$to_do;
    &$op(@op);
}

if (@to_link) {
    my $name = shift @to_link;
    $name->link(@to_link);
    if ($dont_run || $backend ne 'Perl') {
	if ($output_name eq '') {
	    $output_name = $name->complete_name($backend, $link_name);
	}
	$name->backend($backend, $output_name);
    } else {
	if ($output_name eq '') {
	    $output_name = 'program';
	}
	$name->backend('Perl', $output_name);
	_run($input_alphabet, $output_alphabet, $output_name);
    }
}

sub to_do {
    my ($source, $dest, $suffix) = @_;
    ($dest = $source) =~ s/\.[^\.]+$// or
    	die "File name $source requires suffix\n";
    $suffix = substr($&, 1);
    my @alph = ($input_alphabet, $output_alphabet, $optimise);
    if ($suffix eq 'i') {
	if ($dont_compile) {
	    push @to_do, ['list', @alph, $source, $output_name];
	} elsif ($just_compile) {
	    push @to_do, ['compile', @alph, $source,
			  $output_name || ($dest . '.ipt')];
	} else {
	    push @to_do, ['compile_link', @alph, $source, $dest];
	}
    } elsif ($suffix eq 'ipt') {
	if ($dont_compile || $just_compile) {
	} else {
	    push @to_do, ['link', @alph, $source, $dest];
	}
    } else {
	die "Unknown file suffix: $suffix\n";
    }
}

sub compile_link {
    my ($in, $out, $opt, $src, $dest) = @_;
    my $ptree = _compile($in, $out, $opt, $src);
    $link_name = $dest;
    push @to_link, $ptree;
}

sub link {
    my ($in, $out, $opt, $src, $dest) = @_;
    my $ptree = load Language::INTERCAL $src;
    $ptree->optimise() if $opt;
    $link_name = $dest;
    push @to_link, $ptree;
}

sub compile {
    my ($in, $out, $opt, $src, $dest) = @_;
    my $ptree = _compile($in, $out, $opt, $src);
    $ptree->save($dest);
}

sub list {
    my ($in, $out, $opt, $src, $dest) = @_;
    open(SRC, '< ' . $src) or die "$src\: $!\n";
    my $text = '';
    while (<SRC>) { $text .= $_ }
    close SRC;
    return if $out < 0;
    if ($in != $out) {
	$text = baudot2ascii($text) if $in == 1;
	$text = ebcdic2ascii($text) if $in == 2;
	$text =~ s![cC]\010[/|]!!g;
	$text =~ s![vV]\010-!!g;
	$text = ascii2baudot($text) if $out == 1;
	$text = ascii2ebcdic($text) if $out == 2;
    }
    open(DST, '> ' . ($dest || '-')) or die "$dest\: $!\n";
    print DST $text;
    close DST;
}

sub _compile {
    my ($in, $out, $opt, $src) = @_;
    open(SRC, '< ' . $src) or die "$src\: $!\n";
    my $text = '';
    while (<SRC>) { $text .= $_ }
    close SRC;
    $text = baudot2ascii($text) if $in == 1;
    if ($in != 2) {
	$text =~ s![cC]\010[/|]!!g;
	$text =~ s![vV]\010-!!g;
	$text = ascii2ebcdic($text);
    }
    my $ptree = parse Language::INTERCAL $text, sub {
	    	my $t = join('', @_);
		$t = ascii2baudot($t) if $out == 1;
		$t = ascii2ebcdic($t) if $out == 2;
		print $t if $out >= 0;
	    };
    name $ptree $src;
    optimise $ptree if $opt;
    $ptree;
}

sub _run {
    my ($in, $out, $prog) = @_;
    &$prog(sub {
		my $t;
	    	if (@_) {
		    read STDIN, $t, @_;
		} else {
		    $t = <STDIN>;
		    $t =~ s/\n/B/g if $in == 1;
		    $t = baudot2ascii($t) if $in == 1;
		    if ($in != 2) {
			$t =~ s![cC]\010[/|]!!g;
			$t =~ s![vV]\010-!!g;
			$t = ascii2ebcdic($t);
		    }
		}
		$t;
	    }, sub {
	    	my $t = join('', @_);
		$t = ascii2baudot($t) if $out == 1;
		$t = ascii2ebcdic($t) if $out == 2;
		print $t;
	    });
}

sub usage {
    (my $p = $0) =~ s!^.*/!!;
    die "Usage: '$p' [-aAbBcCeEOr] [-o name] [-l lang] files...\n";
}

__END__

=pod

=head1 NAME

'oo, ick' - Compiler for CLC-INTERCAL

=head1 SYNOPSIS

B<'oo, ick'> [options] B<files>...\n";

=head1 DESCRIPTION

'oo, ick' reads one or more source files and compiles them using the
CLC-INTERCAL compiler. Currently, two types of input source files are
recognised:

=over 4

=item INTERCAL program source

These files must have suffix B<.i>. The compiler can produce a CLC-INTERCAL
source tree, an object using one of the optional compiler back ends, or
just compile and run the program.

=item CLC-INTERCAL parse tree.

These files must have suffix B<.ipt>. The compiler can produce an object using
one of the optional back ends, or just run the program.

=back

The compiler accepts several options, some of which are documented here:

=over 4

=item B<-c>

Just produce a CLC-INTERCAL parse tree. If the input file is already a parse
tree, does nothing.

=item B<-C>

Does not compile the program. I can't remember why one would want to do this.

=item B<-A>

Lists the program source in ASCII.

=item B<-B>

Lists the program source in Baudot.

=item B<-E>

Lists the program source in EBCDIC.

=item B<-O>

Does not list the program source. This is the default. Note that this is
a zero, not an oh.

=item B<-a>

Converts the program source from ASCII to EBCDIC before invoking the
compiler.

=item B<-b>

Converts the program source from Baudot to EBCDIC before invoking the
compiler.

=item B<-e>

Leaves the program source unchanged. The compiler will assume it's in
EBCDIC. This is the default.

=item B<-O>

Invokes the optimiser. This is an oh, not a zero.

=item B<-o> I<name>

Selects a name for the output file. Default is to use the same name as
the source changing the suffix as appropriate. What is an appropriate
suffix depends on the backend being used. For CLC-INTERCAL parse trees,
the suffix is B<.ipt>.

=item B<-l> I<name>

Selects a different compiler back end. Default is to use the built in
Perl back end and then call the subroutine created by the compiler. If
a different back end is selected, the program won't automatically run,
but presumably the back end produces some output file.

=item B<-r>

Do not run the program. This is the default if the back end selected is
not the built in Perl. Note that if the program does run, all output is
subject to the same conversions as specified by the B<-A>, B<-B>, or
B<-E> switch, with the exception that B<-0> is ignored; this includes
any binary output, so the safest thing is to use B<-0>. For input, the
same conversions as the source code apply, however, the conversion is
not applied to binary input. The recommended choice is B<-e>, which is
identical to what the program would do when compiled to a standalone
executable.

=back

=head1 NOTES

The program name is B<'oo, ick'>, with embedded space and comma. Because of
unexcusable limitation in several utilities (including, but not limited to,
I<man>, I<perldoc>, I<make> and I<ExtUtils::MakeMaker>), the installation
scripts will create a separate executable B<oo,space,ick> and related man
page. These can be useful to consult this documentation, but the executable
won't run. Use B<'oo, ick'> instead.

=head1 SEE ALSO

L<Language::INTERCAL>.

