#!/usr/perl

# Get the standard option parser
require 'Getopts';

# Get the personalised file name mangler (if any)
eval "require 'Personal'" || eval <<'END';
sub personalise	{ $_[0]; }
sub preformat	{ $_[0]; }
END

# Usage is
#	Tar [-tvx] [-L logfile] -f Tarfile
&Getopts('f:L:tvx');

die "No tar file specified\n" unless $opt_f ne '';
$tar = $opt_f;
$logfile = $opt_L;

# Initialisation
# --------------
#
# Tar file block size, and header format.
# Array of month names, for date conversion.

@months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
	   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec');

$blocksize = 512;
$template = 'A100 A8 A8 A8 A12 A12 A8 a1 A100 A8 A32 A32 A8 A8';

# Pre-allocate file block buffers
$header = "\0" x $blocksize + 1;
$block  = "\0" x $blocksize + 1;

# In formatted output, break on slashes (see report_file).
$: = '/';

# Main processing
# ---------------

open(TAR,$tar) || die "Cannot open $tar: $!\n";

if ($logfile ne '')
{
	open(LOG,">$logfile") || die "Cannot open log file $logfile: $!\n";
}

FILE: {
	$bytes = read(TAR,$header,$blocksize);
	die "Tar: Header block too short\n" unless ($bytes == $blocksize);

	# A null header block marks the end
	last FILE if $header eq "\0" x $blocksize;

	# Decode the information in the file header
	&process_header();

	# If we are to produce a table of contents, do so now.
	&report_file() if $opt_t;

	# Open the output file
	&open_file($name, $type) if $opt_x;

	# Skip through the file data blocks
	while ($size > 0)
	{
		$bytes = read(TAR, $block, $blocksize);
		die "End of file during file $name\n" if $bytes < $blocksize;

		# Write the data block to the output file
		&write_file($block,$bytes,$size) if $opt_x;

		# Keep track of the number of bytes still to read
		$size -= $bytes;
	}

	# Close the output file
	&close_file() if $opt_x;

	# Next file
	redo FILE;
}

close LOG if ($logfile ne '');

# Header block processing. Perform some validity checks, and decode the
# header fields into global variables for later use. Also checks that
# the header checksum is valid.

sub process_header
{
	# Store the header fields in global variables.
	($name, $mode, $uid, $gid, $size, $mtime, $check, $type, $link,
	 $magic, $uname, $gname, $major, $minor) = unpack($template,$header);

	# Check the format of the various fields
	$mode  = &oct($mode,'mode');
	$uid   = &oct($uid,'user id');
	$gid   = &oct($gid,'group id');
	$size  = &oct($size,'size');
	$mtime = &oct($mtime,'modification time');
	$check = &oct($check,'checksum');
	$major = &oct($major,'major device');
	$minor = &oct($minor,'minor device');

	# Test the checksum
	substr($header, 148, 8) = ' ' x 8;
	$checksum = unpack('%32C*', $header);
	die "Invalid checksum for file $name header\n" if $check != $checksum;
}

# Report the details of the current file

format =
@<<<<<<<<< @>>>>>>>> @>>>>>>> @<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$modestr,  $ids,     $sz,     $date,            $temp_name
~~                                              ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
                                                $temp_name
.

sub report_file
{
	print($name,"\n"), return unless $opt_v;

	# Format the user and group IDs
	local($ids) = sprintf("%d/%d",$uid,$gid);

	# Format the date for output
	local ($sec,$min,$hr,$dd,$mon,$yy) = localtime($mtime);
	local ($mm) = $months[$mon];

	$yy += 1900;
	local ($date) = sprintf("%s %.2d %.2d:%.2d %.4d",$mm,$dd,$hr,$min,$yy);

	local($modestr) = &desc($mode,$type);
	local($temp_name) = $name;
	local($sz) = $size;

	write;
}

# Produce a mode description string (eg "-rw-rw-r--") from the numeric mode
# and the tar file type.

sub desc
{
	local($mode, $type) = @_;
	local($own, $grp, $oth);

	$oth = $mode & 07;
	$grp = ($mode >> 3) & 07;
	$own = ($mode >> 6) & 07;

	($type eq '5' ? 'd' : '-') . &mode($own) . &mode($grp) . &mode($oth);
}

# Handle a single group of permissions.

sub mode
{
	local ($num) = @_;
	local ($str) = '---';

	substr($str,0,1) = 'r' if ($num & 04);
	substr($str,1,1) = 'w' if ($num & 02);
	substr($str,2,1) = 'x' if ($num & 01);

	$str;
}

# Convert an octal string from the tar file header into a numeric value.
# If the string is not in the correct format (whitespace followed by a
# string of octal digits), report the error and stop.

sub oct
{
	local ($str,$field) = @_;

	unless ($str =~ /^\s*([0-7]*)$/)
	{
		die "Header $field field is not in the correct format\n";
	}

	oct($1);
}

# Open the output file.

sub open_file
{
	local ($name, $type) = @_;
	local ($in_name, $out_name);

	$skip = 0;
	$in_name = &preformat($name);
	$out_name = &munge_filename($in_name, $type);
	print LOG "$name\t$in_name\t$out_name\n" if ($opt_L);

	if ($skip == 0)
	{
		open(OUT, ">$out_name") || die "Cannot open $out_name: $!\n";
	}
}

# Write the next block of data to the output file.

sub write_file
{
	local ($block, $bytes, $size) = @_;

	if ($skip == 0)
	{
		print OUT ($bytes <= $size) ? $block : substr($block,0,$size);
	}
}

# Close the output file.

sub close_file
{
	close OUT if $skip == 0;
}

# ******************* Os-dependent file name handling *******************

# This is the big OS-dependent bit. Take a file name from the tar header,
# and convert it so that it conforms to Archimedes file naming conventions.
# This process is somewhat adhoc, and should be modified if necessary to
# handle the names used in particular tar files.

sub munge_filename
{
	local ($name,$type) = @_;
	local ($out, @names);

	# We don't support CONTIG - treat as NORMAL.
	$type = '0' if $type eq '7';

	# NORMAL files ending with a slash are directories
	$type = '5' if ($name =~ m#/$#) && ($type eq '0' || $type eq "\0");

	# Split the path into pathname elements
	@names = split(/\//, $name);

	# Don't allow rooted pathnames - treat as relative to the
	# current directory
	shift(@names) if $names[0] eq '';

	# We are going to treat the final part specially, so extract it.
	# If the file is a directory, we don't do this.
	$file = pop(@names) unless $type eq '5';

	if ($#names == -1)
	{
		$dir = '';
	}
	else
	{
		# Clean up each part of the pathname.
		grep(&cleanup, @names);

		# Build the directory name.
		$dir = join('.',@names);

		# If the file type is DIR, make the directory and quit.
		if ($type eq '5')
		{
			$skip = 1;
			mkdir($dir) || &continue("Cannot make directory '$dir' ($!)");
			return $dir;
		}

		# Othewise, the directory must exist. We'll have one go
		# at making it if not, then give up and moan at the user.
		unless (-d $dir)
		{
			if (-e _)
			{
				# If it's already a file, complain.
				&continue("Directory '$dir' already there as a file");
			}
			elsif (mkdir($dir) == 0)
			{
				# If we can't make it, complain.
				&continue("Cannot make directory '$dir' ($!)");
			}
		}
	}

	# OK, now we sort out the basename. We'll pass this over to another
	# subroutine. It needs to know the directory to create the file in,
	# and the basename. It returns the filename to use, or undef on an
	# error, or if we have nothing to extract (special files).

	# First, though, we allow the file to be 'personalised'.
	$file = &personalise($file);
	&handle_file($dir,$file);
}

# Clean up filenames.
#
# The rules are
#     Replace dots with commas.
#     Remove all special characters :*#$&@^%\
#     Cut down to 10 characters by removing non-alphanumerics, if necessary.
#     As a last resort, truncate to 10 characters.

sub cleanup
{
	local ($ch);

	# Capitalise the name.
	tr/A-Z/a-z/;
	s/\b(\w)/(($ch = $1) =~ tr:a-z:A-Z:), $ch/eg;

	# Replace dots with commas.
	tr/./,/;

	# Delete magic characters.
	# I quoted everything here, just out of paranoia.
	tr/\:\*\#\$\&\@\^\%\\//d;

	# If we are longer than 10 characters, delete punctuation.
	tr/a-zA-Z0-9//cd if length($_) > 10;

	# If we are still too long, truncate.
	$_ = substr($_,0,10) if length($_) > 10;
}

# Handle a standard file (ie, everything except a directory). We take the
# file name, to be created in directory $dir. First, we decide what to
# call it, creating any new sub-directories we need. Then, if it's a
# special file, we create it containing a comment about the type, and
# then return 'undef' (which signals to the caller that it should not
# write anything to the file). Otherwise, we simply return the name to
# use. We use $_ for the filename, as we will be doing a lot of pattern
# matching, etc here!

sub handle_file
{
	local ($dir, $_) = @_;
	local ($dots, $pre, $suf);

	# Before we start, replace any initial and final dots
	# with exclamation marks.
	s/^\.+/'!' x length($&)/e;
	s/\.+$/'!' x length($&)/e;

	# Our main problem is dots. Count how many there are in the
	# supplied filename.
	$dots = tr/././;

	if ($dots == 0 || ($dots == 1 && m#\.[0-9]+$#))
	{
		# If we have no dots, or simply a dot followed by a number,
		# we can simply clean up the filename and ensure that we can
		# write OK. We do this for every case, so there is nothing
		# more to do here.
	}
	else
	{
		# Split the filename into prefix.suffix, and add the suffix
		# to the directory name, using the prefix as the filename.

		($pre, $suf) = m/^(.*)\.([^.]*)/;

		$_ = $suf;
		&cleanup;
		$dir = ($dir eq '') ? $_ : $dir . '.' . $_;

		# If the prefix has dots, replace them with commas.
		$pre =~ tr/./,/;
		$_ = $pre;
	}

	&cleanup;
	$dir = &check_write($dir);

	($dir eq '') ? $_ : "$dir.$_";
}

# Complain about an error, and ask the user if he wishes to continue.

sub continue
{
	local ($ch);
	&oswrstr($_[0]);
	&oswrstr(" Continue? (y/n) ");
	$ch = &osrdch();
	$ch =~ tr/a-z/A-Z/;
	$ch = 'N' unless $ch eq 'Y';
	&oswrstr("$ch\r\n");
	exit(1) unless $ch eq 'Y';
}

# Check that we can write a new file into the specified directory. If we
# can't, return the name of a newly generated directory where we can. This
# subroutine uses the array %dir_subst to remember how to handle the
# directories it has seen.

# It also ensures that the file $_ is not already in the directory. If it is,
# it gives the user the opportunity to rename the file, skip it, or stop.

sub check_write
{
	local ($dir) = @_;
	local ($res, $base, $newdir);

	# If the directory name is empty, just return.
	return $dir if ($dir eq '');

	# If the directory doesn't exist yet, create it (we will certainly
	# have no problems writing in a new directory).
	unless (-e $dir)
	{
		mkdir($dir) || ($skip = 1);
		return $dir;
	}

	# If the directory already exists as a file, offer the user the
	# chance to rename it.
	unless (-d $dir)
	{
		print "Directory $dir clashes with a file\n";
		print "New name (CR to skip): ";
		chop($newdir = <STDIN>);
		$skip = 1 unless $newdir;
		return $newdir ? &check_write($newdir) : $dir;
	}

	if (!defined $dir_subst{$dir})
	{
		$res = &check_dir($dir);

		$skip = 1 if ($res == -1);
		return $dir if $res;

		$base = (($dir =~ /\.([^.]*)$/) ? $1 : $dir);
		if (length($base) < 9)
		{
			$newdir = $dir . '-A';
		}
		else
		{
			$newdir = $` . '.' . substr($base,0,8) . '-A';
		}
		mkdir($newdir) || ($skip = 1);
	}
	else
	{
		$newdir = $dir_subst{$dir};
		$res = &check_dir($newdir);

		$skip = 1 if ($res == -1);
		return $newdir if $res;

		++$newdir;
		mkdir($newdir) || ($skip = 1);
	}

	$dir_subst{$dir} = $newdir;
	$newdir;
}


# Check the directory for the file $_, and for less than 77 entries.
# Return 1 for OK, 0 for no room, -1 for skip this file.
sub check_dir
{
	local ($dir) = @_;
	local ($file, *DIR);
	local (@files);

	opendir(DIR,$dir);
	@files = readdir(DIR);
	closedir(DIR);

	for $file (@files)
	{
		next unless $_ eq $file;
		print '-' x 70, "\n";
		system("Cat $dir");
		print "\nFile $_ already exists\n";
		print "New name (CR to skip): ";
		chop($file = <STDIN>);
		return -1 if $file eq '';
		$_ = $file;
		last;
	}

	# Maximum 77 files in a directory.
	# Remember $#arr is last index, not count!
	$#files < 76 ? $dir : undef;
}
