#!/usr/bin/perl -w
#
# armc 1.06 (20-Sep-03)
#
# (C) 2002, Martin Wuerthner <martin@mw-software.com>
#
# This script is FreeWare - you may copy it freely as long as it is left
# unchanged and it is not sold for profit.
#
# ----------------------
# -- armc Perl script --
# ----------------------
# This script converts 26-bit assembler code to partly 32-bit safe code.
# This is only a first step in a semi-automated process. This script does
# not (and cannot) do the full job. However, in addition to doing most of
# the tedious search-and-replace work, it also directs you to most places
# where manual post-processing is necessary by telling you where flag
# preservation over BL and SWI calls might have been assumed by the original
# code. Please note that I have written this script for my own use, it has
# not been engineered for general use, but it is a quick-and-dirty job that
# works for the specific way in which I write source files.
#
# This script does the following:
# - prints warnings about all places where the original 26-bit code
#   required flag preservation over a BL or a SWI call. There is no way the
#   script can distinguish between flag preservation required and return
#   flags from a routine, therefore you have to tell the script about return
#   flags (all SWIs are assumed to return V). If the called routine returns
#   status in a flag and you have not annotated this routine to return the
#   flag, then this causes a warning when the caller uses the flag.
# - replaces STMFD R13!,{<regs>,r14} by FNentry(<regs>)
# - replaces LDM<cond>FD R13!,{<regs>,pc}[^] by FNexit(<cond>) or
#   FNexit_specific(<cond>,<regs>) if regs are different from entry
# - replaces LDM<cond>FD R13!,{<regs>,r14} by FNpull_env(<cond>)
#   or FNpull_env_specific(<cond>,<regs>) if regs are different from entry
# - replaces BIC<cond>S PC,R14,<V or C flag> by FNreturnVC(<cond>) (or CC)
# - replaces ORR<cond>S PC,R14,<V or C flag> by FNreturnVS(<cond>) (or VC)
# - merges LDM<cond>FD R13!,{r14} with a following return instruction with
#   condition cond into a FNpull_and_return(<cond>) macro (also for returns
#   with return flags, e.g., FNpull_and_returnVC(<cond>))
#
# -----------------------
# -- General procedure --
# -----------------------
# 1) Prepare your sources as untokenised textual BASIC
# 2) Run armc on the source file - ignore the converted output for the
#    moment and check the warnings that were printed
# 3) Check all places for which warnings about flag usage after BL and SWI
#    were given
# 4) If a flag warning after BL xxx was caused by routine xxx returning
#    flags (check for flag-returning routine exits in the original source or
#    for exitVS, returnVS etc. macros in the converted output) you may want
#    to put this information in the routine header so armc knows during the
#    next run
# 5) run armc again, make sure you keep the original source code and start
#    using the converted output from now on
# 6) If a flag warning after SWI or BL really indicates a place where the
#    original code relied on flag preservation, you need to code around it
#    (e.g., by adding suitable conditional jumps before the call)
# 7) If the code is a module, you need to make sure that the swiReturn...
#    and swiExit... macros are used for all SWI exits, so SWIs are flag-
#    preserving in 26-bit mode.
#
# You still need to check for:
# A) processor mode changes (TSTP, TEQP instructions)
# B) explicit modifications to entry r14, e.g., BIC r14,r14,#10000000 at
#    routine entry to clear the V flag - for example, in this case, each
#    routine exit that was flag-restoring in the original sources (i.e.,
#    MOVS or LDM^) is now an FNexit or FNreturn macro in the converted
#    code) and must be replaced manually by its VC variant, e.g., FNexitVC,
#    FNreturnVC, ...
# C) using entry r14 as an address (e.g., by trying to clear all flag bits,
#    BIC rX,r14,#FC000003 or BIC rX,r14,#FC000000 in USR mode)
# D) BIC<cond>S/ORR<cond>S PC,R14,#imm with values other than SR_C or SR_V
#    or their hex equivalents, which are the only ones recognized by armc
#    (e.g., armc will not deal with returns that clear both the V and the C
#    flag, nor, of course, will it cope with your own constant names for the
#    status flags). To catch these instructions, use David Ruck's ARMalyser,
#    which is a good idea to use in any case.
# E) Fixed size code structures and statically computed offsets in the code
#    that are broken by the expansion one instruction into multiple
#    instructions. A simple example is a jump table where each entry must
#    exactly be one instruction. If one such instruction was for example
#    bics pc,r14,#SR_V, then the resulting FNreturnVC(al) is two
#    instructions and will break the jump table structure.
# F) everything else needed for 32-bit compatibility ;-)

# usage: armc [-p] [-c defsfile] <infile> <outfile>
#
# This script is suitable for textual (i.e., untokenised) BASIC assembler
# source code only (due to the difference in label syntax it will not
# recognize ObjAsm labels but it should be relatively easy to change this).
# It ignores the surrounding assembling machinery and looks at the actual
# assembler code only. It is very basic in that it does not parse the source
# properly, not even the [ and ] directives, instead, it is triggered by
# certain patterns.
#
# armc replaces routine entry and exit code by non-flag-preserving macros
# that can be used to assemble 26-bit and 32-bit variants of the software.
# The macros are defined in 32BitLib, which is available separately.
#
# Like SWIs under RO5, subroutines in the converted code are generally
# assumed not to preserve the flags. This means that some other aspects of
# the software have to be updated. armc will find most places where flag
# preservation over subroutines and SWI calls was assumed by the software.
# These places will be printed to STDERR during conversion. This is not a
# proper data flow analysis but is pretty accurate in most circumstances.
# The heuristic is as follows: If a conditional instruction after a BL or
# SWI is based on a flag that has not been affected by any instruction
# after the BL or SWI, then this flag was either returned from the call
# (which is OK) or the software relied on the flags before the call, which
# no longer works in the 32-bit version. SWIs always return V, so using V
# after a SWI does not cause a warning. If a SWI returns other flags as well
# (e.g., C), then accessing the C flags after the SWI call will cause a
# warning to be printed. If a flag is accessed that was returned by a
# subroutines called via BL then always a warning is printed. To avoid these
# warnings for subroutines that are known to return flags, you can document
# the fact that a routine returns the flags in the source code. armc then
# reads the structured comments and does not give a warning for this flag.
#
# My standardized routine header looks like this (but you do not need the
# full header for armc to read the information):
# ; entry. <entry conditions>
# ;        <more entry conditions>
# ; exit. <some exit conditions>
# ;       <more exit conditions>
# .routinelabel
#
# The semicolons must be at the beginning of the line. armc looks for the
# flags VC, VS, CC, CS, PL, MI at the beginning of some text in the exit
# conditions section. So, to document that a routine returns status in its C
# flag, you would put the following line before the routine label:
# ; exit. CC/CS
# .routinelabel
#
# Or, as it often happens in my code:
# ; exit. VC = success, then r0 = result
# ;       VS = error, then r0 => error block
# .routinelabel
#
# The variants "On exit", "on exit", "On exit;", "on exit;", "Errors" are
# permissible as well.
# 
# To document that both V and C are used as return flags:
# ; exit. CC/CS
# ;       VC/VS
# .routinelabel
#
# Then, callers can access the declared flags after the call without causing
# a warning. Please note that the output of the conversion does not change
# in any way, only the warnings that are produced.
#
# If the source code includes a library, then the routine headers of the
# library code can be processed as well by specifying -c <libraryfile>
# before the source and target filenames.
#
# There is another special hard-wired feature that I needed for my own
# sources: Parts of the code can be conditional with a special flag
# variable Fixes32Bit. Code in IF NOT Fixes32Bit branches or in the ELSE
# branches of IF Fixes32Bit branches is ignored. This allows you to maintain
# the old status of the sources as well. This is probably not useful for
# most people.
# IF [NOT] Fixes32Bit THEN
#    ...
# [ELSE
#    ...]
# ENDIF
#
# Passing the option -p causes the macros FNstptr/FNldptr to be treated as
# function calls. This again, is very specialized and probably not useful
# unless you happen to have some CC source code.
#
# Limitations
# -----------
# Does not replace function returns with multiple flags, i.e., only
# replaces BIC/ORR<cond>S PC,R14,<V flag>, <C flag> or <Z flag>.
#
# History
# -------
# 1.06 (20-Sep-03):
# Fixed major bug in 1.05: Exit register lists containing spaces were
# converted to the empty string. Improved replacement of
# ldm<cond>fd r13!,{...,r14} by FNpull_env[specific]: Allowed spaces in same
# positions as for exit points.
#
# 1.05 (22-Apr-03):
# Accepts spaces in a lot more positions in function entry and exit points.
# Added -p parameter. Accepts more variants to specify return flags for
# routines. Correctly handles IF TRUE and IF FALSE sections (often used to
# comment out chunks of code). Handles ADR as ALU instruction. Allows spaces
# between FN<name> and the parameters.
#
# 1.04 (26-Mar-03):
# Prints warning message when a function return is encountered with other
# flag manipulations than those that are replaced. Prints warning message
# when a ldm with pc in the list and the ^ flag is encountered that is not
# replaced. Supports the Z flag when replacing BIC/ORR<cond>S PC,R14,#<flag>
#
# 1.03 (13-Dec-02):
# Allows routines to return the N flag as well.
# Added check for 32BitNoEntry comment keyword.
#
# 1.02 (06-Dec-02):
# Allows a definition file to be passed. The function header comments of
# this file are read, so the return flags of the functions in that file are
# taken into account.
# Now recgnizes the FNexit/return/... macros itself, so when running
# converted code through armc, it still spots the flag usage, so the flag
# preservation over SWI/function call warnings are much more accurate.
# Hard-wired special case for FNstptr/ldptr - these are in one of my private
# library and translate to calls, so they need to be recognized as such.
#
# Perl code following - Warning: this is very messy and the regular
# expressions are likely to be impenetrable.

die "usage: armc [-p] [-c <defsfile>] <infile> <outfile>\n"
  unless (@ARGV == 2 && $ARGV[0] !~ /^-/)
         || (@ARGV == 3 && $ARGV[0] eq "-p")
         || (@ARGV == 4 && $ARGV[0] eq "-c")
         || (@ARGV == 5 && $ARGV[0] eq "-p" && $ARGV[1] eq "-c");

$c_flag = 1;
$z_flag = 2;
$v_flag = 4;
$n_flag = 8;

$infilename = shift;
if ($infilename eq "-p") {
  $treat_fnstldptr = 1;
  $infilename = shift;
}
else {
  $treat_fnstldptr = 0;
}

if ($infilename eq "-c") {
  $defsfilename = shift;
  $infilename = shift;

  open (INFILE,$defsfilename)
    || die "Cannot open file '$defsfilename' for reading\n";
  read_comments();
  close(INFILE);
}
else {
  $defsfilename = "";
}
$outfilename = shift;

open (INFILE,$infilename)
  || die "Cannot open file '$infilename' for reading\n";
open (OUTFILE,">".$outfilename)
  || die "Cannot open file '$outfilename' for writing\n";

print STDERR "Processing $infilename\n";
read_comments();

close(INFILE);

print scalar keys %flags, " flag-returning subroutines\n";

open (INFILE,$infilename)
  || die "Cannot open file '$infilename' for reading\n";

$line = 0;
$entry = "";
$last_line = "";
$ignore = 0;
$pull_env_list = "";
$pull_env_cond = "";
$last_was_pull_env = 0;
$last_label = "<start>";
$in_fixes_if = 0;                 # we are currently in the THEN or ELSE branch of an IF Fixes32Bit or IF NOT Fixes32Bit
$process_code = 1;                # we want to process the current code
$if_level = 0;                    # nesting level of multi-line IF ... ENDIF
$in_fixes_if_level = 0;           # the nest level at which the IF Fixes32Bit occurred
$C_hot = 0; $Z_hot = 0; $N_hot = 0; $V_hot = 0;   # current flags not those returned by a BL/SWI
while(<INFILE>) {
  $line++;
  my $prev_was_pull_env = $last_was_pull_env;
  $last_was_pull_env = 0;
  chomp();

  if (m/^\s*IF\s*(Fixes32Bit|TRUE)\s*THEN$/) {
    $if_level++; $in_fixes_if = 1;
    $in_fixes_if_level = $if_level;
    $process_code = 1;
    # print STDERR "Opening IF Fixes32Bit at line $line\n";
  }
  elsif (m/^\s*IF\s*(NOT Fixes32Bit|FALSE)\s*THEN$/) {
    $if_level++; $in_fixes_if = 1;
    $in_fixes_if_level = $if_level;
    $process_code = 0;
    # print STDERR "Opening IF NOT Fixes32Bit at line $line\n";
  }
  elsif (m/^\s*IF.*THEN$/) {
    $if_level++;
    # print STDERR "Opening IF at line $line (level now $if_level)\n";
  }
  if (m/^\s*ELSE$/) {
    $process_code = !$process_code if ($in_fixes_if && $if_level == $in_fixes_if_level);
  }
  if (m/^\s*ENDIF/) {   # we allow characters after the endif, the important thing is that it is at the start of the line
    if (!$if_level) {
      print STDERR "Warning: ENDIF without matching if (line $line)\n";
    }
    else {
      if ($in_fixes_if && $if_level == $in_fixes_if_level) {
        $process_code = 1; $in_fixes_if = 0;
      }
      $if_level--;
      # print STDERR "Closing ENDIF at line $line\n";
    }
  }

  if ($process_code) {

  $C_used = 0; $N_used = 0; $Z_used = 0; $V_used = 0; $opcode = ""; $ccode = ""; $sfx1 = "";
  if (m/^\s*\.([a-zA-Z0-9_%]+)$/) {
    $last_label = $1;
  }
  elsif (m/^\s*\.([a-zA-Z0-9_%]+)\s/) {
    $last_label = $1;
  }
  if (m/^\s*(\.[a-zA-Z0-9_%]+\s+)?([a-zA-Z][a-zA-Z][a-zA-Z])([EeNnVvMmPpCcAaNnHhLlGg][QqEeSsCcIiLlVvTtOo])([fFeEiIdDsSpP]|)([aAdDbB]|)\s/
      || m/^\s*(\.[a-zA-Z0-9_%]+\s+)?([bB][lL])([EeNnVvMmPpCcAaNnHhLlGg][QqEeSsCcIiLlVvTtOo])\s+[a-zA-Z0-9_]+()()/
      || m/^\s*(\.[a-zA-Z0-9_%]+\s+)?([bB])([EeNnVvMmPpCcAaNnHhLlGg][QqEeSsCcIiLlVvTtOo])\s+[a-zA-Z0-9_]+()()/) {
    my $op = $2;
    my $cond = $3;
    my $suffix1 = $4;
    my $suffix2 = $5;
    if ($op =~ m/ADD/i || $op =~ m/ADC/i || $op =~ m/ADR/i || $op =~ m/SUB/i || $op =~ m/SBC/i || $op =~ m/RSB/i || $op =~ m/RSC/i
        || $op =~ m/MOV/i || $op =~ m/MVN/i || $op =~ m/CMP/i || $op =~ m/CMN/i || $op =~ m/AND/i || $op =~ m/ORR/i
        || $op =~ m/EOR/i || $op =~ m/BIC/i || $op =~ m/TST/i || $op =~ m/TEQ/i || $op =~ m/MUL/i || $op =~ m/MLA/i
        || $op =~ m/SWI/i || $op =~ m/B/i || $op =~ m/BL/i
        || $op =~ m/LDR/i || $op =~ m/STR/i
        || (($op =~ m/LDM/i || $op =~ m/STM/i) && $suffix1 ne "" && $suffix2 ne "")) {
      if ($cond =~ m/EQ/i || $cond =~ m/NE/i || $cond =~ m/HI/i || $cond =~ m/LS/i || $cond =~ m/GT/i
          || $cond =~ m/LE/i) { $Z_used = 1; }
      if ($cond =~ m/VS/i || $cond =~ m/VC/i || $cond =~ m/GE/i || $cond =~ m/LT/i || $cond =~ m/GT/i
          || $cond =~ m/LE/i) { $V_used = 1; }
      if ($cond =~ m/MI/i || $cond =~ m/PL/i || $cond =~ m/GE/i || $cond =~ m/LT/i || $cond =~ m/GT/i
          || $cond =~ m/LE/i) { $N_used = 1; }
      if ($cond =~ m/CS/i || $cond =~ m/CC/i || $cond =~ m/HI/i || $cond =~ m/LS/i || $cond =~ m/HS/i || $cond =~ m/LO/i) { $C_used = 1; }
      if ($cond !~ m/AL/i && $cond !~ m/NV/i && !$C_used && !$N_used && !$Z_used && !$V_used) {
        print STDERR "Invalid condition code $cond (line $line)\n";
      }
      else {
        $opcode = $op; $ccode = $cond; $sfx1 = $suffix1;
      }
    }
  }
  elsif (m/^\s*(\.[a-zA-Z0-9_%]+\s+)?([a-zA-Z][a-zA-Z][a-zA-Z])([sSpP]|)\s/
         || m/^\s*(\.[a-zA-Z0-9_%]+\s+)?([sS][tT][mM])[fFeEIiDd][aAdDbB]()\s/
         || m/^\s*(\.[a-zA-Z0-9_%]+\s+)?([bB][lL])()\s/
         || m/^\s*(\.[a-zA-Z0-9_%]+\s+)?([bB])()\s/) {
    my $op = $2;
    my $suffix1 = $3;
    if ($op =~ m/ADD/i || $op =~ m/ADC/i || $op =~ m/ADR/i || $op =~ m/SUB/i || $op =~ m/SBC/i || $op =~ m/RSB/i || $op =~ m/RSC/i
        || $op =~ m/MOV/i || $op =~ m/MVN/i || $op =~ m/CMP/i || $op =~ m/CMN/i || $op =~ m/AND/i || $op =~ m/ORR/i
        || $op =~ m/EOR/i || $op =~ m/BIC/i || $op =~ m/TST/i || $op =~ m/TEQ/i || $op =~ m/MUL/i || $op =~ m/MLA/i
        || $op =~ m/SWI/i || $op =~ m/B/i || $op =~ m/BL/i
        || $op =~ m/LDR/i || $op =~ m/STR/i || $op =~ m/LDM/i || $op =~ m/STM/i) {
      $opcode = $op; $ccode = ""; $sfx1 = $suffix1;
    }
  }
  elsif (m/^\s*(\.[a-zA-Z0-9_%]+\s+)?FN(mov|add|adr|check|dhex|dlhex|dlmess|dlstring|dmess|dstring|exit|exitS|exitVS|exitVC|exit_specific|pull_env|pull_env_specific|pull_and_return|pull_and_return_VC|pull_and_returnVS|pull_and_returnCC|pull_and_returnCS|return|returnVC|returnVS|returnCC|returnCS|returnEQ|returnNE|returnVCCC|returnVCCS|returnVSCS|returnVSCC|returnCCEQ|returnCCNE|clearVi|setVi|clearC|setC|push|pull|XRel|shade|unshade|select|deselect)\s*\(([EeNnVvMmPpCcAaNnHhLlGg][QqEeSsCcIiLlVvTtOo])(,.+)?\)/) {
    $opcode = $2;     # "pseudo"-opcode
    my $cond = $3;
    if ($cond =~ m/EQ/i || $cond =~ m/NE/i || $cond =~ m/HI/i || $cond =~ m/LS/i || $cond =~ m/GT/i
        || $cond =~ m/LE/i) { $Z_used = 1; }
    if ($cond =~ m/VS/i || $cond =~ m/VC/i || $cond =~ m/GE/i || $cond =~ m/LT/i || $cond =~ m/GT/i
        || $cond =~ m/LE/i) { $V_used = 1; }
    if ($cond =~ m/MI/i || $cond =~ m/PL/i || $cond =~ m/GE/i || $cond =~ m/LT/i || $cond =~ m/GT/i
        || $cond =~ m/LE/i) { $N_used = 1; }
    if ($cond =~ m/CS/i || $cond =~ m/CC/i || $cond =~ m/HI/i || $cond =~ m/LS/i || $cond =~ m/HS/i || $cond =~ m/LO/i) { $C_used = 1; }
    if ($cond !~ m/AL/i && $cond !~ m/NV/i && !$C_used && !$N_used && !$Z_used && !$V_used) {
      print STDERR "Invalid condition code $cond (line $line)\n";
    }
    else {
      $ccode = $cond;
    }
    # print STDERR "$_ using ";
    # print STDERR "Z" if ($Z_used);
    # print STDERR "C" if ($C_used);
    # print STDERR "V" if ($V_used);
    # print STDERR "N" if ($N_used);
    # print STDERR "\n";
  }
  elsif (m/^\s*(\.[a-zA-Z0-9_%]+\s+)?FN([a-zA-Z%]+)\s*\(([EeNnVvMmPpCcAaNnHhLlGg][QqEeSsCcIiLlVvTtOo])(,.+)?\)/) {
    print STDERR "Unknown conditional library function FN$2\n";
  }
  elsif (m/^\s*(\.[a-zA-Z0-9_%]+\s+)?FN(stptr|ldptr)/) {
    $opcode = $2;     # "pseudo"-opcode
    $ccode = "";
  }

  # Here, we have $opcode, $ccode and $sfx1 if the line is an instruction (else $opcode eq "")
  # if ($opcode ne "") { print STDERR "'$opcode' '$ccode' '$sfx1'\n"; }

  # OK, at this stage, we know whether the current instruction depends on any of the flags
  # If this flag is hot, print a warning
  if ($C_used && $C_hot || $Z_used && $Z_hot || $V_used && $V_hot || $N_used && $N_hot) {
    # print a warning unless this line is explicitly documented as correct!
    print STDERR "Warning: Flags used after BL or SWI (line $line after $last_label)\n" unless m/32BitCorrect/i;
    print STDERR "$_\n "unless m/32BitCorrect/i;
  }

  # Now, find out whether any of the flags are changed by this instruction
  if (($opcode =~ m/CMP/i || $opcode =~ m/CMN/i)
      || (($opcode =~ m/ADD/i || $opcode =~ m/ADC/i || $opcode =~ m/SUB/i || $opcode =~ m/SBC/i
      || $opcode =~ m/RSB/i || $opcode =~ m/RSC/i) && $sfx1 =~ m/S/i)) {
    # CMP, CMN and arithmetic instructions with the S bit modify all flags
    # print STDERR "Clearing flags after $_\n";
    $C_hot = 0; $N_hot = 0; $Z_hot = 0; $V_hot = 0;
  }
  if (($opcode =~ m/TEQ/i || $opcode =~ m/TST/i)
      || ($opcode =~ m/MOV/i || $opcode =~ m/MVN/i || $opcode =~ m/AND/i || $opcode =~ m/ORR/i
      || $opcode =~ m/EOR/i || $opcode =~ m/BIC/i) && $sfx1 =~ m/S/i) {
    # TEQ, TST and logical instructions modify N and Z only unless there is a shift
    # print STDERR "Clearing N and Z after $_\n";
    $N_hot = 0; $Z_hot = 0;
    if (m/,LSL/i || m/,LSR/i || m/,ASR/i || m/ROR/i || m/RRX/i) {
      $C_hot = 0;
    }
  }
  if (($opcode =~ m/MUL/i || $opcode =~ m/MLA/i) && $sfx1 =~ m/S/i) {
    # print STDERR "Clearing N,C and Z after $_\n";
    $N_hot = 0; $Z_hot = 0; $C_hot = 0;
  }

  if ($opcode =~ m/SWI/i) {
    # NB. we do not mark V as hot because the following code can legally rely on it
    # print STDERR "Marking N,C and Z as hot after $_\n";
    $Z_hot = 1; $N_hot = 1; $C_hot = 1; $V_hot = 0;
  }

  if ($opcode =~ m/B/i) {
    if ($ccode eq "" or $ccode =~ m/AL/i) {
      # an unconditional jump, so the flags are not hot any more
      $C_hot = 0; $N_hot = 0; $Z_hot = 0; $V_hot = 0;
    }
  }

  if ($opcode =~ m/BL/i) {
    # all flags are hot except those passed back by the routine
    my $routine;
    if (m/^\s*(\.[a-zA-Z0-9_%]+\s+)?[bB][lL]([EeNnVvMmPpCcAaNnHhLlGg][QqEeSsCcIiLlVvTtOo])?\s+([a-zA-Z0-9_%]+)/) {
      $routine = $3;
    }
    else {
      print STDERR "Warning: Could not parse BL operand in '$_'\n";
      $routine = "";
    }
    # print STDERR "Marking flags as hot after calling $routine\n";
    $Z_hot = 1; $N_hot = 1; $C_hot = 1; $V_hot = 1;
    my $modflags = $flags{$routine};
    if ($modflags) {
      $Z_hot = 0 if $modflags & $z_flag;
      $C_hot = 0 if $modflags & $c_flag;
      $V_hot = 0 if $modflags & $v_flag;
      $N_hot = 0 if $modflags & $n_flag;
      # print STDERR "Ignoring ";
      # print STDERR "C" if $modflags & $c_flag;
      # print STDERR "Z" if $modflags & $z_flag;
      # print STDERR "V" if $modflags & $v_flag;
      # print STDERR "N" if $modflags & $n_flag;
      # print STDERR " flag(s) after calling $routine\n";
    }
  }

  if ($treat_fnstldptr) {
    if ($opcode eq "stptr" || $opcode eq "ldptr") {
      # this macro involves a call, so all flags are hot!
      $Z_hot = 1; $N_hot = 1; $C_hot = 1; $V_hot = 1;
    }
  }

  if ($opcode eq "shade" || $opcode eq "unshade"
      || $opcode eq "select" || $opcode eq "deselect") {
    # these macros involve a call, so all flags are hot!
    $Z_hot = 1; $N_hot = 1; $C_hot = 1; $V_hot = 1;
  }

  if ($opcode eq "check") {
    # this macro involves a call, so all flags are hot - except Z which
    # is returned!
    $Z_hot = 0; $N_hot = 1; $C_hot = 1; $V_hot = 1;
  }

  if (m/([mM][oO][vV]|[bB][iI][cC]|[oO][rR][sS])[sS]\s+[pP][cC],\s*/
      || m/[lL][dD][mM][fFiI][dDaA]\s+.*\{.*[pP][cC]\s*}\^/) {
    # unconditional control flow transfer - all flags are cleared
    # print STDERR "Clearing flags after $_\n";
    $C_hot = 0; $N_hot = 0; $Z_hot = 0; $V_hot = 0;
  }

  } # if ($process_code)

  # Replace non-32-bit-safe instructions (and convert to FNentry/exit)

  if (!m/32BitNoEntry/) {
    # the keyword 32BitNoEntry can be used to mark a line that should not
    # be treated as a function entry - this keyword is not present on this
    # line, so try and substitute stack instructions pushing R14
    if (s/[sS][tT][mM][FfDd][DdBb]\s+(([Rr]|)13|[sS][pP])\s*!\s*,\s*\{\s*(([rR]|)14|[lL][rRkK])\s*}/FNentry("")/) {
      $entry = "";
    }
    if (s/[sS][tT][mM][FfDd][DdBb]\s+(([Rr]|)13|[sS][pP])\s*!\s*,\s*\{([\-Rr0-9, ]+),\s*(([rR]|)14|[lL][rRkK])\s*}/FNentry("$3")/) {
      $entry = $3;
    }
  }
  if (!m/32BitNoExit/) {
    # the keyword 32BitNoExit can be used to mark a line that should not
    # be replaced by an return/exit macro - this keyword is not present,
    # so try and replace instructions
    if (m/[lL][dD][mM]([a-zA-Z][a-zA-Z]|)[FfIi][DdAa]\s+(([Rr]|)13|[sS][pP])\s*!\s*,\s*\{(([\-Rr0-9, ]+),|)\s*([rR]15|[pP][cC])\s*}\s*\^?/) {
      my $cond = $1;
      my $list = "";
      # $4 = register list including trailing comma or empty
      # $5 = register list excluding trailing comma
      $list = $5 if $4 ne "";
      $list = "" if $list =~ /^\s+$/;
      $cond = "al" if $cond eq "";
      # compare with the current entry
      if ($list eq "$entry") {
        $rep = "FNexit($cond)";
      }
      else {
        $rep = "FNexit_specific($cond,\"$list\")";
      }
      s/[lL][dD][mM]([a-zA-Z][a-zA-Z]|)[FfIi][DdAa]\s+(([Rr]|)13|[sS][pP])\s*!\s*,\s*\{(([\-Rr0-9, ]+),|)\s*([rR]15|[pP][cC])\s*}\s*\^?/$rep/;
    }
    if (m/[lL][dD][mM]([a-zA-Z][a-zA-Z]|)[FfIi][DdAa]\s+(([Rr]|)13|[sS][pP])\s*!\s*,\s*\{(([\-Rr0-9, ]+),|)\s*(([rR]|)14|[lL][rRkK])\s*}/) {
      my $cond = $1;
      my $list = "";
      $list = $5 if $4 ne "";
      $cond = "al" if $cond eq "";
      # compare with the current entry
      if ($list eq "$entry") {
        $rep = "FNpull_env($cond)";
      }
      else {
        $rep = "FNpull_env_specific($cond,\"$list\")";
      }
      $pull_env_list = $list;
      $pull_env_cond = $cond;
      s/[lL][dD][mM]([a-zA-Z][a-zA-Z]|)[FfIi][DdAa]\s+(([Rr]|)13|[sS][pP])\s*!\s*,\s*\{(([\-Rr0-9, ]+),|)\s*(([rR]|)14|[lL][rRkK])\s*}/$rep/;
      $last_was_pull_env = 1;
    }
    if (m/[mM][oO][vV]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])/) {
      my $cond = $1;
      $cond = "al" if $cond eq "";
      $rep="FNreturn($cond)";
      s/[mM][oO][vV]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])/$rep/;
    }
    if (m/[bB][iI][cC]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*(#&10000000|#SR_V)/) {
      my $cond = $1;
      $cond = "al" if $cond eq "";
      if ($prev_was_pull_env && $cond eq $pull_env_cond && $pull_env_list eq "") {
        $rep = "FNpull_and_returnVC($cond)";
        $ignore = 1;    # ignore the previous line
      }
      else {
        $rep = "FNreturnVC($cond)";
      }
      s/[bB][iI][cC]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*(#&10000000|#SR_V)/$rep/;
    }
    if (m/[oO][rR][rR]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*(#&10000000|#SR_V)/) {
      my $cond = $1;
      $cond = "al" if $cond eq "";
      if ($prev_was_pull_env && $cond eq $pull_env_cond && $pull_env_list eq "") {
        $rep = "FNpull_and_returnVS($cond)";
        $ignore = 1;    # ignore the previous line
      }
      else {
        $rep = "FNreturnVS($cond)";
      }
      s/[oO][rR][rR]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*(#&10000000|#SR_V)/$rep/;
    }
    if (m/[bB][iI][cC]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*(#&20000000|#SR_C)/) {
      my $cond = $1;
      $cond = "al" if $cond eq "";
      if ($prev_was_pull_env && $cond eq $pull_env_cond && $pull_env_list eq "") {
        $rep = "FNpull_and_returnCC($cond)";
        $ignore = 1;    # ignore the previous line
      }
      else {
        $rep = "FNreturnCC($cond)";
      }
      s/[bB][iI][cC]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*(#&20000000|#SR_C)/$rep/;
    }
    if (m/[oO][rR][rR]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*(#&20000000|#SR_C)/) {
      my $cond = $1;
      $cond = "al" if $cond eq "";
      if ($prev_was_pull_env && $cond eq $pull_env_cond && $pull_env_list eq "") {
        $rep = "FNpull_and_returnCS($cond)";
        $ignore = 1;    # ignore the previous line
      }
      else {
        $rep = "FNreturnCS($cond)";
      }
      s/[oO][rR][rR]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*(#&20000000|#SR_C)/$rep/;
    }
    if (m/[bB][iI][cC]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*(#&40000000|#SR_Z)/) {
      my $cond = $1;
      $cond = "al" if $cond eq "";
      if ($prev_was_pull_env && $cond eq $pull_env_cond && $pull_env_list eq "") {
        $rep = "FNpull_and_returnNE($cond)";
        $ignore = 1;    # ignore the previous line
      }
      else {
        $rep = "FNreturnNE($cond)";
      }
      s/[bB][iI][cC]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*(#&40000000|#SR_Z)/$rep/;
    }
    if (m/[oO][rR][rR]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*(#&40000000|#SR_Z)/) {
      my $cond = $1;
      $cond = "al" if $cond eq "";
      if ($prev_was_pull_env && $cond eq $pull_env_cond && $pull_env_list eq "") {
        $rep = "FNpull_and_returnEQ($cond)";
        $ignore = 1;    # ignore the previous line
      }
      else {
        $rep = "FNreturnEQ($cond)";
      }
      s/[oO][rR][rR]([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*(#&40000000|#SR_Z)/$rep/;
    }
    if (m/([oO][rR][rR]|[bB][iI][cC])([a-zA-Z][a-zA-Z]|)[sS]\s+[pP][cC]\s*,\s*(([rR]|)14|[lL][rRkK])\s*,/) {
      print STDERR "Warning: Unrecognized flag return in (line $line after $last_label)\n";
      print STDERR "$_\n";
    }
    if (m/([oO][rR][rR]|[bB][iI][cC])([a-zA-Z][a-zA-Z]|)([sS]|)\s+(([rR]|)14|[lL][rRkK])\s*,\s*(([rR]|)14|[lL][rRkK])\s*,\s*#(SR_|&[0-9A-F][0-9A-F]00000[0-3])/) {
      print STDERR "Warning: Return flags manipulation in (line $line after $last_label)\n";
      print STDERR "$_\n";
    }
    if (m/[lL][dD][mM].*\{.*([Rr]15|[pP][cC])\s*}\s*\^/) {
      print STDERR "Warning: LDM pc^ in (line $line after $last_label)\n";
      print STDERR "$_\n";
    }
  }

  if ($line && not $ignore) { print OUTFILE $last_line,"\n"; }
  $last_line = $_;
  $ignore = 0;
}
if ($line && not $ignore) { print OUTFILE $last_line,"\n"; }

close(OUTFILE);
close(INFILE);
print STDERR "Done\n";
exit(0);

sub read_comments {
  # prescan to get the flags for routines
  # (read structured comments from routine headers)
  $in_exit_paragraph = 0;
  while(<INFILE>) {
    if (m/\s*IF NOT Fixes32Bit THEN$/) {
      my $rc_if_level = 1;
      while($rc_if_level > 0 && <INFILE>) {
        if (m/\s*IF\s.*THEN$/) { $rc_if_level++; }
        if (m/\s*ENDIF$/) { $rc_if_level--; }
      }
      return if $rc_if_level > 0;
      # read the next line
      return unless <INFILE>;
    }
    if (!$in_exit_paragraph) {
      if (m/^\s*;\s*([Oo][Nn]\s*|)[Ee]xit([.; ]|$)/
          || m/^\s*;\s*[Ee]rrors[.; ]/) {
        $in_exit_paragraph = 1;
        $label_found = 0;
        $current_flags = 0;
        $_ = ";".$';   # process the remainder of the line below!
      }
    }
    if ($in_exit_paragraph) {
      if (m/^\s*\.([a-zA-Z0-9_%]+)/) {
        $flags{$1} = $current_flags;
        $label_found = 1;
        if ($current_flags) {
          # print STDERR "Routine $1 returns ";
          # print STDERR "C" if $current_flags & $c_flag;
          # print STDERR "Z" if $current_flags & $z_flag;
          # print STDERR "V" if $current_flags & $v_flag;
          # print STDERR "N" if $current_flags & $n_flag;
          # print STDERR "\n";
        }
      }
      elsif ($label_found) {
        # we allow multiple labels, but as soon as we have found one
        # which was not followed by another label, the sequence has
        # ended - this means that an empty line between the labels
        # already terminates the association!
        $in_exit_paragraph = 0;
      }
      $current_flags |= $c_flag if (m/^\s*;.*[^a-zA-Z][Cc][CcSs][^a-zA-Z]/);
      $current_flags |= $c_flag if (m/^\s*;.*[^a-zA-Z]SR_C[^a-zA-Z]/);
      $current_flags |= $v_flag if (m/^\s*;.*[^a-zA-Z][Vv][CcSs][^a-zA-Z]/);
      $current_flags |= $v_flag if (m/^\s*;.*[^a-zA-Z]SR_V[^a-zA-Z]/);
      $current_flags |= $z_flag if (m/^\s*;.*[^a-zA-Z](EQ|NE|eq|ne)[^a-zA-Z]/);
      $current_flags |= $z_flag if (m/^\s*;.*[^a-zA-Z]SR_Z[^a-zA-Z]/);
      $current_flags |= $n_flag if (m/^\s*;.*[^a-zA-Z](PL|MI|pl|mi)[^a-zA-Z]/);
      $current_flags |= $n_flag if (m/^\s*;.*[^a-zA-Z]SR_N[^a-zA-Z]/);
    }
  }
}
