#!/usr/bin/perl -w
#
#  Partial Preprocessor
#
#  This program processes C preprocessor conditionals, eliminating
#  conditions known to be true or false. This can be used to simplify
#  #ifdef-ridden code. This program attempts to be smart about complex
#  conditions. For example,
#     #if defined(foo) && defined(bar) || defined(huh)
#  is replaced by
#     #if defined(huh)
#  when we know that foo is defined and bar is not. For another example,
#     partpp -U__WIN32__
#  would eliminate all Win32-specific code from a given source code.
#
#  Parameters:
#    -Dsym    we know that sym is defined (a "=value" suffix, as in
#             "-Dsym=1", is accepted and ignored)
#    -Usym    we know that sym is not defined
#    -C       assume that user wrote "clean" code, that is, all
#             object-like macros behave as objects
#    -d       process #define/#undef directives in order to
#             keep our dictionary up-to-date.
#    -f       force simplify expressions even when they don't contain
#             any of our symbols
#    -eexpr   evaluate expression and print simplified version
#
#  The -C option deserves explanation: assume the following:
#     #define X 0) - (1
#     #if (defined(foo) || X)
#  Assuming foo is defined, the #if expression would be equivalent to
#     #if (1 || X)
#  which is at first sight equivalent to
#     #if (1)
#  i.e. true. However, the original expression is
#  "#if (1 || 0) - (1)" which is false. To guarantee
#  correctness, we assume that this can happen, and pass through
#  expressions containing "raw" macro names unchanged. With -C, we
#  assume user doesn't write such hacks (that is, all non-function
#  macros are properly parenthesized expressions), and would simplify
#  the starting expression to "1".
#
#  With -f, this program simplifies expressions even if they don't
#  contain one of our symbols. For example, with -f,
#     #if 1 || 2
#  is simplified to "#if 1". Because partpp computes in decimal (and
#  thus makes code containing hex/oct computations less readable),
#  this is normally not done on unrelated expressions.
#
#  Bugs:
#  - when we modify a conditional line, comments get lost
#    (conditional lines that are not modified are copied verbatim)
#  - results may be incorrect if an #include file #undef's or
#    #define's a symbol passed in by -D/-U. The -d option may make
#    this even worse.
#  - #elif is not supported
#  - `#if 0' and `#if 1' are eliminated even when -f is not specified
#
#  (c) 2002 by Stefan Reuther
#
use strict;
use vars qw{ %defined @files $opt_optimistic $opt_defines @exprs
             $opt_force $seen_defined };

$opt_optimistic = 0;
$opt_defines = 0;
$opt_force = 0;

# Command line parsing. Between --begin and --end we are looking at
# C compiler options: only -D/-U are interpreted there, everything
# else starting with `-' is silently ignored.
my $allow_cc = 0;
foreach (@ARGV) {
    if (/^--begin$/) {
        $allow_cc = 1;
    } elsif (/^--end$/) {
        $allow_cc = 0;
    } elsif (/^-D([^=]+)/) {
        # "-Dsym=value" defines `sym'; the value is of no interest to us
        $defined{$1} = 1;
    } elsif (/^-U(.+)/) {
        $defined{$1} = 0;
    } elsif (!$allow_cc && /^-C$/) {
        $opt_optimistic = 1;
    } elsif (!$allow_cc && /^-d$/) {
        $opt_defines = 1;
    } elsif (!$allow_cc && /^-f$/) {
        $opt_force = 1;
    } elsif (!$allow_cc && /^-e(.*)/) {
        push @exprs, $1;
    } elsif (/^-h$/ || /^--?help$/) {
        print <<"EOT";
$0 - partial preprocessor

usage: $0 [-Dsym...] [-Usym...] [-C] [-d] [-f] [-eexpr...] [-h|--help] [files...]

 -Dsym     define a symbol
 -Usym     undefine a symbol
 -C        assume 'clean' code (detailed info in source code)
 -d        process #define/#undef lines
 -f        force simplify
 -eexpr    evaluate expression and print simplified version

C compiler options can be passed in --begin --end, such as
   $0 --begin -Dsym -Dsym -Idir -Ldir -llib --end file.c
(this program will then ignore options it doesn't know).

If no files specified, processes stdin. Output always goes to stdout.

(c) 2002 Stefan Reuther .
EOT
        exit 0;
    } else {
        if ($_ =~ /^-/ && $_ ne '-') {
            die "bad parameter `$_'\n" unless $allow_cc;
        } else {
            push @files, $_;
        }
    }
}
push @files, "-" if !@files && !@exprs;

my $exit = 0;

foreach (@exprs) {
    print $_, ":\n- ", eval_expr($_), "\n";
}

# Note: process() and is_complete() report the current file name
# through $_, so this loop must keep using $_ as its loop variable.
foreach (@files) {
    if ($_ eq '-') {
        process (\*STDIN, \*STDOUT);
    } else {
        # three-argument open: the file name is never interpreted as
        # a mode or a pipe specification
        open my $in, '<', $_ or do {
            print STDERR "$_: $!\n";
            $exit = 1;
            next;
        };
        process ($in, \*STDOUT);
        close $in;
    }
}
exit $exit;

################################# process #################################

# process (infd, outfd)
# - process one file. This is the engine of the program.
#
# Reads "virtual" lines (physical lines joined over backslash-newline
# and over multi-line comments) from infd and copies them to outfd,
# dropping the parts that are excluded by conditionals whose outcome
# is known. @ifstack holds one entry per open conditional, innermost
# first: '1' (known true), '0' (known false) or 'unknown'.
# Expects the name of the file being processed in $_ (used in
# diagnostics only). Dies on malformed input.
sub process {
    my ($in, $out) = @_;
    my $total;
    my @ifstack;
    my $out_enabled = 1;

    # line loop
  LINE:
    while (1) {
        # read one "virtual" line
        $total = "";
        do {
            my $line = <$in>;
            if (!defined $line) {
                last LINE if $total eq '';
                die "$_:$.: unterminated comment\n";
            }
            while ($line =~ /\\\n$/) {
                my $next = <$in>;
                die "$_:$.: unexpected end of file\n" if not defined $next;
                $line .= $next;
            }
            $total .= $line;
        } while (! is_complete($total));

        # remove comments to see if it's a pp command
        my $single_line = $total;
        $single_line =~ s/\\\n//sg;
        my $command = "";
        while (1) {
            last if $single_line =~ m/\G$/sgc;
            if ($single_line =~ m{\G([^\"\'/]+)}sgc
                || $single_line =~ m{\G('([^\'\\]|\\.)*')}sgc
                || $single_line =~ m{\G("([^\"\\]|\\.)*")}sgc
                || $single_line =~ m{\G(/[^/*])}sgc)
            {
                # plain text or literal: keep
                $command .= $1;
                next;
            }
            if ($single_line =~ m{\G//.*}sgc
                || $single_line =~ m{\G/\*.*?\*/}sgc)
            {
                # comment: counts as one blank
                $command .= " ";
                next;
            }
            die "huh?";
        }

        if ($command =~ /^\s*\#\s*([a-z]+)\s*(.*)/) {
            # pp command
            my $cm  = $1;
            my $arg = trim($2);
            if ($cm eq 'ifdef') {
                if ($out_enabled) {
                    if (exists $defined{$arg}) {
                        $out_enabled = $defined{$arg};
                        unshift @ifstack, $out_enabled;
                    } else {
                        unshift @ifstack, 'unknown';
                        print $out $total;
                    }
                } else {
                    # inside removed code: just keep nesting in sync
                    unshift @ifstack, 1;
                }
            } elsif ($cm eq 'ifndef') {
                if ($out_enabled) {
                    if (exists $defined{$arg}) {
                        $out_enabled = 1 - $defined{$arg};
                        unshift @ifstack, $out_enabled;
                    } else {
                        unshift @ifstack, 'unknown';
                        print $out $total;
                    }
                } else {
                    unshift @ifstack, 1;
                }
            } elsif ($cm eq 'if') {
                if ($out_enabled) {
                    my $result = trim(eval_expr($arg));
                    if ($result ne '' && $result !~ /\D/) {
                        # condition reduced to a constant
                        $out_enabled = ($result ? "1" : "0");
                        unshift @ifstack, $out_enabled;
                    } else {
                        unshift @ifstack, 'unknown';
                        # If the expression was not changed, copy the
                        # line verbatim so that its comments and layout
                        # survive; only rewritten lines lose them.
                        my $text = ($result eq $arg
                                    ? $total
                                    : "#if $result\n");
                        print $out $text;
                    }
                } else {
                    unshift @ifstack, 1;
                }
            } elsif ($cm eq 'elif') {
                die "#elif is not yet implemented";
            } elsif ($cm eq 'else') {
                die "#else without #if" if ! @ifstack;
                if ($out_enabled) {
                    # output can be enabled if a) we don't know or b) we
                    # know our condition is true
                    if ($ifstack[0] eq '1') {
                        $ifstack[0] = '0';
                        $out_enabled = 0;
                    } else {
                        print $out $total;
                    }
                } else {
                    if ($ifstack[0] eq '0') {
                        $ifstack[0] = '1';
                        $out_enabled = check_out(@ifstack);
                    }
                }
            } elsif ($cm eq 'endif') {
                die "#endif without #if" if ! @ifstack;
                my $why = shift @ifstack;
                $out_enabled = check_out (@ifstack);
                # the #endif is kept only if its #if was kept, too
                if ($out_enabled && ($why ne '0' && $why ne '1')) {
                    print $out $total;
                }
            } else {
                if ($cm eq 'define') {
                    if ($out_enabled && $opt_defines && $arg =~ m{^(\w+)}) {
                        $defined{$1} = 1;
                    }
                } elsif ($cm eq 'undef') {
                    if ($out_enabled && $opt_defines && $arg =~ m{^(\w+)}) {
                        $defined{$1} = 0;
                    }
                } elsif ($cm ne 'include' && $cm ne 'pragma'
                         && $cm ne 'error' && $cm ne 'line'
                         && $cm ne 'warning')
                {
                    # most probably a typo in a directive we care about
                    die "$_:$.: unknown preprocessor command '$cm'\n";
                }
                print $out $total if $out_enabled;
            }
        } else {
            print $out $total if $out_enabled;
        }
    }

    # Diagnostics must not end up in the processed output, so this
    # goes to stderr like all other messages.
    printf STDERR "%s:%s: %d unterminated conditional(s) still open\n",
        $_, $., scalar @ifstack
            if @ifstack;
}

# true iff $_[0] is a complete line. Complete lines may not contain
# partial comments. Dies if a string or character literal is not
# closed on the line (file name for the message is taken from $_).
sub is_complete {
    my $line = shift;
    $line =~ s/\\\n//sg;
    while (1) {
        last if $line =~ /\G$/sgc;
        next if ($line =~ m{\G[^\"\'/]+}sgc
                 || $line =~ m{\G'([^\'\\]|\\.)*'}sgc
                 || $line =~ m{\G"([^\"\\]|\\.)*"}sgc
                 || $line =~ m{\G/[^/*]}sgc
                 || $line =~ m{\G//.*}sgc
                 || $line =~ m{\G/\*.*?\*/}sgc);
        die "$_:$.: literal across lines\n"
            if ($line =~ m{\G[\"\']}sgc);
        # it is a comment
        return 0;
    }
    return 1;
}

# Given a list of status tokens ('0', '1', 'anything else'), decide
# whether output is enabled: returns 0 if any enclosing conditional
# is known to be false, 1 otherwise.
sub check_out {
    foreach (@_) {
        if ($_ eq '0') {
            return 0;
        }
    }
    return 1;
}

# Remove leading and trailing whitespace.
sub trim {
    my $x = shift;
    $x =~ s/^\s+//s;
    $x =~ s/\s+$//s;
    $x;
}

# Given an expression, return simplified version thereof (i.e.
# defined(X) expanded if known). The expression is returned unchanged
# if it could not be parsed completely, if the parser flagged a part
# it cannot handle safely, or if it does not involve any of our
# symbols (unless -f was given).
sub eval_expr {
    local $_ = shift;
    local $seen_defined = $opt_force;
    my $result = eval_cond();
    return $_
        if !/\G\s*$/sgc || $result =~ m{/\*ERROR\*/} || !$seen_defined;
    $result;
}

# Parse conditional-expression production. We process these, so
# "#if defined(A) ? defined(B) : 0" works.
sub eval_cond {
    # All eval_* routines parse the expression in $_ starting at
    # pos($_) and return the simplified text of what they consumed.
    # A result consisting of digits only is a known value; the text
    # "/*ERROR*/" marks something we cannot simplify safely.
    my $cond = eval_or();
    if (m{\G\s*\?}sgc) {
        # conditional operator
        my $lhs = eval_cond();
        return "/*ERROR*/" if not /\G\s*:/sgc;
        my $rhs = eval_cond();
        # never silently drop a branch we could not analyse
        return "/*ERROR*/" if has_error($cond, $lhs, $rhs);
        if ($cond =~ /\D/) {
            return "$cond ? $lhs : $rhs";
        } else {
            return $cond ? $lhs : $rhs;
        }
    } else {
        return $cond;
    }
}

# Parse logical-or-expression. Evaluates to "1" if one part known to
# be defined, "0" if all parts known to be undefined, "||" of all
# unknown terms else. If an operand could not be analysed, the error
# marker is returned even when another operand is known to be true.
sub eval_or {
    my @terms;                  # operands of unknown value
    my @all;                    # all operands
    my $true = 0;
    do {
        my $operand = eval_and();
        push @all, $operand;
        if ($operand =~ /\D/) {
            push @terms, $operand;
        } else {
            $true ||= $operand+0;
        }
    } while (m{\G\s*\|\|}sgc);

    return $all[0]      if @all == 1;
    return "/*ERROR*/"  if has_error(@terms);
    return "1"          if $true;
    return "0"          if !@terms;
    if (@terms == 1) {
        # "0 ||" keeps the result normalized to 0/1; it is not
        # needed if the remaining operand already is 0/1.
        return is_boolean($terms[0]) ? $terms[0] : "0 || $terms[0]";
    }
    return join " || ", @terms;
}

# Parse logical-and-expression. Evaluates to "0" if one part known to
# be undefined, "1" if all parts known to be defined, "&&" of all
# unknown terms else. If an operand could not be analysed, the error
# marker is returned even when another operand is known to be false.
sub eval_and {
    my @terms;                  # operands of unknown value
    my @all;                    # all operands
    my $true = 1;
    do {
        my $operand = eval_bitor();
        push @all, $operand;
        if ($operand =~ /\D/) {
            push @terms, $operand;
        } else {
            $true &&= $operand+0;
        }
    } while (m{\G\s*\&\&}sgc);

    return $all[0]      if @all == 1;
    return "/*ERROR*/"  if has_error(@terms);
    return "0"          if !$true;
    return "1"          if !@terms;
    if (@terms == 1) {
        # "1 &&" keeps the result normalized to 0/1; it is not
        # needed if the remaining operand already is 0/1.
        return is_boolean($terms[0]) ? $terms[0] : "1 && $terms[0]";
    }
    return join " && ", @terms;
}

# Parse inclusive-or-expression. This combines the known terms to one,
# that is, `#if 1 + (2 | 3)' yields `#if 1 + 3'.
sub eval_bitor {
    my @terms;
    my $value = 0;
    do {
        my $operand = eval_bitxor();
        if ($operand =~ /\D/) {
            push @terms, $operand;
        } else {
            $value |= $operand+0;
        }
    } while (m{\G\s*\|(?=$|[^|])}sgc);
    push @terms, $value if $value;
    return (@terms ? join(" | ", @terms) : "0");
}

# Parse exclusive-or-expression. This combines the known terms to one,
# that is, `#if 1 + (2 ^ 3)' yields `#if 1 + 1'.
sub eval_bitxor {
    my @terms;
    my $value = 0;
    do {
        my $operand = eval_bitand();
        if ($operand =~ /\D/) {
            push @terms, $operand;
        } else {
            $value ^= $operand+0;
        }
    } while (m{\G\s*\^}sgc);
    push @terms, $value if $value;
    return (@terms ? join(" ^ ", @terms) : "0");
}

# Parse and-expression. This combines the known terms to one,
# that is, `#if 1 + (2 & 3)' yields `#if 1 + 2'.
sub eval_bitand {
    my @terms;
    my $known = 0;              # did we see an operand of known value?
    my $value = ~0;
    do {
        my $operand = eval_eq();
        if ($operand =~ /\D/) {
            push @terms, $operand;
        } else {
            $known = 1;
            $value &= $operand+0;
        }
    } while (m{\G\s*\&(?=$|[^&])}sgc);

    # never silently drop an operand we could not analyse
    return "/*ERROR*/" if has_error(@terms);
    return "0" if $known && $value == 0;
    # An all-ones value is the identity and can be left out next to
    # unknown operands, but it must not vanish if it is all we have
    # (`#if ~0' is true, not false).
    push @terms, $value if $known && ($value != ~0 || !@terms);
    return join " & ", @terms;
}

# Parse equality-expression. Those are too seldom in conditionals
# concerned with definedness, so we don't evaluate them.
sub eval_eq {
    my $text = eval_rela();
    while (m{\G\s*([=!]=)}sgc) {
        $text .= " $1 ";
        $text .= eval_rela();
    }
    $text;
}

# Parse relational-expression. Those are too seldom in conditionals
# concerned with definedness, so we don't evaluate them.
sub eval_rela {
    my $text = eval_shift();
    while (m{\G\s*([<>]=?)(?=[^<>])}sgc) {
        $text .= " $1 ";
        $text .= eval_shift();
    }
    $text;
}

# Parse shift-expression. Those are too seldom in conditionals
# concerned with definedness, so we don't evaluate them.
sub eval_shift {
    my $text = eval_add();
    while (m{\G\s*(<<|>>)}sgc) {
        $text .= " $1 ";
        $text .= eval_add();
    }
    $text;
}

# Parse additive-expression. Those are too seldom in conditionals
# concerned with definedness, so we don't evaluate them.
sub eval_add {
    my $text = eval_mult();
    while (m{\G\s*(\+|-)}sgc) {
        $text .= " $1 ";
        $text .= eval_mult();
    }
    $text;
}

# Parse multiplicative-expression. Those are too seldom in conditionals
# concerned with definedness, so we don't evaluate them.
sub eval_mult {
    my $text = eval_unary();
    while (m{\G\s*(\*|/|%)}sgc) {
        $text .= " $1 ";
        $text .= eval_unary();
    }
    $text;
}

# Parse unary-expression. In preprocessor defines, this are:
# - "-expr", "+expr", "!expr", "~expr" (handled because they're simple)
# - "defined(SYM)", "defined SYM" (handled)
# - numbers are converted to decimal
# - parenthesized expressions
# Sets $seen_defined when one of the symbols from our dictionary was
# resolved. Returns "/*ERROR*/" for everything we cannot handle.
sub eval_unary {
    m{\G\s*}sgc;
    if (m{\G!}sgc) {
        my $arg = eval_unary();
        if ($arg =~ /\D/) {
            return "!$arg";
        } else {
            return $arg ? "0" : "1";
        }
    } elsif (m{\G~}sgc) {
        my $arg = eval_unary();
        if ($arg =~ /\D/) {
            return "~$arg";
        } else {
            # force numeric context: `~' applied to a string would
            # complement the characters of the string instead
            return ~($arg+0);
        }
    } elsif (m{\G\+}sgc) {
        return eval_unary();
    } elsif (m{\G\-}sgc) {
        my $arg = eval_unary();
        if ($arg =~ /\D/) {
            return "- $arg";
        } else {
            return -($arg+0);
        }
    } elsif (m{\Gdefined\s*\(\s*(\w+)\s*\)}sgc
             || m{\G\s*defined\s+(\w+)}sgc)
    {
        if (exists $defined{$1}) {
            $seen_defined = 1;
            return $defined{$1};
        } else {
            return "defined($1)";
        }
    } elsif (m{\G0[xX]([0-9a-fA-F]+)[uUlL]*}sgc) {
        return hex $1;
    } elsif (m{\G0([0-7]+)[uUlL]*}sgc) {
        return oct $1;
    } elsif (m{\G([0-9]+)[uUlL]*}sgc) {
        return "$1";
    } elsif (m{\G(\w+)}sgc) {
        my $name = $1;
        if (m{\G\s*(?=\()}sgc) {
            # function-like macro, with or without blanks before `('
            return "/*ERROR*/";
        } elsif (exists $defined{$name} && !$defined{$name}) {
            # object-like macro which is known to not be defined.
            # This is one of our symbols, so the expression is worth
            # simplifying.
            $seen_defined = 1;
            return "0";
        } elsif ($opt_optimistic) {
            # object-like macro which is (maybe) defined
            return $name;
        } else {
            return "/*ERROR*/";
        }
    } elsif (m{\G\(}sgc) {
        my $arg = eval_cond();
        return "/*ERROR*/" if !m{\G\s*\)}sgc;
        if ($arg =~ /\D/) {
            return "($arg)";
        } else {
            return $arg;
        }
    } else {
        return "/*ERROR*/";
    }
}

# True iff one of the given operand texts contains the error marker.
# Uses index() so that neither pos($_) nor $1 are disturbed.
sub has_error {
    foreach my $operand (@_) {
        return 1 if index($operand, "/*ERROR*/") >= 0;
    }
    return 0;
}

# True iff the given operand text is a (possibly negated) defined()
# test as produced by eval_unary, i.e. known to yield 0 or 1.
sub is_boolean {
    my $operand = shift;
    return ($operand =~ /^!*defined\(\w+\)$/ ? 1 : 0);
}

# Debugging aid: print a label, the expression being parsed and a
# marker below the current parse position.
sub debug {
    print "| ", shift, ":\n| $_\n| ", " " x (pos($_) || 0), "^\n";
}