#! /usr/bin/perl

# Copyright (c) 2021-2022, PostgreSQL Global Development Group

#
# This script looks for symbols that are referenced in #ifdef or defined()
# tests without having #include'd the file that defines them.  Since this
# situation won't necessarily lead to any compiler message, it seems worth
# having an automated check for it.  In particular, use this to audit the
# results of pgrminclude!
#
# Usage: configure and build a PG source tree (non-VPATH), then start this
# script at the top level.  It's best to enable as many configure options
# as you can, especially --enable-cassert which is known to affect include
# requirements.  NB: you MUST use gcc, unless you have another compiler that
# can be persuaded to spit out the names of referenced include files.
#
# The results are necessarily platform-dependent, so use care in interpreting
# them.  We try to process all .c files, even those not intended for the
# current platform, so there will be some phony failures.
#
# src/tools/pginclude/pgcheckdefines
#

use strict;
use warnings;

use Cwd;
use File::Basename;

# Remember the starting directory.  The per-file loop below chdir's into
# each file's own directory and has to come back here afterwards.
my $topdir = cwd();

# Programs to use.  Both names are interpolated into shell command strings
# that are run through piped opens.
my $FIND = "find";
my $MAKE = "make";

#
# Build arrays of all the .c and .h files in the tree
#
# We ignore .h files under src/include/port/, since only the one exposed as
# src/include/port.h is interesting.  (XXX Windows ports have additional
# files there?)  Ditto for .h files in src/backend/port/ subdirectories.
# Including these .h files would clutter the list of define'd symbols and
# cause a lot of false-positive results.
#
my (@cfiles, @hfiles);

# First pass: every regular file named *.c below the current directory.
open my $pipe, '-|', "$FIND * -type f -name '*.c'"
  or die "can't fork: $!";
while (defined(my $found = <$pipe>))
{
	chomp $found;
	push @cfiles, $found;
}
close $pipe or die "$FIND failed: $!";

# Second pass: every *.h file, leaving out the port-specific headers under
# src/include/port/ and under the subdirectories of src/backend/port/.
open $pipe, '-|', "$FIND * -type f -name '*.h'"
  or die "can't fork: $!";
while (defined(my $found = <$pipe>))
{
	chomp $found;
	next if $found =~ m|^src/include/port/|;
	next if $found =~ m|^src/backend/port/\w+/|;
	push @hfiles, $found;
}
close $pipe or die "$FIND failed: $!";

#
# For each .h file, extract all the symbols it #define's, and add them to
# a hash table.  To cover the possibility of multiple .h files defining
# the same symbol, we make each hash entry a hash of filenames.
#
# %defines maps symbol name => { defining header path => 1, ... }.
my %defines;

for my $hfile (@hfiles)
{
	open my $header, '<', $hfile
	  or die "can't open $hfile: $!";
	while (defined(my $text = <$header>))
	{
		# Only lines that are #define directives contribute; the first
		# word after "define" is the symbol name.
		next unless $text =~ m/^\s*#\s*define\s+(\w+)/;
		$defines{$1}{$hfile} = 1;
	}
	close $header;
}

#
# For each file (both .h and .c), run the compiler to get a list of what
# files it #include's.  Then extract all the symbols it tests for defined-ness,
# and check each one against the previously built hashtable.
#
foreach my $file (@hfiles, @cfiles)
{
	# Work from inside the file's own directory, so that make and the
	# compiler are run there.  We chdir back to $topdir at the bottom of
	# the loop.
	my ($fname, $fpath) = fileparse($file);
	chdir $fpath or die "can't chdir to $fpath: $!";

	#
	# Ask 'make' to parse the makefile so we can get the correct flags to
	# use.  CPPFLAGS in particular varies for each subdirectory.  If we are
	# processing a .h file, we might be in a subdirectory that has no
	# Makefile, in which case we have to fake it.  Note that there seems
	# no easy way to prevent make from recursing into subdirectories and
	# hence printing multiple definitions --- we keep the last one, which
	# should come from the current Makefile.
	#
	my $MAKECMD;

	if (-f "Makefile" || -f "GNUmakefile")
	{
		$MAKECMD = "$MAKE -qp";
	}
	else
	{
		# No makefile here: read Makefile.global directly, supplying the
		# subdir and top_builddir settings ourselves.
		# $fpath ends with "/", which chop removes.
		my $subdir = $fpath;
		chop $subdir;

		# Build a "../.." style path with one ".." per directory level
		# in $fpath.
		my $top_builddir = "..";
		my $tmp          = $fpath;
		while (($tmp = dirname($tmp)) ne '.')
		{
			$top_builddir = $top_builddir . "/..";
		}
		$MAKECMD =
		  "$MAKE -qp 'subdir=$subdir' 'top_builddir=$top_builddir' -f '$top_builddir/src/Makefile.global'";
	}

	# Default the flag variables to empty strings, so that a makefile that
	# never sets one of them doesn't provoke "uninitialized value" warnings
	# when the values are substituted and interpolated below.
	my ($CPPFLAGS, $CFLAGS, $CFLAGS_SL, $PTHREAD_CFLAGS) = ('') x 4;
	my $CC;

	# Pick the variable settings we need out of make's database dump.
	# A later match overwrites an earlier one, so the last setting wins.
	open $pipe, '-|', "$MAKECMD"
	  or die "can't fork: $!";
	while (<$pipe>)
	{
		if (m/^CPPFLAGS :?= (.*)/)
		{
			$CPPFLAGS = $1;
		}
		elsif (m/^CFLAGS :?= (.*)/)
		{
			$CFLAGS = $1;
		}
		elsif (m/^CFLAGS_SL :?= (.*)/)
		{
			$CFLAGS_SL = $1;
		}
		elsif (m/^PTHREAD_CFLAGS :?= (.*)/)
		{
			$PTHREAD_CFLAGS = $1;
		}
		elsif (m/^CC :?= (.*)/)
		{
			$CC = $1;
		}
	}

	# If make exits with status 1, it's not an error, it just means make
	# thinks some files may not be up-to-date.  Only complain on status 2.
	# We must close the very handle the pipe was opened on: that is what
	# waits for make and leaves its exit status in $?.  The return value
	# of close is deliberately ignored, since it is false for any nonzero
	# exit status, including the harmless status 1.
	close $pipe;
	die "$MAKE failed in $fpath\n" if $? != 0 && $? != 256;

	# Expand out stuff that might be referenced in CFLAGS
	$CFLAGS =~ s/\$\(CFLAGS_SL\)/$CFLAGS_SL/;
	$CFLAGS =~ s/\$\(PTHREAD_CFLAGS\)/$PTHREAD_CFLAGS/;

	#
	# Run the compiler (which had better be gcc) to get the inclusions.
	# "gcc -H" reports inclusions on stderr as "... filename" where the
	# number of dots varies according to nesting depth.
	#
	# The shell redirection sends stderr down the pipe and discards the
	# preprocessed output itself.
	#
	my @includes = ();
	my $COMPILE  = "$CC $CPPFLAGS $CFLAGS -H -E $fname";
	open $pipe, '-|', "$COMPILE 2>&1 >/dev/null"
	  or die "can't fork: $!";
	while (<$pipe>)
	{
		if (m/^\.+ (.*)/)
		{
			my $include = $1;

			# Ignore system headers (absolute paths); but complain if a
			# .c file includes a system header before any PG header.
			if ($include =~ m|^/|)
			{
				warn "$file includes $include before any Postgres inclusion\n"
				  if $#includes == -1 && $file =~ m/\.c$/;
				next;
			}

			# Strip any "./" (assume this appears only at front)
			$include =~ s|^\./||;

			# Make path relative to top of tree: each leading "../" on the
			# reported name moves one directory up from the file's own
			# directory.
			my $ipath = $fpath;
			while ($include =~ s|^\.\./||)
			{
				$ipath = dirname($ipath) . "/";
			}
			$ipath =~ s|^\./||;
			push @includes, $ipath . $include;
		}
		else
		{
			# Anything else on stderr is passed through as a warning.
			warn "$CC: $_";
		}
	}

	# The compiler might fail, particularly if we are checking a file that's
	# not supposed to be compiled at all on the current platform, so don't
	# quit on nonzero status.  As above, it has to be $pipe that we close,
	# so that the warning reflects the compiler's actual exit status.
	close $pipe or warn "$COMPILE failed in $fpath\n";

	#
	# Scan the file to find #ifdef, #ifndef, and #if defined() constructs.
	# We assume #ifdef isn't continued across lines, and that defined(foo)
	# isn't split across lines either.  Only lines starting an #if directive
	# (and their backslash continuations) are searched for defined();
	# #elif lines are not examined.
	#
	open my $fh, '<', $fname
	  or die "can't open $file: $!";

	# True while we are inside an #if directive that is continued onto
	# following lines with a trailing backslash.
	my $inif = 0;
	while (<$fh>)
	{
		my $line = $_;
		if ($line =~ m/^\s*#\s*ifdef\s+(\w+)/)
		{
			checkit($file, $1, @includes);
		}
		if ($line =~ m/^\s*#\s*ifndef\s+(\w+)/)
		{
			checkit($file, $1, @includes);
		}
		if ($line =~ m/^\s*#\s*if\s+/)
		{
			$inif = 1;
		}
		if ($inif)
		{
			# Each substitution removes one "defined foo" or "defined(foo"
			# from our private copy of the line, so the loop visits every
			# such test on the line exactly once.
			while ($line =~ s/\bdefined(\s+|\s*\(\s*)(\w+)//)
			{
				checkit($file, $2, @includes);
			}

			# The directive ends at the first line without a trailing
			# backslash.
			if (!($line =~ m/\\$/))
			{
				$inif = 0;
			}
		}
	}
	close $fh;

	chdir $topdir or die "can't chdir to $topdir: $!";
}

exit 0;

# Check an is-defined reference, and report it if it looks unsupported.
#
# Arguments: the name of the file being examined, the symbol it tests for
# defined-ness, and the list of files found to be included by that file.
# Prints one line on stdout for a questionable reference; otherwise does
# nothing.  Always returns an empty value.
sub checkit
{
	my ($file, $symbol, @includes) = @_;

	# Symbols that no PG include file #define's are not our concern.
	my $definers = $defines{$symbol};
	return unless defined $definers;

	my @places = keys %{$definers};

	#
	# The reference is OK if any file defining the symbol is either the
	# current file itself or one of its inclusions.
	#
	# Note: this test isn't bulletproof; in theory the inclusion might
	# occur after the use of the symbol.  Given our normal file layout,
	# however, the risk is minimal.
	#
	my %visible;
	@visible{ $file, @includes } = ();
	foreach my $deffile (@places)
	{
		return if exists $visible{$deffile};
	}

	#
	# A .h file is allowed to assume that one of the base headers
	# (postgres.h or postgres_fe.h) has been included, so a definition
	# coming from any of the headers listed here is acceptable too.
	#
	if ($file =~ m/\.h$/)
	{
		my %base_header;
		@base_header{
			'src/include/c.h',
			'src/include/postgres.h',
			'src/include/postgres_fe.h',
			'src/include/pg_config.h',
			'src/include/pg_config_manual.h'
		} = ();
		foreach my $deffile (@places)
		{
			return if exists $base_header{$deffile};
		}
	}

	# No defining file is visible: report the reference.
	print "$file references $symbol, defined in @places\n";

	# print "includes: @includes\n";

	return;
}
