#!/usr/bin/env python3

# Python version issues? Try using build-tools/path/linux-x86/python3

import argparse
import collections
import filecmp
import multiprocessing
import os
import pathlib
import re
import sys
import textwrap

# Captures, as group "file", everything after " := " on a line. It is applied
# to the "source_<target> := <path>" line of a .cmd file.
_SOURCE_RE = re.compile(r'.* := (?P<file>.*)\n?$')

# Captures, as group "file", the path wrapped in a "$(wildcard <path>)" entry.
_WILDCARD_RE = re.compile(r'\$\(wildcard (?P<file>[^\)]+)\)')

# Two-field result tuple: (files, src_dir).
BuiltFilesResult = collections.namedtuple('BuiltFilesResult', 'files src_dir')

def parse_cmd_file(dotcmd, parse_deps=True):
  r"""
  Extract the source file and dependency paths recorded in a .cmd file.

  The .cmd is a Makefile script generated by scripts/basic/fixdep.c and has
  the following format:

   cmd_init/main.o := clang <......>

   source_init/main.o := <root_dir>/common/init/main.c

   deps_init/main.o := \
       $(wildcard include/config/INIT_ENV_ARG_LIMIT) \
       ...
       <root_dir>/include/common/include/linux/compiler-version.h \
       arch/arm64/include/generated/uapi/asm/sockios.h \
       ...
       <root_dir>/common/include/kunit/try-catch.h \

   init/main.o: $(deps_init/main.o)

   $(deps_init/main.o):

  The path on the source_ line becomes the source file. Every line of the
  deps_ continuation block contributes one path: the path inside
  $(wildcard <path>) if the line has one, otherwise the first
  whitespace-separated token. Nothing is filtered here (relative paths and
  wildcard entries are kept as-is); callers decide which paths are relevant.
  All returned paths are passed through os.path.normpath().

  Args:
      dotcmd: Path-like object with an open() method (e.g. pathlib.Path)
        pointing at the .cmd file.
      parse_deps: When False, the deps_ block is skipped and the returned
        dependency set is empty.

  Returns:
    A BuiltFilesResult used as a plain pair: the first item is the source
    path (None if no source_ line matched) and the second item is the set of
    dependency paths. Note that the tuple's field names (files, src_dir) do
    not describe these values; unpack the result positionally.
  """
  source = None
  deps = set()
  # True while we are inside the backslash-continued deps_ block.
  collecting = False

  with dotcmd.open() as cmd_file:
    for line in cmd_file:
      if line.startswith('source_'):
        source_match = _SOURCE_RE.fullmatch(line)
        if source_match is not None:
          source = os.path.normpath(source_match.group('file'))

      if not parse_deps:
        continue

      if collecting:
        tokens = line.split()
        wildcard = _WILDCARD_RE.search(line)
        if wildcard is not None:
          deps.add(os.path.normpath(wildcard.group('file')))
        elif tokens:
          deps.add(os.path.normpath(tokens[0]))

        # The block ends at the first line that is blank or does not finish
        # with a stand-alone continuation backslash.
        collecting = bool(tokens) and tokens[-1] == '\\'

      # Checked after the block handling above so that the deps_ header line
      # itself is never recorded as a dependency.
      if line.startswith('deps_'):
        collecting = True

  return BuiltFilesResult(source, deps)

def find_source_dir(dir):
  """Guess the source directory for a build output folder.

  The list of files we are interested in should be relative to the kernel source
  directory. Dependencies listed by the .cmd files are a mix of absolute and
  relative paths.

  init/main.c has been part of kernel since 2.6.12-rc2. It appears unlikely to
  go away and thus is a good candidate to make assumptions about its existence
  in the build output and source trees.

  Args:
      dir: Kernel build output folder as a pathlib.Path. It is expected to
        contain init/.main.o.cmd.

  Returns:
    A pathlib.Path: the source path recorded in init/.main.o.cmd with its last
    two components ("init/main.c") removed.

  Exits:
    Prints an error and calls sys.exit(1) if init/.main.o.cmd is missing or
    if no source file could be extracted from it.
  """
  main_cmd = dir / 'init' / '.main.o.cmd'

  # Fail with a readable message instead of a FileNotFoundError traceback when
  # the given folder is not (or not yet) a kernel build output folder.
  if not main_cmd.is_file():
    print(f'ERROR! Failed to extract GKI kernel directory from {main_cmd}: '
          'no such file')
    sys.exit(1)

  main_c, _ = parse_cmd_file(main_cmd, parse_deps=False)
  if not main_c:
    print(f'ERROR! Failed to extract GKI kernel directory from {main_cmd}')
    sys.exit(1)

  # Remove "init/main.c" to leave us with GKI_KERNEL_DIR
  return pathlib.Path(main_c).parent.parent

def extract_built_files(dir):
  """Collect the source-tree files that a build output folder was built from.

  Every .cmd file found (recursively) under dir is parsed in a
  multiprocessing pool. A .cmd file contributes only if its source file lives
  under the detected source directory; in that case the source and those of
  its dependencies that also live under the source directory are recorded,
  relative to the source directory. Finally, entries that do not exist in the
  source directory are dropped.

  Args:
      dir: Kernel build output folder (e.g. out/android-mainline/common)

  Returns:
    A BuiltFilesResult. Its first value (files) is the set of found files used
    in the build, relative to the source directory. Its second value (src_dir)
    is the location of the source directory.
  """
  src_dir = find_source_dir(dir)
  prefix = str(src_dir) + os.sep

  with multiprocessing.Pool() as pool:
    parsed = pool.map(parse_cmd_file, dir.glob('**/.*.cmd'))

  candidates = set()
  for source, deps in parsed:
    # Skip .cmd files without a source, or whose source is outside src_dir.
    if not source or not source.startswith(prefix):
      continue
    candidates.add(source.removeprefix(prefix))
    candidates.update(
        dep.removeprefix(prefix) for dep in deps if dep.startswith(prefix))

  existing = {rel for rel in candidates if os.path.exists(src_dir / rel)}
  return BuiltFilesResult(existing, src_dir)

# Script entry point. Exit status: 0 when no differences are found (or no
# vendor tree is given), 2 when files differ between the GKI source tree and
# the vendor tree.
if __name__ == '__main__':
  parser = argparse.ArgumentParser(
    description='Compare the files used to compile a GKI kernel with those on a'
                ' device kernel',
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=textwrap.dedent('''
    Comparison of the trees is achieved by parsing the build output of the GKI kernel tree (GKI_KERNEL_OUT_DIR). The .cmd
    files list the file dependencies and give a good idea of which files are used when compiling the GKI kernel.

    Limitations:
      - Vendor kernel should have GKI kernel baseline merged into its tree for accurate diff reporting
      - Does not compare files which *would* be compiled into a vmlinux in a vendor kernel build.
        For instance, if vendor kernel has added obj-y += vendor_file.o, script would not detect such addition.
        Similarly, implicit Kconfig "select FOO" from a vendor module is not detected. The assumption
        is that such differences would be caught during other build steps or during test.
      - All .cmd from GKI_KERNEL_OUT_DIR are scanned. If extra build steps are run (e.g. menuconfig) or build is
        old, then extra files might be added to the list of GKI source files.
    '''))
  parser.add_argument('gki_out_dir', metavar='GKI_KERNEL_OUT_DIR',
                      type=pathlib.Path,
                      help='Location of the GKI kernel output folder.')
  parser.add_argument('--gki-files', type=pathlib.Path,
                      help='Location to write GKI files list to.')
  parser.add_argument('vendor_tree', metavar='VENDOR_TREE', nargs='?',
                      type=pathlib.Path,
                      help='Location of the vendor source tree. '
                           'If not provided, then just the list of GKI kernel '
                           'output files is generated.')
  parser.add_argument('--changed-files', type=pathlib.Path,
                      help='Location to write changed files list to.')

  args = parser.parse_args()

  built_files, source_dir = extract_built_files(args.gki_out_dir)
  print(f'There are {len(built_files)} source files contributing to the build '
        f'in {args.gki_out_dir}.')
  if args.gki_files:
    with open(args.gki_files, mode='w') as f:
      f.writelines(f'{file}\n' for file in sorted(built_files))

  if args.vendor_tree:
    # shallow=False forces a content comparison. The default (shallow=True)
    # treats two files as equal whenever their size and mtime match, which
    # could hide a real difference between the trees.
    _, mismatch, errors = filecmp.cmpfiles(source_dir, args.vendor_tree,
                                           built_files, shallow=False)
    # Files that differ and files that could not be compared (e.g. missing in
    # the vendor tree) are both reported as changed. Sorted so that the report
    # is deterministic, like the --gki-files list.
    diff_files = sorted({*mismatch, *errors})
    print(f'There are {len(diff_files)} source file(s) changed in vendor tree.')
    if diff_files:
      print('\n'.join(f'  {file}' for file in diff_files))
    if args.changed_files:
      with open(args.changed_files, mode='w') as f:
        f.writelines(f'{file}\n' for file in diff_files)
    if diff_files:
      sys.exit(2)
