/*
 * @file       fingerprint_tool.cpp
 * @author     Philipp Blanke (blp4hi) <philipp.blanke@de.bosch.com>
 * @date       Tue 14 Mar 2017, 18:20
 * @copyright  Robert Bosch Car Multimedia GmbH
 * @brief      Tool to create checksums and fingerprints of file trees.
 */
#define _XOPEN_SOURCE 500  // Use X/Open 5, incorporating POSIX 1995
#include <cerrno>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <vector>
#include <algorithm>

#include <dirent.h>
#include <fcntl.h>
#include <ftw.h>
#include <libgen.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <errno.h>

#include <openssl/evp.h>
#include <openssl/md5.h>
#include <sys/capability.h>

#include "Fingerprint.h"

#include "my_etg.h"

// Passed as the third argument of nftw() in main().
// NOTE(review): nftw() documents that argument as the maximum number of file
// descriptors (open directories) it may use at the same time, not as a limit
// on the depth of the walk - the macro name looks misleading; confirm intent.
#define MAX_DEPTH 256
// Size in bytes (4 MiB) of the read buffer used by the block based digest
// computation, which is the fallback for files that cannot be mmap()ed.
#define MAX_READ_BUFFER_SIZE 4194304
// Version number printed by the -v|--version option.
#define VERSION 2

using namespace fingerprint;

/** @brief Write the command line usage text to stderr. */
void print_help()
{
   // The adjacent literals below are concatenated into one single usage text.
   static const char usage_text[] =
      // synopsis
      "fingerprint_tool [OPTIONS] [ROOTDIR]\n\n"
      "  ROOTDIR           The root of the file tree which should be fingerprinted.\n"
      "                    This may be omitted when checking. In that case, the root file \n"
      "                    stored in the reference file is used.\n"
      // operation mode
      "OPTIONS\n"
      "One of the following options can be chosen. If none is given, '-f -' is assumed.\n"
      "  -f|--fingerprint FILE Fingerprint ROOTDIR and store to FILE (- means stdout, DEFAULT).\n"
      "  -c|--check FILE       Check the fingerprint in FILE.\n\n"
      // selection of the fingerprinted tree
      "You can modify which parts of ROOTDIR are fingerprinted by\n"
      "  -x|--exclude PATH     Exclude PATH, which can be a directory or file.Multiple statements are possible.\n"
      "     --no-realpath      Don't use realpath() on the ROOTDIR before fingerprinting.\n"
      // options for checking
      "For checking of fingerprints, the following options can be used:\n"
      "     --nostat            Don't include stat information during check (the same as --nouid --nogid --nomode).\n"
      "     --nouid             Don't include user ID during check.\n"
      "     --nogid             Don't include group ID during check.\n"
      "     --nomode            Don't include permissions during check.\n"
      "     --nocap             Don't include file capabilities during check.\n"
      "     --md5sum            The given file was made with md5sum, don't do advanced checking.\n"
      "     --ignore PATH       Ignore the PATH at the beginning of each file given in md5sum file.\n"
      "     --print             Print out the computed and reference fingerprint.\n"
      "     --verify-only       Only verify the given fingerprint file, don't do further checking.\n"
      // general options
      "  -V|--verbose           Debugging output (incremental: -V -V -V gives more output).\n"
      "  -e|--errmax NUM        Output maximum NUM errors during check (-1 means all, DEFAULT).\n\n"
      "  -v|--version           Print version and exit.\n\n"
      "  -h|--help              Print usage and exit.\n\n"
      "Copyright (C) Robert Bosch Car Multimedia GmbH.\n";

   fputs(usage_text, stderr);
}


// ----------------------------------------------------------------------------
// ------------------------------------------------------------- STATIC STORAGE
// The following file-local variables carry state between main() and the
// nftw() callback computeDigestStat(), which cannot receive user data.
// Length of root directory string. Set in main() before the tree walk; the
// callback strips this many leading characters from every visited path.
static size_t _root_length = 0;
// Storage for all filenames and associated md5sums.
static Fingerprint _fingerprint_fs;
// Prefix which is ignored on checked files (set by --ignore, passed to
// Fingerprint::fread() in main()).
static FileName _ignored;
// openssl context for digest computation. Created in main() right before the
// tree walk and destroyed right after it; reused for every file.
static EVP_MD_CTX* _ctx;

/* Compute file capabilities for a file
 * Use <sys/capability.h> api to retrieve the capabilities set in a file
 * */
void computeCapabilities(const char* file_name, DigestStat* ds)
{
   cap_t capability;
   ssize_t size;
   size_t start, startpos, length;
   std::string strCapability;

   capability = cap_get_file(file_name);

   if (! capability)
   {
      return;
   }

   strCapability = cap_to_text(capability, &size);
   // parse and get the capabilities set for the file
   // capabilities always starts with "= cap_chown,cap_kill+eip", so skip 2 indexes in the string
   length = strCapability.length();
   start  = strCapability.find("=");
   if (start == std::string::npos)
   {
      ETG_TRACE_FATAL(("Abort: No capability set for file %s", file_name));
      return;
   }
   else
   {
      startpos = start + 2;
   }

   std::string cap_string= strCapability.substr(startpos, length);
   const char *fbuf = cap_string.c_str();
   if (!cap_string.empty())
   {
      Digest capDigest;
      EVP_MD_CTX* ctx = EVP_MD_CTX_create();
      EVP_DigestInit(ctx, EVP_md5());
      EVP_DigestUpdate(ctx, (unsigned char*)fbuf, cap_string.size());
      EVP_DigestFinal_ex(ctx, capDigest.x, 0);
      EVP_MD_CTX_destroy(ctx);

      ds->setCapDigest(capDigest);
   }
   else
   {
      ETG_TRACE_FATAL(("Abort. File capability is empty for file %s\n", file_name));
   }

}

// ----------------------------------------------------------------------------
// --------------------------------------------------------- DIGEST COMPUTATION
/** @brief Compute DigestStat for the given file.  Used as function for
 * nftw(), which traverses a directory tree. */
int computeDigestStat(const char* path, const struct stat* sb,
      int typeflag, struct FTW* /* ftwbuf */)
{
   if ( typeflag == FTW_NS ) { // stop if file can't be read
      ETG_TRACE_FATAL(("Abort. Can't stat file %s\n", path));
      return FTW_STOP;
   }

   FileName file_name;
   if(strlen(path) <= _root_length) {
      file_name = ".";
   }
   else {
      file_name = FileName(path).substr(_root_length);
   }

   ETG_TRACE_USR4(("Current path %30s => %s", path, file_name.c_str() ));
   if (_fingerprint_fs.isExcluded(file_name)) {
      // current path matches excluded path
      if (typeflag == FTW_D) {
         // ETG_TRACE_USR4(("Skip subdir %s", path));
         return FTW_SKIP_SUBTREE;
      }
      return FTW_CONTINUE;
   }

   if (typeflag == FTW_SL) {
      // skip links for now.
      return FTW_CONTINUE;
   }

   DigestStat& ds = _fingerprint_fs.map(file_name);

   // Store this data before dereferencing a link
   // store stat data (ownership and permissions)
   ds.setUid(sb->st_uid);
   ds.setGid(sb->st_gid);
   ds.setMode(sb->st_mode);

   // compute file capabilities for regular files
   if (S_ISREG(sb->st_mode))
   {
      computeCapabilities(path, &ds);
   }

   // compute and store digest data
   const char* fpath = path;
   const struct stat* fsb = sb;

  //char* lpath = 0;   coverity fix for 22849. We never used this variable so commenting this variable as it is giving coverity warning.
#if 0
   struct stat lsb;
   if (typeflag == FTW_SL) {
      // dereference link for digest computation
      lpath = realpath(path, 0);
      if (lpath == NULL) {
         ETG_TRACE_ERR(("Can't dereference %s", path));
         return FTW_CONTINUE;
      }

      int res = stat(lpath, &lsb);
      if (res != 0) {
         free(lpath);
         ETG_TRACE_ERR(("Can't stat %s", path));
         return FTW_CONTINUE;
      }
      fpath = lpath;
      fsb = &lsb;
      ds.setLinkToReg(S_ISREG(fsb->st_mode));
   }
#endif

   size_t fsize = static_cast<size_t>(fsb->st_size);
   if ( S_ISREG(fsb->st_mode) ) { // compute Digest only for regular files
      int fd = 0;
      char* fbuf = 0;
      if (fsize > 0) {
         fd = open(fpath, O_RDONLY);
         if (fd < 0) {
            //if (lpath) { free(lpath); }  coverity fix for 22849
            ETG_TRACE_FATAL(("Can't open file %s", fpath));
            return FTW_STOP;
         }

         fbuf = (char*)mmap(0, fsize, PROT_READ, MAP_SHARED, fd, 0);
         if (fbuf == MAP_FAILED) { //extra handling only in case of mmap failure
            ETG_TRACE_FATAL(("Can't mmap file %s \nRetry with block based calculation", fpath));
            Digest blkdigest; //local object specific for mmap failure scenario
            static EVP_MD_CTX* _blkctx; //local object specific for mmap failure scenario
            _blkctx = EVP_MD_CTX_create();
      		EVP_DigestInit_ex(_blkctx, EVP_md5(), 0);
            size_t bufferSize = MAX_READ_BUFFER_SIZE;
            size_t read_next = fsize;
            char *buffer = new  ( std::nothrow ) char [bufferSize];  //Coverity fix for 19168
            if(0 == buffer) {
               ETG_TRACE_FATAL(("Can't allocate memory for file reading"));
               close(fd);
               return FTW_STOP;
            }
            unsigned int chunk = 0;
            unsigned int readBytes = 0;
            while(read_next > 0)
            {
                  if(bufferSize > read_next){
                     bufferSize = read_next;
                  }
                  chunk = read(fd, buffer, bufferSize);
                  if(0 == chunk) {
                     ETG_TRACE_FATAL(("Could not read from given handle. Read until now %u chars", readBytes));
                     delete[] buffer;
                     close(fd);
                     return FTW_STOP;
                  }
                  //if(0 == chunk) break;
                  read_next -=  chunk;
                  readBytes += chunk;
                  EVP_DigestUpdate(_blkctx, (unsigned char*)buffer, bufferSize);
            }
            if (fsize != readBytes){
               ETG_TRACE_FATAL(("File size %lu, read bytes from file %u", fsize, readBytes));
               delete[] buffer;
               close(fd);
               return FTW_STOP;
            }
            delete[] buffer;
            EVP_DigestFinal_ex(_blkctx, blkdigest.x, 0);
            ds.setDigest(blkdigest);
            if (fsize > 0) {
               close(fd);
            }
            //if (lpath) { free(lpath); }  coverity fix for 22849
            return FTW_CONTINUE;
         }
      }

      // compute Digest
      Digest digest;
      EVP_DigestInit_ex(_ctx, EVP_md5(), 0);
      EVP_DigestUpdate(_ctx, (unsigned char*)fbuf, fsize);
      EVP_DigestFinal_ex(_ctx, digest.x, 0);

      ds.setDigest(digest);
      if (fsize > 0) {
         // cleanup
         munmap(fbuf, (size_t)fsize);
         close(fd);
      }
   }

   //if (lpath) { free(lpath); }   coverity fix for 22849
   return FTW_CONTINUE;
}

/** @brief Replace the given path by its canonical form from realpath().
 *
 * @param fn  Path to resolve; overwritten with the resolved path on success
 *            and left untouched on failure.
 * @return EXIT_SUCCESS if the path could be resolved, EXIT_FAILURE otherwise.
 */
int applyRealpath (FileName& fn)
{
   // realpath() allocates the result buffer itself when it gets a null buffer
   char* const resolved = realpath(fn.c_str(), 0);
   if (resolved != NULL) {
      fn.assign(resolved);
      free(resolved);
      return EXIT_SUCCESS;
   }

   ETG_TRACE_FATAL(("Path is invalid: %s", fn.c_str() ));
   return EXIT_FAILURE;
}

// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------- MAIN
int main(int argc, const char * const argv[])
{
   bool check = false;
   bool create = false;
   bool use_realpath = true;
   bool use_uid = true;
   bool use_gid = true;
   bool use_mode = true;
   bool use_cap = true;
   bool only_md5sum = false;
   bool print_fp = false;
   bool verify_only = false;
   unsigned int errmax = UINT_MAX;
   FileName filename = "-";
   FileName root;

   // parse parameters --------------------------------------------------------
   // skip program name
   if (argc>0) { argc--; argv++; }

   int i = 0;
   while (i < argc) {
      if ((0 == strcmp(argv[i], "-h")) || (0 == strcmp(argv[i], "--help"))) {
         print_help();
         return EXIT_SUCCESS;
      }
      else if ((0 == strcmp(argv[i], "-f")) || (0 == strcmp(argv[i], "--fingerprint"))) {
         if (check) {
            ETG_TRACE_FATAL(("Either --fingerprint or --check may be given, but not both."));
            return EXIT_FAILURE;
         }

         create = true;
         ++i;
         if (i >= argc) {
            ETG_TRACE_FATAL(( "Option %s needs a parameter.", argv[i-1] ));
            return EXIT_FAILURE;
         }
         filename.assign(argv[i]);
      }
      else if ((0 == strcmp(argv[i], "-c")) || (0 == strcmp(argv[i], "--check"))) {
         if (create) {
            ETG_TRACE_FATAL(("Either --fingerprint or --check may be given, but not both."));
            return EXIT_FAILURE;
         }

         check = true;
         ++i;
         if (i >= argc) {
            ETG_TRACE_FATAL(( "Option %s needs a parameter.", argv[i-1] ));
            return EXIT_FAILURE;
         }
         filename.assign(argv[i]);
      }
      else if ((0 == strcmp(argv[i], "-x")) || (0 == strcmp(argv[i], "--exclude"))) {
         ++i;
         if (i >= argc) {
            ETG_TRACE_FATAL(( "Option %s needs a parameter.", argv[i-1] ));
            return EXIT_FAILURE;
         }
         _fingerprint_fs.addExcluded(FileName(argv[i]));
      }
      else if (0 == strcmp(argv[i], "--no-realpath")) {
         use_realpath = false;
      }
      else if (0 == strcmp(argv[i], "--nostat")) {
         use_uid = false;
         use_gid = false;
         use_mode = false;
      }
      else if (0 == strcmp(argv[i], "--nouid")) {
         use_uid = false;
      }
      else if (0 == strcmp(argv[i], "--nogid")) {
         use_gid = false;
      }
      else if (0 == strcmp(argv[i], "--nomode")) {
         use_mode = false;
      }
      else if (0 == strcmp(argv[i], "--nocap")) {
    	 use_cap = false;
      }
      else if (0 == strcmp(argv[i], "--md5sum")) {
         only_md5sum = true;
         use_uid = false;
         use_gid = false;
         use_mode = false;
      }
      else if (0 == strcmp(argv[i], "--ignore")) {
         ++i;
         if (i >= argc) {
            ETG_TRACE_FATAL(( "Option %s needs a parameter.", argv[i-1] ));
            return EXIT_FAILURE;
         }
         _ignored = argv[i];
      }
      else if (0 == strcmp(argv[i], "--print")) {
         print_fp = true;
      }
      else if (0 == strcmp(argv[i], "--verify-only")) {
         verify_only = true;
      }
      else if ((0 == strcmp(argv[i], "-e")) || (0 == strcmp(argv[i], "--errmax"))) {
         ++i;
         if (i >= argc) {
            ETG_TRACE_FATAL(( "Option %s needs a parameter.", argv[i-1] ));
            return EXIT_FAILURE;
         }
         char *endptr;
         errno = 0;
         errmax =static_cast<unsigned int> (strtol(argv[i], &endptr, 10) );
         if (errno != 0) {
            ETG_TRACE_FATAL(( "Could not parse parameter for option %s.", argv[i-1] ));
            return EXIT_FAILURE;
         }
      }
      else if ((0 == strcmp(argv[i], "-V")) || (0 == strcmp(argv[i], "--verbose"))) {
         VERBOSITY++;
      }
      else if ((0 == strcmp(argv[i], "-v")) || (0 == strcmp(argv[i], "--version"))) {
         fprintf(stdout, "%d\n", VERSION);
         return EXIT_SUCCESS;
      }
      else if (argv[i][0] == '-') {
            ETG_TRACE_FATAL(( "Unknown option %s.", argv[i] ));
            print_help();
            return EXIT_FAILURE;
      }
      else {
         if ( ! root.empty() ) {
            ETG_TRACE_FATAL(("Only one root path can be given."));
            print_help();
            return EXIT_FAILURE;
         }
         else { root.assign(argv[i]); }
      }
      ++i;
   }

   // open file ---------------------------------------------------------------
   FILE* fingerprint_f;
   // reference fingerprint from file (for check)
   Fingerprint fingerprint_ref;
   if (check) {
      ETG_TRACE_USR1(("Checking fingerprint. Reference '%s'", filename.c_str()));
      if (filename == "-") {
         fingerprint_f = stdin;
      }
      else {
         fingerprint_f = fopen(filename.c_str(), "r");
         if ( ! fingerprint_f) {
            ETG_TRACE_FATAL(("Can't open FILE for reading: %s", filename.c_str() ));
            return EXIT_FAILURE;
         }
         // copy list of excluded files to _fingerprint_fs
      }
      fingerprint_ref.useUid(use_uid);
      fingerprint_ref.useGid(use_gid);
      fingerprint_ref.useMode(use_mode);
      fingerprint_ref.useCap(use_cap);
      fingerprint_ref.onlyMd5Sum(only_md5sum);
      fingerprint_ref.setErrMax(errmax);

      fingerprint_ref.fread(fingerprint_f, _ignored);
      fclose(fingerprint_f);
      // check integrity of the reference data
      if ( ! fingerprint_ref.checkRootDigest() ) {
         ETG_TRACE_FATAL(("Root digest of imported file is wrong. Corrupted file?"));
         return EXIT_FAILURE;
      }
      if (verify_only) {
         ETG_TRACE_USR1(("Verification of fingerprint file OK."));
         return EXIT_SUCCESS;
      }

      for (ExcludedList::const_iterator it = fingerprint_ref.excludedList().begin();
            it != fingerprint_ref.excludedList().end();
            ++it)
      {
         _fingerprint_fs.addExcluded(*it);
      }
   }
   else {
      if (filename == "-" ) {
         fingerprint_f = stdout;
      }
      else {
         fingerprint_f = fopen(filename.c_str(), "w");
         if ( ! fingerprint_f) {
            ETG_TRACE_FATAL(("Can't open FILE for writing: %s", filename.c_str() ));
            return EXIT_FAILURE;
         }
      }
   }

   // change paths to realpaths -----------------------------------------------
   if (use_realpath) {
      int res = applyRealpath(root);
      if (res != EXIT_SUCCESS) {
         ETG_TRACE_FATAL(("Can't apply realpath to %s", root.c_str() ));
         return res;
      }
   }

   // check parameters --------------------------------------------------------
   if (root.empty()) {
      if (check) {
         root = fingerprint_ref.root();
         ETG_TRACE_USR1(("Setting Rootpath for filesystem walk to %s", root.c_str() ));
      }
      else {
         ETG_TRACE_FATAL(("No ROOTDIR given."));
         print_help();
         return EXIT_FAILURE;
      }
   }
   else { // normalize directory and check if it is valid
      if (*(root.rbegin()) != '/') root.append("/");

      // is root a directory?
      DIR* dir = opendir(root.c_str());
      if (dir) { closedir(dir); }
      else {
         ETG_TRACE_FATAL(("Can't open ROOTDIR %s", root.c_str() ));
         return EXIT_FAILURE;
      }
   }

   _root_length = root.length();
   _fingerprint_fs.setRoot(root);

   // Make excluded paths relative to root, i.e., 
   // shorten excluded files if they start with the root path
   for (ExcludedList::iterator it = _fingerprint_fs.excludedList().begin();
         it != _fingerprint_fs.excludedList().end();
         it++)
   {
      if (it->compare(0, _root_length, root) == 0) {
         *it = it->substr(_root_length);
      }
   }

   // debug output
   ETG_TRACE_USR3(("Excluded from fingerprinting on filesystem:" ));
   for (ExcludedList::iterator it = _fingerprint_fs.excludedList().begin();
         it != _fingerprint_fs.excludedList().end();
         it++)
   {
      ETG_TRACE_USR3(("  %s", it->c_str() ));
   }

   // walk the walk -----------------------------------------------------------
   ETG_TRACE_USR1(("Fingerprinting STARTED."));

   // FTW_ACTIONRETVAL - evaluate extra return values from computeDigestStat
   // FTW_PHYS - don't follow symlinks.
   // FTW_MOUNT - stay on same file system (do not cross mount points)
   const int flags = FTW_ACTIONRETVAL | FTW_PHYS | FTW_MOUNT;

   OpenSSL_add_all_digests();
   _ctx = EVP_MD_CTX_create();

   int retval = nftw(root.c_str(), computeDigestStat, MAX_DEPTH, flags);
   
   EVP_MD_CTX_destroy(_ctx);
   if (retval != FTW_SKIP_SUBTREE && retval != FTW_CONTINUE) {
      ETG_TRACE_FATAL(("Could not complete file tree walk."));
      return EXIT_FAILURE;
   }

   _fingerprint_fs.computeRootDigest();

   int ret = EXIT_SUCCESS;
   if (check) {

      if ( only_md5sum ) {
         if ( ! fingerprint_ref.check(_fingerprint_fs)) {
            ETG_TRACE_FATAL(("Check failed."));
            ret = EXIT_FAILURE;
         }
         else {
            ETG_TRACE_USR1(("OK"));
         }
      }
      else {
         if ( fingerprint_ref.root() != _fingerprint_fs.root() ) {
            ETG_TRACE_FATAL(("ROOTDIRs are not the same. Filesystem: %s", _fingerprint_fs.root().c_str() ));
            ETG_TRACE_FATAL(("                           Reference:  %s", fingerprint_ref.root().c_str() ));
            ret = EXIT_FAILURE;
         }
         else if ( ! fingerprint_ref.check(_fingerprint_fs)) {
            ETG_TRACE_FATAL(("Check failed."));
            ret = EXIT_FAILURE;
         }
         else {
            ETG_TRACE_USR1(("OK"));
         }
         if (print_fp) {
            char rd_str[DIGEST_STR_LEN];
            _fingerprint_fs.rootDigest().print(rd_str, DIGEST_STR_LEN);
            fprintf(stdout, "Fingerprint=%s\n", rd_str);
         }
      }
   }
   else {
      _fingerprint_fs.fprint(fingerprint_f);
      fclose(fingerprint_f);
   }

   ETG_TRACE_USR1(("Fingerprinting ENDED."));

   return ret;
}
