/*
 * linux/drivers/char/exchnd/exchnd_modules.c
 *
 * Copyright (C) 2013 Advanced Driver Information Technology GmbH
 * Written by Kai Tomerius (ktomerius@de.adit-jv.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
 * GNU General Public License for more details.
 *
 */

/*
 * Exception Handler Modules
 *
 * The exception handler modules collect data about the exception or trigger
 * specific actions. They allow configuring which data is collected and which
 * actions are performed per exception type.
 */
#define pr_fmt(fmt) "exchnd: " fmt

#include <linux/exchnd.h>
#include <linux/kernel_stat.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/pid_namespace.h>

#include <linux/module.h>
#include <linux/tracepoint.h>
#include <asm/syscall.h>
#include <trace/syscall.h>

#include <linux/slab.h>
#include <linux/uaccess.h>

#include "exchnd_internal.h"

/*
 * exception handler module BACKTRACE
 *
 * The module collects the backtrace of the current process. If the stacktrace
 * is completely in user space, the generation is done in user space.
 * Shall not be executed in user space in case of OOM.
 *
 * @info:   exception description; task, regs, trigger, sig and write_func
 *          are read
 * @module: module identifier stored in the message header
 *
 * For a task that owns an mm only a header with addition_needed set is
 * written. Otherwise the trace is generated by exchnd_kbacktrace() into a
 * temporary buffer that is written together with the header.
 *
 * Return: 0 on success or when the module is skipped, -ENOMEM if the trace
 * buffer cannot be allocated.
 */

static int exchnd_backtrace_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;
	unsigned char *data = NULL;
	struct task_struct *task = info->task;
	struct pt_regs *regs = info->regs;

	if (!task && !regs) {
		pr_warn("%s skipped as there is no way to retrieve frames.\n",
				__func__);
		return 0;
	}

	header.type = module;
	header.trigger = info->trigger;
	header.seq_num = 0;
	header.sig = info->sig;
	header.pid = task ? task->pid : 0;
	/* Set on every path so no uninitialized flag reaches write_func. */
	header.flags.internal = 0;

	if (task && task->mm) {
		/* User space */
		header.length                = 0;
		header.flags.collected       = 0;
		header.flags.addition_needed = 1;
	} else {
		/* Kernel */
		/* TODO: Change GFP based on exception ?*/
		data = kzalloc(EXCHND_MAX_TRACE_LEN, GFP_ATOMIC);
		if (!data)
			return -ENOMEM;

		if (!task) {
			/*
			 * Retrieve task from regs. The stack pointer is kept
			 * in an unsigned long: an int would truncate it on
			 * 64-bit kernels.
			 */
			unsigned long sp = kernel_stack_pointer(regs);
			struct thread_info *thread = (struct thread_info *)
				(sp & ~((unsigned long)THREAD_SIZE - 1));

			task = thread->task;
			header.pid = task->pid;
		}

		header.flags.collected       = 1;
		header.flags.addition_needed = 0;

		header.length = exchnd_kbacktrace(regs, task, data);
	}

	info->write_func(&header, data);
	/* data is NULL on the user space path; kfree(NULL) is a no-op. */
	kfree(data);

	return 0;
}

/*
 * exception handler module BACKTRACE_ALL_THREADS
 *
 * The module collects the backtrace of all threads (including itself) of the
 * current process. If the trace is completely in user space, the
 * generation is done in user space.
 * Shall not be executed in user space in case of OOM.
 */

static int exchnd_backtrace_all_threads_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;

	if (!info->task) {
		pr_warn("%s skipped as task is a NULL pointer.\n", __func__);
		goto exit;
	}

	/* User space */
	if (info->task->mm) {
		header.type = module;
		header.trigger = info->trigger;
		header.pid = info->task->pid;
		header.seq_num               = 0;
		header.length                = 0;
		header.flags.collected       = 0;
		header.flags.internal        = 0;
		header.flags.addition_needed = 1;
		info->write_func(&header, NULL);
	}

exit:
	return 0;
}

/*
 * exception handler module CGROUPS
 *
 * The module collects the cgroup information of the current process. If the
 * process is completely in user space, the generation is done in user space.
 */
static int exchnd_cgroups_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;

	header.length = 0;
	header.type = module;
	header.trigger = info->trigger;
	header.pid = 0;
	header.seq_num               = 0;
	header.flags.collected       = 0;
	header.flags.internal        = 0;
	header.flags.addition_needed = 1;

	info->write_func(&header, NULL);

	return 0;
}

/*
 * exception handler module EHM_CPU_USAGE
 *
 * The module collects the workload of the CPUs.
 */

static int exchnd_cpu_usage_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;

	header.length = 0;
	header.type = module;
	header.trigger = info->trigger;
	header.pid = 0;
	header.seq_num               = 0;
	header.flags.collected       = 0;
	header.flags.internal        = 0;
	header.flags.addition_needed = 1;

	info->write_func(&header, NULL);

	return 0;
}

#define EHM_FAULT_ADDRESS_HEADER "====== fault address:"
/*
 * exception handler module EHM_FAULT_ADDRESS
 *
 * The module collects the address of the code that crashed.
 *
 * @info:   exception description; task, regs, trigger and write_func are read
 * @module: module identifier stored in the message header
 *
 * The task is taken from @info, or derived from the kernel stack pointer in
 * @info->regs when no task is given. The fault address is queried only once
 * the task is known, so exchnd_get_fault() never receives a NULL task.
 *
 * Return: 0 on success or when the module is skipped, -ENOMEM if the message
 * buffer cannot be allocated.
 */

static int exchnd_fault_address_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;
	struct task_struct *task = info->task;
	unsigned char *data = NULL;
	unsigned long address;

	if (!task && !info->regs) {
		pr_warn("%s skipped as there is no way to retrieve task.\n",
				__func__);
		return 0;
	}

	if (!task) {
		/*
		 * Retrieve task. The stack pointer is kept in an unsigned
		 * long: an int would truncate it on 64-bit kernels.
		 */
		unsigned long sp = kernel_stack_pointer(info->regs);
		struct thread_info *thread = (struct thread_info *)
			(sp & ~((unsigned long)THREAD_SIZE - 1));

		task = thread->task;
	}

	address = exchnd_get_fault(task);

	header.type = module;
	header.trigger = info->trigger;
	header.pid = task->pid;
	header.seq_num               = 0;
	header.flags.collected       = 1;
	header.flags.internal        = 0;
	header.flags.addition_needed = 0;

	/* TODO: Change GFP based on exception ?*/
	data = kzalloc(EXCHND_MAX_ENTRY_LEN, GFP_ATOMIC);
	if (!data)
		return -ENOMEM;

	header.length = snprintf(data,
			EXCHND_MAX_ENTRY_LEN,
			EHM_FAULT_ADDRESS_HEADER " %p\n",
			(void *) address);

	info->write_func(&header, data);
	kfree(data);

	return 0;
}

/*
 * exception handler module EHM_FS_STATE
 *
 * The module collects the state of the file systems.
 */

static int exchnd_fs_state_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;

	/* User space */
	header.length = 0;
	header.type = module;
	header.trigger = info->trigger;
	header.pid = 0;
	header.seq_num               = 0;
	header.flags.collected       = 0;
	header.flags.internal        = 0;
	header.flags.addition_needed = 1;

	info->write_func(&header, NULL);

	return 0;
}

/*
 * exception handler module EHM_PROCESSOR_REGISTERS
 *
 * The module collects the contents of the processor registers at the time
 * of the crash.
 */
static int exchnd_processor_registers_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;
	unsigned char *data = NULL;
	struct pt_regs *regs = NULL;

	if (!info->task && !info->regs) {
		pr_warn("%s skipped as task is a NULL pointer.\n", __func__);
		goto exit;
	}

	/* TODO: Change GFP based on exception ?*/
	data = kzalloc(EXCHND_MAX_TRACE_LEN, GFP_ATOMIC);
	if (!data)
		return -ENOMEM;

	/* Init header. */
	header.type = module;
	header.trigger = info->trigger;
	header.seq_num               = 0;
	header.flags.internal        = 0;
	header.flags.collected       = 1;
	header.flags.addition_needed = 0;

	if (!info->task) {
		regs = info->regs;
		header.pid = 0;
	} else {
		/* Should work even if we are in current task. */
		regs = task_pt_regs(info->task);
		header.pid = info->task->pid;
	}

	header.length = exchnd_dump_regs(data, regs, info->task);

	info->write_func(&header, data);
	kfree(data);

exit:
	return 0;
}

/*
 * exception handler module EHM_SYSTEM_INFO
 *
 * The module is meant to collect information about the state of the system.
 * It is currently a stub: nothing is collected, no message is written and
 * both parameters are ignored.
 *
 * Return: always 0
 */
static int exchnd_system_info_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	return 0;
}

/*
 * exception handler module EHM_MEMORY_MAP
 *
 * The module collects the memory map of the process that crashed.
 */
static int exchnd_memory_map_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	if (!info->task) {
		pr_warn("%s skipped as task is a NULL pointer.\n", __func__);
		goto exit;
	}

	/* User space */
	if (info->task->mm) {
		struct exchnd_message_header header;
		header.length = 0;
		header.type = module;
		header.trigger = info->trigger;
		header.pid = info->task->pid;
		header.seq_num               = 0;
		header.flags.collected       = 0;
		header.flags.internal        = 0;
		header.flags.addition_needed = 1;

		info->write_func(&header, NULL);
	}
	/* Kernel */
	/* TODO: Kernel thread handling */

exit:
	return 0;
}

#define EHM_MEMORY_USAGE_HEADER "====== memory usage:\n"
/*
 * exception handler module EHM_MEMORY_USAGE
 *
 * The module collects the memory usage of the complete system at the time of
 * the crash.
 */
static int exchnd_memory_usage_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;
	unsigned char *data = NULL;
	struct sysinfo val;

	/* Init header. */
	header.type = module;
	header.trigger = info->trigger;
	header.pid                   = 0;
	header.seq_num               = 0;
	header.flags.collected       = 1;
	header.flags.internal        = 0;
	header.flags.addition_needed = 0;

	/* Getting meminfo. */
	si_meminfo(&val);

	/* Formatting */
#define K(x) ((x) << (PAGE_SHIFT - 10)) /* Display in kilobytes. */
	/* TODO: Change GFP based on exception ?*/
	data = kzalloc(EXCHND_MAX_TRACE_LEN, GFP_ATOMIC);
	if (!data)
		return -ENOMEM;

	header.length = snprintf(data,
			EXCHND_MAX_TRACE_LEN,
			EHM_MEMORY_USAGE_HEADER
			"MemTotal:       %8lu kB\n"
			"MemFree:        %8lu kB\n"
			"Buffers:        %8lu kB\n"
			"HighTotal:      %8lu kB\n"
			"HighFree:       %8lu kB\n",
			K(val.totalram),
			K(val.freeram),
			K(val.bufferram),
			K(val.totalhigh),
			K(val.freehigh));

	info->write_func(&header, data);
	kfree(data);

	return 0;
}

#define EHM_PROCESS_LIST_HEADER "===== process list:\n"
/*
 * exception handler module EHM_PROCESS_LIST
 *
 * The module collects a list of all existing processes at the time of the
 * crash.
 *
 * @info:   exception description; task, trigger and write_func are read
 * @module: module identifier stored in the message header
 *
 * For a task that owns an mm only a header with addition_needed set is
 * written. Otherwise the process list is formatted here, one
 * "<tgid> <comm>" line per process, and truncated to the buffer size.
 *
 * Return: 0 on success or when the module is skipped, -ENOMEM if the list
 * buffer cannot be allocated.
 */
static int exchnd_process_list_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;
	unsigned char *data = NULL;

	if (!info->task) {
		pr_warn("%s skipped as task is a NULL pointer.\n", __func__);
		return 0;
	}

	header.type = module;
	header.trigger = info->trigger;
	header.pid = info->task->pid;
	header.seq_num = 0;
	header.flags.internal = 0;

	/* User space */
	if (info->task->mm) {
		header.length                = 0;
		header.flags.collected       = 0;
		header.flags.addition_needed = 1;
	} else {
		/* Kernel */
		/* TODO: Kernel thread handling */
		/* 5*MAX_TRACE_LEN to ensure enough space. */
		const unsigned int size = 5 * EXCHND_MAX_TRACE_LEN;
		unsigned int len = 0;
		struct task_struct *tsk;

		/* TODO: Change GFP based on exception ?*/
		data = kzalloc(size, GFP_ATOMIC);
		if (!data)
			return -ENOMEM;

		header.flags.collected       = 1;
		header.flags.addition_needed = 0;

		/*
		 * snprintf() returns the length the output would have had,
		 * not the number of bytes stored. len is therefore clamped
		 * after every call so that it always stays a valid index
		 * into data.
		 */
		len = snprintf(data, size, EHM_PROCESS_LIST_HEADER);
		if (len >= size)
			len = size - 1;

		rcu_read_lock();
		for_each_process(tsk) {
			/* TODO: Do we really need this ? */
			pid_t tid = task_tgid_vnr(tsk);
			int spaces = 5 - snprintf(NULL, 0, "%d", tid);
			int written = snprintf(data + len,
					size - len,
					"%d %*c %s\n",
					tid,
					spaces,
					' ',
					tsk->comm);

			if (written < 0)
				break;

			if ((unsigned int)written >= size - len) {
				/* Truncated: snprintf() already terminated. */
				len = size - 1;
				break;
			}

			len += written;
		}
		rcu_read_unlock();

		/*
		 * Keep the trailing newline behind the reported length, but
		 * only where it does not overwrite the final terminator.
		 */
		if (len < size - 1)
			data[len] = '\n';
		header.length = len;
	}

	info->write_func(&header, data);
	/* data is NULL on the user space path; kfree(NULL) is a no-op. */
	kfree(data);

	return 0;
}


#define EHM_STACK_DUMP_HEADER "===== stack dump:\n"
/*
 * exception handler module EHM_STACK_DUMP
 *
 * The module collects the contents of the active stack at the time of the
 * crash.
 * Shall not be executed in user space in case of OOM.
 *
 * @info:   exception description; task, regs, trigger, sig and write_func
 *          are read
 * @module: module identifier stored in the message header
 *
 * For a task that owns an mm, only the start address of its stack is written
 * (in hex) with the internal and addition_needed flags set. Otherwise the
 * kernel stack between the stack pointer and the top of the thread area is
 * dumped as hex words, eight per line, truncated to EXCHND_MAX_TRACE_LEN.
 *
 * Return: 0 on success or when the module is skipped, -ENOMEM if the dump
 * buffer cannot be allocated.
 */
static int exchnd_stack_dump_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;
	struct task_struct *task = info->task;
	struct pt_regs *regs = info->regs;
	struct thread_info *thread = NULL;
	unsigned char *data = NULL;
	unsigned long *sp = NULL;
	unsigned long *sp_aligned = NULL;
	unsigned long *start_stack = NULL;
	const int words_per_line = 8;
	int len = 0;

	if (!task && !regs) {
		pr_warn("%s skipped as task is a NULL pointer.\n", __func__);
		return 0;
	}

	/* TODO: Change GFP based on exception ?*/
	data = kzalloc(EXCHND_MAX_TRACE_LEN, GFP_ATOMIC);
	if (!data)
		return -ENOMEM;

	/* Init header. */
	header.type = module;
	header.trigger = info->trigger;
	header.sig = info->sig;
	header.seq_num               = 0;
	header.flags.internal        = 0;
	header.flags.collected       = 1;
	header.flags.addition_needed = 0;
	header.pid = task ? task->pid : 0;

	if (task && task->mm) {
		start_stack = (unsigned long *)task->mm->start_stack;
		len += snprintf(data,
				EXCHND_MAX_TRACE_LEN - len,
				"%lX",
				(unsigned long) start_stack);
		header.flags.internal        = 1;
		header.flags.addition_needed = 1;
		goto dump;
	}

	if (task) {
		thread = task_thread_info(task);
		sp = exchnd_get_sp(task);
	} else {
		/*
		 * Retrieve SP and thread. The stack pointer is kept in an
		 * unsigned long: an int would truncate it on 64-bit kernels.
		 */
		unsigned long sp_temp = kernel_stack_pointer(regs);

		sp = (unsigned long *)sp_temp;
		thread = (struct thread_info *)
			(sp_temp & ~((unsigned long)THREAD_SIZE - 1));
	}

	/* Stack grows downward from top of thread. */
	start_stack = (unsigned long *)((unsigned long) thread + THREAD_SIZE);
	sp_aligned = (unsigned long *)((unsigned long) sp & ~0xf);

	len += snprintf(data,
			EXCHND_MAX_TRACE_LEN,
			EHM_STACK_DUMP_HEADER "From <%p> to <%p>:",
			sp,
			start_stack);

	/* One output line per iteration; nothing is dumped for a NULL sp. */
	while ((sp_aligned < start_stack) && ((unsigned long)sp != 0)) {
		int j = 0;

		len += snprintf(data + len,
				EXCHND_MAX_TRACE_LEN - len,
				"\n<%04lx>",
				(unsigned long)sp_aligned & 0xffff);

		/* Blank columns for the words below the real stack pointer. */
		while (sp_aligned < sp) {
			len += snprintf(data + len,
					EXCHND_MAX_TRACE_LEN - len,
					"         ");
			sp_aligned++;
			j++;
		}

		for (; j < words_per_line && sp_aligned < start_stack; j++) {
			len += snprintf(data + len,
					EXCHND_MAX_TRACE_LEN - len,
					" %08lx",
					*sp_aligned);
			sp_aligned++;

			/* Stop while there is still room for one more word. */
			if (len + 9 > EXCHND_MAX_TRACE_LEN) {
				len = EXCHND_MAX_TRACE_LEN - 1;
				data[len] = '\0';
				break;
			}
		}

		/* Stop while there is still room for the next line prefix. */
		if (len + 10 > EXCHND_MAX_TRACE_LEN) {
			len = EXCHND_MAX_TRACE_LEN - 1;
			data[len] = '\0';
			break;
		}
	}
	data[len] = '\0';

dump:
	header.length = len;
	info->write_func(&header, data);
	kfree(data);

	return 0;
}

/*
 * exception handler module EHM_THREAD_LIST
 *
 * The module collects the list of threads belonging to the process that
 * crashed.
 */
static int exchnd_thread_list_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	if (!info->task) {
		pr_warn("%s skipped as task is a NULL pointer.\n", __func__);
		goto exit;
	}

	/* User space */
	if (info->task->mm) {
		struct exchnd_message_header header;
		header.length = 0;
		header.type = module;
		header.trigger = info->trigger;
		header.pid = info->task->pid;
		header.seq_num               = 0;
		header.flags.collected       = 0;
		header.flags.internal        = 0;
		header.flags.addition_needed = 1;

		info->write_func(&header, NULL);
	}
	/* Kernel */
	/* TODO: Kernel thread handling */

exit:
	return 0;
}

/**
 * exchnd_sys_restart_execute - module that restarts the system
 * @info: information about the exception to handle
 *
 * The function represent the exception handler module EHM_SYS_RESTART. It
 * is an action module and restarts the system.
 *
 * Return: 0 for success, error code otherwise
 */
static int exchnd_sys_restart_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	char buf[] = "Restarting the system.\n";
	char cmd[] = "Exchnd SYS_RESTART";
	struct exchnd_message_header header;
	header.length                = strlen(buf);
	header.type                  = EHM_SYS_RESTART;
	header.trigger               = info->trigger;
	if (info->task != NULL)
		header.pid           = info->task->pid;
	else
		header.pid           = 0;
	header.seq_num               = 0;
	header.flags.collected       = 1;
	header.flags.internal        = 0;
	header.flags.addition_needed = 0;

	/* Write directly in errmem as we will most likely not return
	 * in user land.
	 */
	em_write(&header, buf);

	if (!in_irq() && !in_softirq()) {
		/* We don't want this restart to be handled. */
		exchnd_trigger_list[ET_RESTART]
			.deinit(&exchnd_trigger_list[ET_RESTART]);
		kernel_restart(cmd);
	}
	emergency_restart();

	return 0;
}

#define EHM_MEMORY_DUMP_HEADER "====== memory dump around address"
/*
 * exception handler module EHM_MEMORY_DUMP
 *
 * The module collects the memory around the address which access lead to the
 * crash.
 * Shall not be executed in user space in case of OOM.
 *
 * @info:   exception description; task, trigger and write_func are read
 * @module: module identifier stored in the message header
 *
 * The address is taken from KSTK_EIP() of the task. For a task that owns an
 * mm only that address is written (in hex) with the internal and
 * addition_needed flags set. Otherwise the words around the address are
 * read directly and formatted here, four per line.
 *
 * Return: 0 on success or when the module is skipped, -ENOMEM if the dump
 * buffer cannot be allocated.
 */
static int exchnd_memory_dump_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;
	unsigned char *data = NULL;
	unsigned int len = 0;
	unsigned long faulty_add = 0;
	int i = 0;

	/* Line to dump should be even */
	const int lines_to_dump = 6;
	const int dumps_per_line = 4;
	const int iter_start = ((lines_to_dump - 2) * dumps_per_line) / 2;
	const int iter_end = (lines_to_dump * dumps_per_line) / 2;
	const unsigned int max_dump_len =
		lines_to_dump * EXCHND_MAX_ENTRY_LEN;

	if (!info->task) {
		pr_warn("%s skipped as task is a NULL pointer.\n", __func__);
		return 0;
	}

	faulty_add = KSTK_EIP(info->task);

	if (unlikely(!faulty_add))
		return 0;

	/* TODO: Change GFP based on exception ?*/
	data = kzalloc(max_dump_len, GFP_ATOMIC);
	if (!data)
		return -ENOMEM;

	header.type = module;
	header.trigger = info->trigger;
	header.pid = info->task->pid;
	header.seq_num               = 0;
	header.flags.collected       = 1;
	header.flags.internal        = 0;
	header.flags.addition_needed = 0;

	if (info->task->mm) {
		len = snprintf(data,
				max_dump_len,
				"%lX",
				faulty_add);
		header.flags.internal        = 1;
		header.flags.addition_needed = 1;
	} else {
		/*
		 * snprintf() returns the length the output would have had.
		 * len is checked after every call so that the remaining
		 * size "max_dump_len - len" can never wrap around.
		 */
		len = snprintf(data,
				max_dump_len,
				EHM_MEMORY_DUMP_HEADER " %p:\n",
				(void *) faulty_add);
		if (len >= max_dump_len)
			goto clamp;

		/* Formatting output */
		for (i = -iter_start; i < iter_end;) {
			int j = 0;
			/* Getting address from unsigned long */
			unsigned long *add = (unsigned long *)
				(faulty_add + i * sizeof(long));

			len += snprintf(data + len,
					max_dump_len - len,
					"<%p>: ",
					add);
			if (len >= max_dump_len)
				goto clamp;

			for (j = 0; j < dumps_per_line; j++) {
				len += snprintf(data + len,
						max_dump_len - len,
						"%08lx",
						*add);
				if (len >= max_dump_len)
					goto clamp;

				i++;
				add = (unsigned long *)
					(faulty_add + i * sizeof(long));
			}

			len += snprintf(data + len,
					max_dump_len - len,
					"\n");
			if (len >= max_dump_len)
				goto clamp;
		}
clamp:
		/* Reached on normal completion as well as on truncation. */
		if (len >= max_dump_len)
			len = max_dump_len - 1;
		data[len] = '\n';
	}

	header.length = len;
	info->write_func(&header, data);
	kfree(data);

	return 0;
}

/*
 * Library path for each of the five application specific modules, indexed by
 * (module - EHM_APPLICATION_SPECIFIC1). Every slot defaults to
 * "exchnd_lib.so" and can be replaced through exchnd_set_lib_path().
 */
static char exchnd_lib_name[5][EXH_PATH_MAX] = {
	"exchnd_lib.so",
	"exchnd_lib.so",
	"exchnd_lib.so",
	"exchnd_lib.so",
	"exchnd_lib.so"
};

/*
 * exchnd_set_lib_path - Library path setter
 *
 * Allows user to define a specific library name and path for an application
 * specific module. At most EXH_PATH_MAX - 1 bytes of conf->lib_path are
 * copied into the slot selected by conf->module.
 *
 * Returns 0 on success, -EINVAL if the module value is out of bounds.
 */
int exchnd_set_lib_path(struct exchnd_conf_app_spec *conf)
{
	char *slot;

	if (!((conf->module >= EHM_APPLICATION_SPECIFIC1) &&
			(conf->module <= EHM_APPLICATION_SPECIFIC5)))
		return -EINVAL;

	slot = exchnd_lib_name[conf->module - EHM_APPLICATION_SPECIFIC1];

	/* The last byte of the slot is never written by this copy. */
	strncpy(slot, conf->lib_path, EXH_PATH_MAX - 1);

	return 0;
}

/*
 * exception handler module EHM_APPLICATION_SPECIFICX
 *
 * This function is the common part to any application specific module.
 *
 * @info:   exception description; task, trigger and write_func are read
 * @module: one of EHM_APPLICATION_SPECIFIC1..EHM_APPLICATION_SPECIFIC5
 *
 * The library path configured for @module is written as the message
 * payload, including its terminating NUL, with the collected, internal and
 * addition_needed flags set.
 *
 * Return: 0 on success or when skipped because no task is available,
 * -EINVAL if @module is not an application specific module.
 */
static int exchnd_application_specific_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;
	char *lib_name = NULL;

	if ((module < EHM_APPLICATION_SPECIFIC1) ||
			(module > EHM_APPLICATION_SPECIFIC5)) {
		pr_err("Invalid value for module: %d.\n", module);
		return -EINVAL;
	}

	/* Same guard as the other modules: the pid is read from the task. */
	if (!info->task) {
		pr_warn("%s skipped as task is a NULL pointer.\n", __func__);
		return 0;
	}

	lib_name = exchnd_lib_name[module - EHM_APPLICATION_SPECIFIC1];

	header.type = module;
	header.trigger = info->trigger;
	header.pid = info->task->pid;
	header.seq_num               = 0;
	header.flags.collected       = 1;
	header.flags.internal        = 1;
	header.flags.addition_needed = 1;

	/* +1 so that the terminating NUL is part of the payload. */
	header.length = strlen(lib_name) + 1;
	info->write_func(&header, (unsigned char *)lib_name);

	return 0;
}

/*
 * Number of entries in each history ring buffer. The writers mask their
 * index with (EH_DEFAULT_HIST_COUNT - 1), so this has to stay a power of two.
 */
#define EH_DEFAULT_HIST_COUNT 4096
/* Default number of history entries reported by the history modules. */
#define EH_READ_HIST_COUNT 64
/* Number of history entries reported; changed by exchnd_set_hist_size(). */
static int exchnd_read_hist_count = EH_READ_HIST_COUNT;
/* Pid of the thread group leader to restrict tracing to; 0 disables it. */
static unsigned int exchnd_trace_pid;

/*
 * exchnd_set_trace_pid - select the process the history probes record
 * @pid: stored as-is in exchnd_trace_pid; the probes compare it against the
 *       pid of a task's group leader and treat 0 as "no filter"
 */
void exchnd_set_trace_pid(int pid)
{
	exchnd_trace_pid = pid;
}

/*
 * exchnd_set_hist_size - set the number of history entries to report
 * @size: requested count; values above EH_DEFAULT_HIST_COUNT/4 are capped to
 *        that limit, negative values select EH_READ_HIST_COUNT
 */
void exchnd_set_hist_size(int size)
{
	const int limit = EH_DEFAULT_HIST_COUNT/4;
	int count = size;

	if (count > limit)
		count = limit;
	else if (count < 0)
		count = EH_READ_HIST_COUNT;

	pr_info("Modifying history size to %d.\n", count);
	exchnd_read_hist_count = count;
}

#define EHM_HIST_SYSCALL_HEADER "====== syscall history\n"
/*
 * exception handler module EHM_HIST_SYSCALLS
 *
 * Provide the list of last hist_count system calls that were made.
 *
 * One struct exchnd_hist_syscall is recorded per traced system call.
 * syscall_rb is the ring buffer filled by exchnd_sys_entry(); temp_syscall is
 * the scratch copy used by exchnd_hist_syscall_execute() while formatting.
 */
static struct exchnd_hist_syscall {
	pid_t		pid;			/* process ID */
	int		nr;			/* Sys call NR */
	unsigned long	args[6];		/* Sys call arguments */
	u64		time;			/* Timestamp from cpu_clock() */
	char		cpu;			/* CPU id */
	char		comm[TASK_COMM_LEN];	/* task comm */
} *syscall_rb, *temp_syscall;

/* Taken by execute/init/deinit; exchnd_sys_entry() does not take it. */
static DEFINE_SPINLOCK(syscall_rb_lock);
/* Free-running write counter; masked to a ring index by the writer. */
static unsigned int syscall_index;
/* Symbol name of each system call, filled by exchnd_hist_syscall_init(). */
static char syscall_names[NR_syscalls][KSYM_SYMBOL_LEN] = { { '\0' } };
/* Non-zero makes exchnd_sys_entry() drop the sample. */
static int syscall_reading;
/* Length of the longest name in syscall_names, used as column width. */
static int syscall_pad;

/*
 * get_syscall_index - current value of the syscall history write counter
 *
 * Returns the unmasked counter; no lock is taken.
 */
unsigned int get_syscall_index(void)
{
	return syscall_index;
}

static void exchnd_sys_entry(void *data, struct pt_regs *regs, long id)
{
	int tmp_idx;

	if (id < 0)
		return;

	if (id >= NR_syscalls)
		return;

	/* Bad luck for data loss, but we can live with that.
	 * We don't want to lock during syscall. System is already slowed down
	 * enough by executing this code.
	 */
	if (syscall_reading)
		return;

	if (exchnd_trace_pid &&
			(exchnd_trace_pid != current->group_leader->pid))
		return;

	tmp_idx = syscall_index++;
	tmp_idx &= (EH_DEFAULT_HIST_COUNT-1);

	/* Getting basic information */
	syscall_rb[tmp_idx].pid = current->pid;
	syscall_rb[tmp_idx].nr = id;
	preempt_disable();
	syscall_rb[tmp_idx].cpu = smp_processor_id();
	preempt_enable();
	syscall_rb[tmp_idx].time = cpu_clock(syscall_rb[tmp_idx].cpu);
	memcpy(syscall_rb[tmp_idx].comm, current->comm, TASK_COMM_LEN);
	/* Getting args */
	syscall_get_arguments(current, regs, 0, 6, syscall_rb[tmp_idx].args);
}

/*
 * exchnd_hist_syscall_execute - write the recorded system call history
 * @info:   exception description; task, trigger, syscall_index and
 *          write_func are read
 * @module: module identifier stored in the message header
 *
 * The entries preceding @info->syscall_index are copied out of the ring
 * buffer under syscall_rb_lock, then formatted one line per entry. The text
 * is handed to write_func in blocks: a block is flushed once it has reached
 * EXCHND_MAX_TRACE_LEN.
 *
 * Return: 0 on success, when skipped, or when the ring buffer is gone;
 * -ENOMEM if a buffer cannot be allocated.
 */
static int exchnd_hist_syscall_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;
	unsigned char *data = NULL;

	/*
	 * exchnd_set_hist_size() can change exchnd_read_hist_count at any
	 * time. It is read once so that the allocation size, the copy
	 * lengths and the loop bound below all use the same value.
	 */
	const unsigned int count = exchnd_read_hist_count;
	const int size = sizeof(struct exchnd_hist_syscall);

	unsigned int i = 0;
	unsigned long flags;
	unsigned int start = 0;
	unsigned int length = 0;
	int cpylen = 0;
	int ret = 0;

	if (!info->task) {
		pr_warn("%s skipped as task is a NULL pointer.\n", __func__);
		ret = 0;
		goto exit;
	}

	if (in_interrupt())
		data = kzalloc(2*EXCHND_MAX_TRACE_LEN, GFP_ATOMIC);
	else
		data = kzalloc(2*EXCHND_MAX_TRACE_LEN, GFP_KERNEL);

	if (!data) {
		ret = -ENOMEM;
		goto exit;
	}

	/*
	 * Initialize the temp buffer to be used for reading.
	 * NOTE(review): temp_syscall is a file-scope pointer, so concurrent
	 * calls would share it - confirm callers are serialized.
	 */
	if (in_interrupt())
		temp_syscall = kzalloc(size * count, GFP_ATOMIC);
	else
		temp_syscall = kzalloc(size * count, GFP_KERNEL);

	if (!temp_syscall) {
		pr_err("Unable to initialize history ring buffer.\n");
		ret = -ENOMEM;
		goto free_data;
	}

	/* Init header. Data has to be directly sent to outputs. */
	header.type = module;
	header.trigger = info->trigger;
	header.seq_num               = 0;
	header.flags.internal        = 0;
	header.flags.collected       = 1;
	header.flags.addition_needed = 0;
	header.pid                   = 0;

	spin_lock_irqsave(&syscall_rb_lock, flags);
	if (!syscall_rb) {
		/* Nothing to read anymore */
		spin_unlock_irqrestore(&syscall_rb_lock, flags);
		goto free_temp_syscall;
	}

	i = info->syscall_index;

	start = (i - count) & (EH_DEFAULT_HIST_COUNT - 1);
	if ((start + 1) == EH_DEFAULT_HIST_COUNT)
		start = 0;

	/* The probe drops samples while syscall_reading is non-zero. */
	syscall_reading++;
	/* We are somewhere in the middle of the history */
	if ((EH_DEFAULT_HIST_COUNT - start) >= count)
		memcpy(temp_syscall, &syscall_rb[start], size * count);
	else {
		/* Wrapped: copy the tail of the ring, then its head. */
		cpylen = size*(EH_DEFAULT_HIST_COUNT - start);
		memcpy(temp_syscall, syscall_rb + start, cpylen);

		cpylen = size * count - cpylen;
		memcpy(temp_syscall + (EH_DEFAULT_HIST_COUNT - start),
				syscall_rb, cpylen);
	}
	syscall_reading--;
	spin_unlock_irqrestore(&syscall_rb_lock, flags);

	length += snprintf(data + length,
			2*EXCHND_MAX_TRACE_LEN - length,
			EHM_HIST_SYSCALL_HEADER);

	for (i = 0; i < count; i++) {
		/* Skip unused elements */
		if (temp_syscall[i].time == 0)
			continue;

		/* Send the message to user space once we have one "block" */
		if (length >= EXCHND_MAX_TRACE_LEN) {
			header.length = length;
			info->write_func(&header, data);
			length = 0;
		}

		length += snprintf(data + length,
				2*EXCHND_MAX_TRACE_LEN - length,
				"%20llu: %-*s(%3d) from %-*s(%5d) on CPU %d. Args 0x%08lx 0x%08lx 0x%08lx 0x%08lx 0x%08lx 0x%08lx\n",
				temp_syscall[i].time,
				syscall_pad,
				syscall_names[temp_syscall[i].nr],
				temp_syscall[i].nr,
				TASK_COMM_LEN,
				temp_syscall[i].comm,
				temp_syscall[i].pid,
				temp_syscall[i].cpu,
				temp_syscall[i].args[0],
				temp_syscall[i].args[1],
				temp_syscall[i].args[2],
				temp_syscall[i].args[3],
				temp_syscall[i].args[4],
				temp_syscall[i].args[5]);
	}

	header.length = length;

	info->write_func(&header, data);

free_temp_syscall:
	kfree(temp_syscall);
	temp_syscall = NULL;
free_data:
	kfree(data);
exit:
	return ret;
}

/*
 * Bring TIF_SYSCALL_TRACEPOINT of @t in line with the pid filter: the flag
 * is set when no filter is active or @t belongs to the filtered thread
 * group, and cleared otherwise. The flag is only written when it changes.
 */
static inline void exchnd_set_traceflags(struct task_struct *t)
{
	int wanted = !exchnd_trace_pid ||
		(exchnd_trace_pid == t->group_leader->pid);
	int traced = test_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);

	if (wanted && !traced)
		set_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
	else if (!wanted && traced)
		clear_tsk_thread_flag(t, TIF_SYSCALL_TRACEPOINT);
}

/*
 * sched_switch probe: refresh the syscall tracepoint flag of both tasks
 * involved in the switch. Tasks without an mm are left untouched.
 * @data: probe cookie, unused
 * @prev: task being switched out
 * @next: task being switched in
 */
static void enable_trace(void *data,
		struct task_struct *prev,
		struct task_struct *next)
{
	if (prev->mm)
		exchnd_set_traceflags(prev);

	if (next->mm)
		exchnd_set_traceflags(next);
}

static void *exchnd_hist_syscall_init(struct exchnd_module *mod)
{
	int res = 0, i = 0;
	unsigned long flags;
	unsigned long (*sys_symbol)(int) = NULL;
	int size = sizeof(struct exchnd_hist_syscall)*EH_DEFAULT_HIST_COUNT;

	if (exchnd_get_debug() & EXCHND_DEBUG_MODULE)
		pr_info("Initializing %s.\n",
				exchnd_mod_names[EHM_HIST_SYSCALLS]);

	if (mod->opaque)
		goto exit;

	sys_symbol = (void *)
		kallsyms_lookup_name("arch_syscall_addr");
	if (!sys_symbol) {
		pr_err("Unable to find arch_syscall_addr.\n");
		goto exit;
	}

	/* Initialize the ring buffer */
	syscall_rb = kzalloc(size, GFP_KERNEL);
	if (!syscall_rb) {
		pr_err("Unable to initialize history ring buffer.\n");
		goto exit;
	}

	syscall_index = 0;
	res = tracepoint_probe_register("sys_enter", &exchnd_sys_entry,
			&syscall_rb_lock);
	if (res) {
		pr_err("sys_enter probe registration failed (%d)\n", res);
		goto free_syscall_rb;
	}

	res = tracepoint_probe_register("sched_switch", &enable_trace, NULL);
	if (res) {
		pr_err("sched_switch probe registration failed (%d)\n", res);
		goto unregister_sys_enter;
	}

	syscall_pad = 0;

	for (i = 0; i < NR_syscalls; i++) {
			sprint_symbol_no_offset(syscall_names[i],
					(*sys_symbol)(i));
			if (strlen(syscall_names[i]) > syscall_pad)
				syscall_pad = strlen(syscall_names[i]);
	}

	spin_lock_irqsave(&syscall_rb_lock, flags);
	syscall_reading = 0;

	mod->opaque = &exchnd_sys_entry;
	mod->execute = exchnd_hist_syscall_execute;
	spin_unlock_irqrestore(&syscall_rb_lock, flags);

	return mod->opaque;

unregister_sys_enter:
	tracepoint_probe_unregister("sys_enter",
			&exchnd_sys_entry,
			&syscall_rb_lock);
free_syscall_rb:
	kfree(syscall_rb);
	syscall_rb = NULL;
exit:
	return NULL;
}

/*
 * exchnd_hist_syscall_deinit - tear down the syscall history module
 * @mod: module descriptor; execute and opaque are cleared
 *
 * Does nothing when the module is not initialized (mod->opaque is NULL).
 * Otherwise the module is disabled under syscall_rb_lock, both probes are
 * unregistered and the ring buffer is freed.
 */
static void exchnd_hist_syscall_deinit(struct exchnd_module *mod)
{
	unsigned long flags;
	struct exchnd_hist_syscall *to_free = NULL;

	/* Safely destroy the ring buffer */
	spin_lock_irqsave(&syscall_rb_lock, flags);

	if (!mod->opaque) {
		spin_unlock_irqrestore(&syscall_rb_lock, flags);
		return;
	}

	/* First prevent execution */
	mod->execute = NULL;
	/* Prevent concurrent de-init */
	mod->opaque = NULL;
	/*
	 * Ignore ongoing syscalls: the probe returns early while this is
	 * non-zero. It is reset to 0 by exchnd_hist_syscall_init().
	 */
	syscall_reading++;

	spin_unlock_irqrestore(&syscall_rb_lock, flags);

	if (exchnd_get_debug() & EXCHND_DEBUG_MODULE)
		pr_info("De-initializing %s.\n",
				exchnd_mod_names[EHM_HIST_SYSCALLS]);

	/* Unregister the probe */
	tracepoint_probe_unregister("sys_enter",
			&exchnd_sys_entry,
			&syscall_rb_lock);

	tracepoint_probe_unregister("sched_switch",
			&enable_trace,
			NULL);

	/*
	 * NOTE(review): nothing here waits for probes that are still running
	 * before the buffer is freed below - confirm whether a
	 * tracepoint_synchronize_unregister() is needed.
	 */
	spin_lock_irqsave(&syscall_rb_lock, flags);
	/* Safely destroy the ring buffer */
	to_free = syscall_rb;
	syscall_rb = NULL;
	spin_unlock_irqrestore(&syscall_rb_lock, flags);

	kfree(to_free);
}

#define EHM_HIST_TASKSWITCHES_HEADER "====== task switch history:\n"
/*
 * exception handler module EHM_HIST_TASKSWITCHES
 *
 * Record up to EH_DEFAULT_HIST_COUNT task switches in a ring buffer and
 * provide the list of the last exchnd_read_hist_count of them.
 *
 */
static struct exchnd_hist_tswitch {
	pid_t		ppid;			/* prev process ID */
	pid_t		npid;			/* next process ID */
	u64		time;			/* Timestamp from cpu_clock() */
	char		cpu;			/* CPU id */
	char		pcomm[TASK_COMM_LEN];	/* prev task comm */
	char		ncomm[TASK_COMM_LEN];	/* next task comm */
} *tswitch_rb;

/* Taken by execute and deinit; exchnd_tswitch() does not take it. */
static DEFINE_SPINLOCK(tswitch_rb_lock);
/* Free-running write counter; masked to a ring index by the writer. */
static unsigned int tswitch_index;
/* Non-zero makes exchnd_tswitch() drop the sample. */
static int taskswitch_reading;

/*
 * get_tswitch_index - current value of the task switch history write counter
 *
 * Returns the unmasked counter; no lock is taken.
 */
unsigned int get_tswitch_index(void)
{
	return tswitch_index;
}

/*
 * exchnd_hist_tswitch_execute - write the recorded task switch history
 * @info:   exception description; task, trigger, tswitch_index and
 *          write_func are read
 * @module: module identifier stored in the message header
 *
 * The entries preceding @info->tswitch_index are copied out of the ring
 * buffer under tswitch_rb_lock, then formatted one line per entry. The text
 * is handed to write_func in blocks: a block is flushed once it has reached
 * EXCHND_MAX_TRACE_LEN.
 *
 * Return: 0 on success, when skipped, or when the ring buffer is gone;
 * -ENOMEM if a buffer cannot be allocated.
 */
static int exchnd_hist_tswitch_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;
	unsigned char *data = NULL;
	int cpylen = 0;
	int ret = 0;

	unsigned int i = 0;
	unsigned long flags;
	unsigned int start;
	unsigned int length = 0;

	/*
	 * exchnd_set_hist_size() can change exchnd_read_hist_count at any
	 * time. It is read once so that the allocation size, the copy
	 * lengths and the loop bound below all use the same value.
	 */
	const unsigned int count = exchnd_read_hist_count;
	const int size = sizeof(struct exchnd_hist_tswitch);

	struct exchnd_hist_tswitch *temp_tswitch = NULL;

	if (!info->task) {
		pr_warn("%s skipped as task is a NULL pointer.\n", __func__);
		ret = 0;
		goto exit;
	}

	if (in_interrupt())
		data = kzalloc(2*EXCHND_MAX_TRACE_LEN, GFP_ATOMIC);
	else
		data = kzalloc(2*EXCHND_MAX_TRACE_LEN, GFP_KERNEL);

	if (!data) {
		ret = -ENOMEM;
		goto exit;
	}

	/* Scratch copy of the entries to report. */
	if (in_interrupt())
		temp_tswitch = kzalloc(size * count, GFP_ATOMIC);
	else
		temp_tswitch = kzalloc(size * count, GFP_KERNEL);

	if (!temp_tswitch) {
		pr_err("Unable to initialize temporary history buffer.\n");
		ret = -ENOMEM;
		goto free_data;
	}

	/* Init header. Data has to be directly sent to outputs. */
	header.type = module;
	header.trigger = info->trigger;
	header.seq_num               = 0;
	header.flags.internal        = 0;
	header.flags.collected       = 1;
	header.flags.addition_needed = 0;
	header.pid                   = 0;

	spin_lock_irqsave(&tswitch_rb_lock, flags);
	if (!tswitch_rb) {
		/* Nothing to read anymore */
		spin_unlock_irqrestore(&tswitch_rb_lock, flags);
		goto free_temp_tswitch;
	}

	/* Search for task in the buffer */
	i = info->tswitch_index;

	start = (i - count) & (EH_DEFAULT_HIST_COUNT - 1);
	if ((start + 1) == EH_DEFAULT_HIST_COUNT)
		start = 0;

	/* The probe drops samples while taskswitch_reading is non-zero. */
	taskswitch_reading++;
	/* We are somewhere in the middle of the history */
	if ((EH_DEFAULT_HIST_COUNT - start) >= count)
		memcpy(temp_tswitch, &tswitch_rb[start], size * count);
	else {
		/* Wrapped: copy the tail of the ring, then its head. */
		cpylen = size*(EH_DEFAULT_HIST_COUNT - start);
		memcpy(temp_tswitch, tswitch_rb + start, cpylen);

		cpylen = size * count - cpylen;
		memcpy(temp_tswitch + (EH_DEFAULT_HIST_COUNT - start),
				tswitch_rb, cpylen);
	}
	taskswitch_reading--;

	spin_unlock_irqrestore(&tswitch_rb_lock, flags);

	length += snprintf(data + length,
			2*EXCHND_MAX_TRACE_LEN - length,
			EHM_HIST_TASKSWITCHES_HEADER);

	for (i = 0; i < count; i++) {
		/* Skip unused elements */
		if (temp_tswitch[i].time == 0)
			continue;

		/* Send the message to user space once we have one "block" */
		if (length >= EXCHND_MAX_TRACE_LEN) {
			header.length = length;
			info->write_func(&header, data);
			length = 0;
		}

		length += snprintf(data + length,
				2*EXCHND_MAX_TRACE_LEN - length,
				"%llu: switch from %s(%d) to %s(%d) on CPU %d\n",
				temp_tswitch[i].time,
				temp_tswitch[i].pcomm,
				temp_tswitch[i].ppid,
				temp_tswitch[i].ncomm,
				temp_tswitch[i].npid,
				temp_tswitch[i].cpu);
	}

	header.length = length;

	info->write_func(&header, data);

free_temp_tswitch:
	kfree(temp_tswitch);
	temp_tswitch = NULL;
free_data:
	kfree(data);
exit:
	return ret;
}

static void exchnd_tswitch(void *data,
		struct task_struct *prev,
		struct task_struct *next)
{
	int tmp_idx;

	/* Bad luck for data loss, but we can live with that */
	if (taskswitch_reading)
		return;

	if (exchnd_trace_pid &&
			(exchnd_trace_pid != prev->group_leader->pid) &&
			(exchnd_trace_pid != next->group_leader->pid))
		return;

	tmp_idx = tswitch_index++;
	tmp_idx &= (EH_DEFAULT_HIST_COUNT-1);

	/* Getting basic information */
	tswitch_rb[tmp_idx].ppid = prev->pid;
	tswitch_rb[tmp_idx].npid = next->pid;
	preempt_disable();
	tswitch_rb[tmp_idx].cpu = smp_processor_id();
	preempt_enable();
	tswitch_rb[tmp_idx].time = cpu_clock(tswitch_rb[tmp_idx].cpu);
	memcpy(tswitch_rb[tmp_idx].pcomm, prev->comm, TASK_COMM_LEN);
	memcpy(tswitch_rb[tmp_idx].ncomm, next->comm, TASK_COMM_LEN);
}

/*
 * Set up the task switch history module.
 *
 * Allocates the zeroed tswitch_rb ring buffer (EH_DEFAULT_HIST_COUNT
 * entries), resets the ring index and the reading counter, and registers
 * exchnd_tswitch() as probe of the "sched_switch" tracepoint. Only once all
 * of that succeeded are mod->opaque and mod->execute filled in.
 *
 * @mod: module descriptor to initialize
 *
 * Returns mod->opaque (the address of the probe) on success. Returns NULL if
 * the module is already initialized (mod->opaque set), if the allocation
 * fails or if the probe cannot be registered; in the latter case the ring
 * buffer is released again.
 */
void *exchnd_hist_tswitch_init(struct exchnd_module *mod)
{
	int bytes = sizeof(struct exchnd_hist_tswitch)*EH_DEFAULT_HIST_COUNT;
	int err = 0;

	if (exchnd_get_debug() & EXCHND_DEBUG_MODULE)
		pr_info("Initializing %s.\n",
				exchnd_mod_names[EHM_HIST_TASKSWITCHES]);

	/* Already set up: nothing to do */
	if (mod->opaque)
		return NULL;

	/* Initialize the ring buffer */
	tswitch_rb = kzalloc(bytes, GFP_KERNEL);
	if (!tswitch_rb) {
		pr_err("Unable to initialize history ring buffer.\n");
		return NULL;
	}

	/* Start recording from a clean state */
	tswitch_index = 0;
	taskswitch_reading = 0;

	err = tracepoint_probe_register("sched_switch", &exchnd_tswitch, NULL);
	if (err) {
		pr_err("sched_switch probe registration failed (%d)\n", err);
		/* Without the probe the buffer is of no use */
		kfree(tswitch_rb);
		tswitch_rb = NULL;
		return NULL;
	}

	/* Publish the module only after everything is in place */
	mod->opaque = &exchnd_tswitch;
	mod->execute = exchnd_hist_tswitch_execute;

	return mod->opaque;
}

/*
 * Tear down the task switch history module.
 *
 * Under tswitch_rb_lock the module is first marked unusable (execute and
 * opaque cleared) and taskswitch_reading is raised so that exchnd_tswitch()
 * stops recording. The probe is then unregistered and, once no probe can
 * still be running, the ring buffer is detached under the lock and freed.
 *
 * Does nothing if the module is not initialized (mod->opaque is NULL).
 *
 * @mod: module descriptor to de-initialize
 */
void exchnd_hist_tswitch_deinit(struct exchnd_module *mod)
{
	unsigned long flags;
	struct exchnd_hist_tswitch *to_free = NULL;
	int res;

	/* Prevent execution of the collector. */
	spin_lock_irqsave(&tswitch_rb_lock, flags);

	if (!mod->opaque) {
		spin_unlock_irqrestore(&tswitch_rb_lock, flags);
		return;
	}

	/* First prevent execution */
	mod->execute = NULL;
	/* Prevent concurrent de-init */
	mod->opaque = NULL;
	/* Ignore ongoing task switches */
	taskswitch_reading++;

	spin_unlock_irqrestore(&tswitch_rb_lock, flags);

	if (exchnd_get_debug() & EXCHND_DEBUG_MODULE)
		pr_info("De-initializing %s.\n",
				exchnd_mod_names[EHM_HIST_TASKSWITCHES]);

	/* First unregister the probe */
	res = tracepoint_probe_unregister("sched_switch", &exchnd_tswitch,
			NULL);
	if (res)
		pr_err("sched_switch probe unregistration failed (%d)\n", res);

	/*
	 * A probe that passed the taskswitch_reading check before it was
	 * raised may still be writing into the ring buffer on another CPU.
	 * Wait for all such callers to finish before the buffer is freed.
	 */
	tracepoint_synchronize_unregister();

	/* Safely destroy the ring buffer */
	spin_lock_irqsave(&tswitch_rb_lock, flags);
	to_free = tswitch_rb;
	tswitch_rb = NULL;
	spin_unlock_irqrestore(&tswitch_rb_lock, flags);

	kfree(to_free);
}

#define EHM_SCHED_INFO_HEADER "====== scheduler information:\n"
/*
 * exception handler module EHM_SCHED_INF
 *
 * The module collects the scheduling-related information that is available
 * in the task structure.
 */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
/*
 * Collect the sched_info counters of the excepting task.
 *
 * Formats pcount, run_delay, last_arrival and last_queued of
 * info->task->sched_info into a text block and hands it, together with a
 * message header, to info->write_func().
 *
 * @info:   exception description; task, trigger, sig and write_func are used
 * @module: module identifier stored in the message header type
 *
 * Returns 0 on success or when there is no task to inspect, -ENOMEM if the
 * text buffer cannot be allocated.
 */
static int exchnd_sched_inf_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	struct exchnd_message_header header;
	unsigned char *data = NULL;
	unsigned int len = 0;

	if (!info->task) {
		pr_warn("%s skipped as task is a NULL pointer.\n", __func__);
		return 0;
	}

	/* Must not sleep for the allocation when called from interrupt */
	if (in_interrupt())
		data = kzalloc(EXCHND_MAX_TRACE_LEN, GFP_ATOMIC);
	else
		data = kzalloc(EXCHND_MAX_TRACE_LEN, GFP_KERNEL);

	if (!data)
		return -ENOMEM;

	/*
	 * The header lives on the stack: clear it so that no field (or
	 * padding) that is not explicitly set below carries stale stack
	 * content to the writer.
	 */
	memset(&header, 0, sizeof(header));

	header.type = module;
	header.trigger = info->trigger;
	header.pid = info->task->pid;
	/* Same as the backtrace module: report the triggering signal */
	header.sig = info->sig;
	header.seq_num               = 0;
	header.flags.collected       = 1;
	header.flags.internal        = 0;
	header.flags.addition_needed = 0;

	len = snprintf(data, EXCHND_MAX_TRACE_LEN, EHM_SCHED_INFO_HEADER);

	len += snprintf(data + len,
			EXCHND_MAX_TRACE_LEN - len,
			"pcount:       %20lu\n"
			"run_delay:    %20llu\n"
			"last arrival: %20llu\n"
			"last_queued:  %20llu\n",
			info->task->sched_info.pcount,
			info->task->sched_info.run_delay,
			info->task->sched_info.last_arrival,
			info->task->sched_info.last_queued);

	/*
	 * snprintf() returns the length the text would have had without
	 * truncation; never report more than what is really in the buffer.
	 */
	if (len >= EXCHND_MAX_TRACE_LEN)
		len = EXCHND_MAX_TRACE_LEN - 1;

	header.length = len;
	info->write_func(&header, data);
	kfree(data);

	return 0;
}
#else
/*
 * Variant built when neither CONFIG_SCHEDSTATS nor CONFIG_TASK_DELAY_ACCT is
 * defined: there is nothing to collect, so only a warning is logged.
 *
 * @info:   exception description, not used
 * @module: module identifier, not used
 *
 * Always returns 0.
 */
static int exchnd_sched_inf_execute(struct exception_info *info,
		enum exchnd_modules module)
{
	/* Tell the user why the module produced no output */
	pr_warn(
		"%s can't be executed as there is no data available.\n",
		__func__);

	return 0;
}
#endif

/* Defines the callbacks and data of the available EH modules */
struct exchnd_module exchnd_module_list[EHM_LAST_ELEMENT] = {
	[EHM_BACKTRACE] = {
		.execute = exchnd_backtrace_execute },
	[EHM_BACKTRACE_ALL_THREADS] = {
		.execute = exchnd_backtrace_all_threads_execute },
	[EHM_CGROUPS] = {
		.execute = exchnd_cgroups_execute },
	[EHM_CORE_DUMP] = {
		.execute = NULL },
	[EHM_CPU_USAGE] = {
		.execute = exchnd_cpu_usage_execute },
	[EHM_FAULT_ADDRESS] = {
		.execute = exchnd_fault_address_execute },
	[EHM_FS_STATE] = {
		.execute = exchnd_fs_state_execute },
	[EHM_HIST_SYSCALLS] = {
		.execute = NULL,
		.init = exchnd_hist_syscall_init,
		.deinit = exchnd_hist_syscall_deinit },
	[EHM_HIST_TASKSWITCHES] = {
		.execute = NULL,
		.init = exchnd_hist_tswitch_init,
		.deinit = exchnd_hist_tswitch_deinit },
	[EHM_LRU_MEM_PAGES] = {
		.execute = NULL },
	[EHM_MEMORY_DUMP] = {
		.execute = exchnd_memory_dump_execute },
	[EHM_MEMORY_MAP] = {
		.execute = exchnd_memory_map_execute },
	[EHM_MEMORY_USAGE] = {
		.execute = exchnd_memory_usage_execute },
	[EHM_PROCESSOR_REGISTERS] = {
		.execute = exchnd_processor_registers_execute },
	[EHM_SYSTEM_INFO] = {
		.execute = exchnd_system_info_execute },
	[EHM_PROCESS_LIST] = {
		.execute = exchnd_process_list_execute },
	[EHM_SCHED_INF] = {
		.execute = exchnd_sched_inf_execute },
	[EHM_STACK_DUMP] = {
		.execute = exchnd_stack_dump_execute },
	[EHM_THREAD_LIST] = {
		.execute = exchnd_thread_list_execute },
	[EHM_ACTION_START] = {
		.execute = NULL },
	[EHM_SYS_RESTART] = {
		.execute = exchnd_sys_restart_execute },
	[EHM_APPLICATION_SPECIFIC1] = {
		.execute = exchnd_application_specific_execute },
	[EHM_APPLICATION_SPECIFIC2] = {
		.execute = exchnd_application_specific_execute },
	[EHM_APPLICATION_SPECIFIC3] = {
		.execute = exchnd_application_specific_execute },
	[EHM_APPLICATION_SPECIFIC4] = {
		.execute = exchnd_application_specific_execute },
	[EHM_APPLICATION_SPECIFIC5] = {
		.execute = exchnd_application_specific_execute },
};
