/*
 * af_inc.c -
 *
 *   Copyright (c) 2012 Robert Bosch GmbH, Hildesheim
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/uaccess.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/socket.h>
#include <linux/if_ether.h>
#include <linux/inetdevice.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/printk.h>
#include <linux/wait.h>
#include <net/tcp.h>
#include "linux/inc.h"
#include "af_inc.h"

/* Banner printed by inc_init(); carries its own KERN_INFO log level. */
static __initdata const char banner[] = KERN_INFO
	"INC: Inter Node Communications (" "1.0" ")\n";

MODULE_DESCRIPTION("Inter Node Communication PF_INC core");
MODULE_LICENSE("GPL");
/* Author in the usual "Name <email>" form. */
MODULE_AUTHOR("Peter Waechtler <external.Peter.Waechtler@de.bosch.com>");

/* Alias for on-demand loading of this module by protocol family. */
MODULE_ALIAS_NETPROTO(PF_INC);

/*
 * Table of registered INC protocols, indexed by protocol number.
 * Entries are added by inc_proto_register() and cleared by
 * inc_proto_unregister(); proto_tab_lock is held around every access
 * in this file.
 */
static const struct inc_proto *proto_tab[INC_NPROTO] __read_mostly;
static DEFINE_SPINLOCK(proto_tab_lock);

/*
 * lookup_lock is taken (with bottom halves disabled) around accesses to
 * sk_bindings[] in inc_lookup(), inc_lookup_add() and inc_lookup_rm(),
 * and around the fc_dev[] search in inc_lookup_waitq().
 */
static DEFINE_SPINLOCK(lookup_lock);
/* NOTE(review): incnet_lock is not referenced in the visible code. */
static DEFINE_MUTEX(incnet_lock);

/* number of slots in sk_bindings[] */
#define INC_MAX_BINDINGS (256)
/* number of flow-control channels kept per registered device */
#define INC_MAX_CHANNELS INC_MAX_BINDINGS
/* number of slots in fc_dev[] */
#define INC_MAX_FC_DEVICES 2

/* Only 8-bit ports are supported on the link layer: keep the low byte. */
#define INC_PORT(inetport) ((inetport)&0x00ff)

/* Bound sockets; a NULL entry marks a free slot. */
static struct inc_sock *sk_bindings[INC_MAX_BINDINGS];

/* Flow-control state of a single channel on one device. */
struct ssi_fc {
	/*
	 * Last state stored via inc_channel_setstate(); reset to 0 when the
	 * device is registered or goes down.
	 */
	int fc_state;
	/* Woken by flowcontrol() and on NETDEV_DOWN. */
	wait_queue_head_t fc_waitq;
};

/* Per-netdevice flow-control bookkeeping, one slot per INC device. */
struct fc_device {
	/* true while this slot is assigned to a netdevice */
	bool registered;
	/* ifindex of the netdevice owning this slot (valid if registered) */
	int ifindex;
	/* one flow-control entry per channel, indexed by port */
	struct ssi_fc fc[INC_MAX_CHANNELS];
};

/* Slots are claimed and released by inc_notifier(). */
static struct fc_device fc_dev[INC_MAX_FC_DEVICES];

/* Reinterpret a generic socket pointer as the INC socket it is cast to. */
static inline struct inc_sock *inc_sk(const struct sock *sk)
{
	struct inc_sock *isk = (struct inc_sock *)sk;

	return isk;
}

static void init_sk_lookup(void)
{
	int i;

	for (i = 0; i < INC_MAX_BINDINGS; i++)
		sk_bindings[i] = NULL;

	for (i = 0; i < INC_MAX_FC_DEVICES; i++)
		fc_dev[i].registered = 0;
}

/*
 * inc_lookup - find the socket that should receive a frame
 * @ifindex: index of the receiving netdevice
 * @port: destination port (network byte order)
 *
 * A binding matches when
 *  - its local port (low 8 bits) equals the requested port,
 *  - the socket is in state TCP_ESTABLISHED, and
 *  - the socket is not bound to a device, or is bound to @ifindex.
 *
 * Ports inside the autobind range are never delivered to, because the
 * remote side cannot know such an address.
 *
 * Returns the first matching socket or NULL if there is none. No
 * reference is taken on the returned socket.
 */
static struct sock *inc_lookup(int ifindex, __be16 port)
{
	struct inc_sock *found = NULL;
	int i;

	PDEBUG("port %d if %d\n", ntohs(port), ifindex);

	/*RECEIVE ON AUTOBIND PORTS IS NOT POSSIBLE,
	AS REMOTE SIDE CAN'T KNOW THAT ADDRESS !!*/
	if (ntohs(port) >= INC_PORT(INC_AUTOBIND_PORT_MIN)) {
		PDEBUG("ignore receive on autobind port %d if %d\n",
				ntohs(port), ifindex);
		return NULL;
	}

	spin_lock_bh(&lookup_lock);
	for (i = 0; i < INC_MAX_BINDINGS; i++) {
		struct inc_sock *isk = sk_bindings[i];

		if (!isk)
			continue;
		PDEBUG("port %d(%d) addr %pI4 bound_if %d state %d\n",
				ntohs(isk->inet_sport),
				INC_PORT(ntohs(isk->inet_sport)),
				&isk->inet_saddr,
				isk->sk.sk_bound_dev_if, isk->sk.sk_state);

		if ((INC_PORT(ntohs(isk->inet_sport)) == ntohs(port)) &&
			(isk->sk.sk_state == TCP_ESTABLISHED) &&
			((isk->sk.sk_bound_dev_if == 0) ||
			(isk->sk.sk_bound_dev_if == ifindex))) {
			/*
			 * Record the hit separately: the loop cursor must not
			 * be used as the result, otherwise a non-matching last
			 * entry would be returned when nothing matches.
			 */
			found = isk;
			break;
		}
	}
	spin_unlock_bh(&lookup_lock);

	if (!found)
		return NULL;
	return &found->sk;
}


/*
 * inc_lookup_try - try to enter a socket into the binding table
 * @sk: socket to bind
 * @addr: local address to record for the socket
 * @port: local port to record for the socket (network byte order)
 * @accepted: socket is a clone of a server port created on accept;
 *            the "already in use" scan is skipped for it
 *
 * inc_lookup_add() calls this with lookup_lock held.
 *
 * Returns 0 on success, -1 if an existing binding uses INADDR_ANY or the
 * same address/port pair, -2 if the table has no free slot.
 */
static int inc_lookup_try(struct sock *sk, __be32 addr, __be16 port,
		bool accepted)
{
	struct inc_sock *self = inc_sk(sk);
	int slot;

	/* conflict scan, only for sockets that are not accept() clones */
	for (slot = 0; !accepted && slot < INC_MAX_BINDINGS; slot++) {
		struct inc_sock *cur = sk_bindings[slot];

		if (!cur)
			continue;

		if (cur->inet_saddr == INADDR_ANY) {
			PERRMEM("local INADDR_ANY not supported\n");
			return -1;
		}

		if (INC_PORT(ntohs(cur->inet_sport)) != INC_PORT(ntohs(port)))
			continue;
		if (cur->inet_saddr != addr)
			continue;

		/*already in use!*/
		PERRMEM("%pI4:%d already used by 0x%p\n",
				&addr, ntohs(port), cur);
		return -1;
	}

	/* claim the first free slot */
	for (slot = 0; slot < INC_MAX_BINDINGS; slot++) {
		if (sk_bindings[slot])
			continue;

		sk_bindings[slot] = self;
		self->inet_sport = port;
		self->inet_saddr = addr;
		PDEBUG("added port %d addr %pI4 for %p\n",
				ntohs(port), &addr, self);
		return 0;
	}

	PERRMEM("no space left to add port %d\n", ntohs(port));
	return -2;/*no space left*/
}

/*
 * inc_lookup_add - bind a socket to a local address/port
 * @sk: socket to bind
 * @addr: local address
 * @port: local port in network byte order, or INC_PORT_ANY to pick the
 *        first usable port from the autobind range
 * @accepted: socket is an accept() clone (skips the in-use test);
 *            ignored for autobind
 *
 * Returns 0 on success, -1 if the port is reserved (low byte 0xff), lies
 * in the autobind range, or is already in use, and -2 if the binding
 * table is full.
 */
int inc_lookup_add(struct sock *sk, __be32 addr, __be16 port, bool accepted)
{
	/* defined result even if the autobind range turns out to be empty */
	int ret = -1;

	if (INC_PORT(ntohs(port)) == 0xff) {
		PERRMEM("%pI4:%d: lun ff reserved\n", &addr, ntohs(port));
		return -1;
	}
	if (port == INC_PORT_ANY) {
		/*
		 * The autobind range is given in host byte order, so walk it
		 * with a separate host-order counter instead of reusing the
		 * big-endian parameter.
		 */
		int hport;

		for (hport = INC_AUTOBIND_PORT_MIN;
				hport < INC_AUTOBIND_PORT_MAX; hport++) {
			spin_lock_bh(&lookup_lock);
			ret = inc_lookup_try(sk, addr, htons(hport), false);
			spin_unlock_bh(&lookup_lock);
			if (ret != -1) /*stop on no space or success*/
				break;
		}
		PDEBUG("AUTO: port %d addr %pI4 ret %d\n", hport, &addr, ret);
	} else {
		/*dont bind in autobind region*/
		if (INC_PORT(ntohs(port)) >= INC_PORT(INC_AUTOBIND_PORT_MIN)) {
			PDEBUG("can't use autobind port %d (%d/%d)\n",
					ntohs(port), INC_PORT(ntohs(port)),
					INC_PORT(INC_AUTOBIND_PORT_MIN));
			return -1;
		}
		spin_lock_bh(&lookup_lock);
		ret = inc_lookup_try(sk, addr, port, accepted);
		spin_unlock_bh(&lookup_lock);
		PDEBUG("MAN: port %d addr %pI4 ret %d\n",
				ntohs(port), &addr, ret);
	}
	return ret;
}
EXPORT_SYMBOL(inc_lookup_add);


int inc_lookup_rm(struct sock *sk)
{
	int i;
	int removed = 0;

	spin_lock_bh(&lookup_lock);
	for (i = 0; i < INC_MAX_BINDINGS; i++)
		if (sk_bindings[i] == inc_sk(sk)) {
			PDEBUG("remove port %d addr %pI4 for %p\n",
				ntohs(inc_sk(sk)->inet_sport),
				&inc_sk(sk)->inet_saddr, inc_sk(sk));
			sk_bindings[i] = NULL;
			removed++;
		}
	spin_unlock_bh(&lookup_lock);
	return removed;
}
EXPORT_SYMBOL(inc_lookup_rm);


/*
 * inc_lookup_waitq - get the flow-control wait queue of a channel
 * @ifindex: netdevice index
 * @port: channel number; used as-is as an index into the per-device
 *        channel table
 *
 * Returns the wait queue of channel @port on the registered device with
 * the given @ifindex, or NULL if @port is out of range or no such device
 * is registered.
 */
wait_queue_head_t *inc_lookup_waitq(int ifindex, __be16 port)
{
	wait_queue_head_t *result = NULL;
	struct fc_device *fcd;
	int idx;

	spin_lock_bh(&lookup_lock);
	if (port >= INC_MAX_CHANNELS) {
		PERRMEM("INC_MAX_CHANNELS exceeded: %d\n", port);
	} else {
		for (idx = 0; idx < INC_MAX_FC_DEVICES; idx++) {
			fcd = &fc_dev[idx];
			if (!fcd->registered || fcd->ifindex != ifindex)
				continue;
			result = &fcd->fc[port].fc_waitq;
			break;
		}
	}
	spin_unlock_bh(&lookup_lock);

	return result;
}
EXPORT_SYMBOL(inc_lookup_waitq);


/*
 * inc_channel_setstate - store a new flow-control state for a channel
 * @ifindex: netdevice index
 * @port: channel number; used as-is as an index
 * @newstate: value to store
 *
 * Updates the first registered device whose ifindex matches. Nothing is
 * stored if @port is out of range or no such device is registered.
 */
void inc_channel_setstate(int ifindex, __be16 port, int newstate)
{
	struct fc_device *fcd;
	int idx;

	if (port >= INC_MAX_CHANNELS) {
		PERRMEM("INC_MAX_CHANNELS exceeded: %d\n", port);
		return;
	}

	for (idx = 0; idx < INC_MAX_FC_DEVICES; idx++) {
		fcd = &fc_dev[idx];
		if (!fcd->registered || fcd->ifindex != ifindex)
			continue;
		set_mb(fcd->fc[port].fc_state, newstate);
		return;
	}
}
EXPORT_SYMBOL(inc_channel_setstate);


/*
 * inc_channel_getstate - read the flow-control state of a channel
 * @ifindex: netdevice index
 * @port: channel number; used as-is as an index
 *
 * Returns the stored state of channel @port on the first registered
 * device whose ifindex matches, or 0 if @port is out of range or no such
 * device is registered.
 */
int inc_channel_getstate(int ifindex, __be16 port)
{
	const struct fc_device *fcd;
	int idx;

	if (port >= INC_MAX_CHANNELS) {
		PERRMEM("INC_MAX_CHANNELS exceeded: %d\n", port);
		return 0;
	}

	for (idx = 0; idx < INC_MAX_FC_DEVICES; idx++) {
		fcd = &fc_dev[idx];
		if (fcd->registered && fcd->ifindex == ifindex)
			return fcd->fc[port].fc_state;
	}

	return 0;
}
EXPORT_SYMBOL(inc_channel_getstate);


/*
 * af_inc socket functions
 */
int inc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	struct sock *sk = sock->sk;
	void __user *argp = (void __user *)arg;
	int err = -ENOIOCTLCMD;

	PDEBUG("\n");
	switch (cmd) {
	case SIOCGSTAMP:
		err = sock_get_timestamp(sk, (struct timeval __user *)argp);
		break;
	case SIOCGSTAMPNS:
		err = sock_get_timestampns(sk, argp);
		break;
	default:
		break;
	}
	return err;
}
EXPORT_SYMBOL(inc_ioctl);

/*
 * Socket destructor installed by inc_new(): drop any frames still queued
 * for reception and remove the socket from the binding table.
 */
static void inc_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_receive_queue);
	/* the number of removed bindings is of no interest here */
	inc_lookup_rm(sk);
}


struct sock *inc_new(struct net *net, struct socket *sock, int protocol,
							struct proto *prot)
{
	struct sock *sk = sk_alloc(net, PF_INC, GFP_KERNEL, prot);
	int err = 0;
	if (!sk)
		return NULL;

	sock_init_data(sock, sk);
	sk->sk_destruct = inc_sock_destruct;
	sk->sk_protocol = protocol;
	sk->sk_bound_dev_if = 0;
	if (sk->sk_prot->init)
		err = sk->sk_prot->init(sk);

	if (err) {
		/* release sk on errors */
		sock_orphan(sk);
		sock_put(sk);
		return NULL;
	}
	return sk;
}
EXPORT_SYMBOL(inc_new);

static int inc_create(struct net *net, struct socket *sock,
		int protocol, int kern)
{
	struct sock *sk;
	const struct inc_proto *cp = NULL;
	int err = 0;
	int i;
	int try_request = 1;
	PDEBUG("\n");
	sock->state = SS_UNCONNECTED;

	if (protocol < 0 || protocol >= INC_NPROTO)
		return -EINVAL;

	if (net != &init_net)
		return -EAFNOSUPPORT;

	/*no specific protocol selection*/
	if (protocol != INC_PROTO_DEFAULT)
		return -EPROTONOSUPPORT;

proto_lookup:
	spin_lock(&proto_tab_lock);
	for (i = 0; i < INC_NPROTO; i++) {
		if (proto_tab[i]) {
			if (sock->type == proto_tab[i]->type) {
				cp = proto_tab[i];
				if (!try_module_get(cp->prot->owner)) {
					cp = NULL;
					try_request = 0;
					break;
				}
			}
		}
	}
	spin_unlock(&proto_tab_lock);

	if (!cp && try_request) {
		try_request = 0;
		request_module("net-pf-%d-type-%d", PF_INC, sock->type);
		goto proto_lookup;
	}

	/* check for available protocol and correct usage */
	if (!cp)
		return -EPROTONOSUPPORT;

	sock->ops = cp->ops;

	sk = inc_new(net, sock, protocol, cp->prot);

	module_put(cp->prot->owner);
	return err;
}

/*
 * af_inc tx path
 */

/**
 * inc_send - transmit an INC frame
 * @skb: pointer to socket buffer with frame in data section
 * @loop: if non-zero, do not transmit; a clone of @skb is handed to
 *        netif_rx_ni() instead and @skb itself is freed
 *
 * The skb is consumed on every path. Positive results of
 * dev_queue_xmit() are translated with net_xmit_errno().
 *
 * Return:
 *  0 on success
 *  -ENETDOWN when the selected interface is down
 *  -ENOBUFS on full driver queue (see net_xmit_errno())
 *  -EPERM when trying to send on a non-INC interface
 *  -EINVAL when the skb->data does not contain a valid INC frame
 */
int inc_send(struct sk_buff *skb, int loop)
{
	struct sk_buff *copy;
	int err = 0;

	/* sanity checks, evaluated in this order */
	if (skb->len < sizeof(struct inc_hdr))
		err = -EINVAL;
	else if (skb->dev->type != ETH_P_INC)
		err = -EPERM;
	else if (!(skb->dev->flags & IFF_UP))
		err = -ENETDOWN;

	if (err) {
		kfree_skb(skb);
		return err;
	}

	skb->priority = skb->sk->sk_priority;
	skb->protocol = htons(ETH_P_INC);
	skb_reset_network_header(skb);
	skb_reset_transport_header(skb);

	PDEBUG("skb->len: %d\n", skb->len);
	/* indication for the driver: no loopback required */
	skb->pkt_type = PACKET_HOST;

	if (!loop) {
		/* send to netdevice */
		err = dev_queue_xmit(skb);
		PDEBUG("dev_queue_xmit: %d\n", err);
		return err > 0 ? net_xmit_errno(err) : err;
	}

	/* local loopback: feed a clone into the receive path */
	copy = skb_clone(skb, GFP_KERNEL);
	if (copy)
		netif_rx_ni(copy);
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL(inc_send);


/*
 * flowcontrol - handle flow-control codes of a received frame
 * @sk: socket found for the frame, may be NULL
 * @port: channel the code applies to
 * @fc: code to evaluate
 * @ifindex: index of the receiving netdevice
 *
 * INC_CHANNEL_XOFF stores the new channel state. INC_CHANNEL_XON stores
 * the new state and wakes everybody waiting on the channel.
 * INC_CHANNEL_BROKEN sets ECONNABORTED on @sk (if any) and wakes the
 * waiters so they can return with that error.
 *
 * Returns 1 if @fc was one of these codes, 0 otherwise.
 */
static int flowcontrol(struct sock *sk, unsigned short port,
					uint8_t fc, int ifindex)
{
	wait_queue_head_t *wq;

	if (fc == INC_CHANNEL_XOFF) {
		PDEBUG("CHANNEL XOFF: on port %d\n", port);
		inc_channel_setstate(ifindex, port, fc);
		return 1;
	}

	if (fc == INC_CHANNEL_XON) {
		PDEBUG("CHANNEL XON: on port %d\n", port);
		inc_channel_setstate(ifindex, port, fc);
		PDEBUG("wakeup:\n");
		wq = inc_lookup_waitq(ifindex, port);
		if (!wq)
			PERR("INC_CHANNEL_XON: waitq not found: %d %d\n",
					ifindex, port);
		else
			wake_up_all(wq);
		return 1;
	}

	if (fc == INC_CHANNEL_BROKEN) {
		PDEBUG("CHANNEL BROKEN: on port %d\n", port);
		if (sk)
			sk->sk_err = ECONNABORTED;
		/* wake potential waiters to return with error */
		wq = inc_lookup_waitq(ifindex, port);
		if (!wq)
			PERR("INC_CHANNEL_BROKEN: waitq not found: %d %d\n",
					ifindex, port);
		else
			wake_up_all(wq);
		return 1;
	}

	return 0;
}
/*
 * af_inc rx path
 */
/*
 * inc_rcv - packet_type receive handler for ETH_P_INC frames
 * @skb: received frame, starting with a struct inc_hdr
 * @dev: receiving netdevice
 * @pt: packet type this handler was registered with (unused)
 * @orig_dev: original receiving device (unused)
 *
 * Frames from non-INC devices, from outside init_net, or too short to
 * hold an INC header are dropped. Flow-control frames are processed by
 * flowcontrol() and consumed. All other frames are queued to the socket
 * found by inc_lookup(), or consumed if there is none.
 *
 * Always returns 0; the skb is released or queued on every path.
 */
static int inc_rcv(struct sk_buff *skb, struct net_device *dev,
		   struct packet_type *pt, struct net_device *orig_dev)
{
	struct inc_hdr *ih;
	struct sock *sk;
	int err;

	PDEBUG("dev->name: %s\n", dev->name);
	if (dev->type != ETH_P_INC || !net_eq(dev_net(dev), &init_net)) {
		kfree_skb(skb);
		return 0;
	}

	/*
	 * Make sure a complete header is present in the linear data area
	 * before any header field is read.
	 */
	if (!pskb_may_pull(skb, sizeof(struct inc_hdr))) {
		kfree_skb(skb);
		return 0;
	}

	/* pskb_may_pull() may have moved the data: fetch the pointer now */
	ih = (struct inc_hdr *)skb->data;
	skb_pull(skb, sizeof(struct inc_hdr));

	sk = inc_lookup(dev->ifindex, htons(ih->dest_lun));
	if (flowcontrol(sk, ih->dest_lun, ih->dest_node, dev->ifindex)) {
		/* consume the skbuff allocated by the netdevice driver */
		PDEBUG("inc_lookup: sk=%p\n", sk);
		consume_skb(skb);
		return 0;
	}

	if (sk) {
		err = sock_queue_rcv_skb(sk, skb);
		if (err < 0) {
			PDEBUG("sock_queue_rcv_skb: %d\n", err);
			kfree_skb(skb);
		}
	} else {
		/* consume the skbuff allocated by the netdevice driver */
		PDEBUG("inc_lookup: sk=%p\n", sk);
		consume_skb(skb);
	}

	return 0;
}

/*
 * af_inc protocol functions
 */

/**
 * inc_proto_register - register INC transport protocol
 * @cp: pointer to INC protocol structure
 *
 * Return:
 *  0 on success
 *  -EINVAL invalid (out of range) protocol number
 *  -EBUSY  protocol already in use
 *  -ENOBUF if proto_register() fails
 */
int inc_proto_register(const struct inc_proto *cp)
{
	int proto = cp->protocol;
	int err = 0;

	if (proto < 0 || proto >= INC_NPROTO) {
		PERR("protocol no %d out of range\n", proto);
		return -EINVAL;
	}

	err = proto_register(cp->prot, 0);
	if (err < 0)
		return err;

	spin_lock(&proto_tab_lock);
	if (proto_tab[proto]) {
		PERR("protocol %d already registered\n", proto);
		err = -EBUSY;
	} else {
		proto_tab[proto] = cp;
	}
	spin_unlock(&proto_tab_lock);

	if (err < 0)
		proto_unregister(cp->prot);

	return err;
}
EXPORT_SYMBOL(inc_proto_register);

/**
 * inc_proto_unregister - unregister transport protocol
 * @cp: pointer to protocol structure
 */
void inc_proto_unregister(const struct inc_proto *cp)
{
	int proto = cp->protocol;

	spin_lock(&proto_tab_lock);
	if (!proto_tab[proto]) {
		PERR("BUG: inc: protocol %d is not registered\n", proto);
	}
	proto_tab[proto] = NULL;
	spin_unlock(&proto_tab_lock);

	proto_unregister(cp->prot);
}
EXPORT_SYMBOL(inc_proto_unregister);

/*
 * af_inc notifier to create/remove INC netdevice specific structs
 */
/*
 * inc_notifier - netdevice event handler
 * @nb: notifier block (unused)
 * @msg: netdevice event
 * @data: event payload, used directly as the struct net_device pointer
 *
 * Only INC devices in init_net are considered.
 *  NETDEV_REGISTER   claims the first free fc_dev[] slot for the device
 *                    and resets all of its channels.
 *  NETDEV_UNREGISTER releases the slot owned by the device.
 *  NETDEV_DOWN       resets the state of every channel of the device to 0
 *                    and wakes all waiters.
 *
 * Always returns NOTIFY_DONE.
 */
static int inc_notifier(struct notifier_block *nb, unsigned long msg,
			void *data)
{
	struct net_device *dev = (struct net_device *)data;
	wait_queue_head_t *wq;
	int slot, ch;

	if (!net_eq(dev_net(dev), &init_net) || dev->type != ETH_P_INC)
		return NOTIFY_DONE;

	switch (msg) {

	case NETDEV_REGISTER:
		PDEBUG("register dev %s ifindex %d\n",
				dev->name, dev->ifindex);

		/* look for the first slot that is not in use */
		for (slot = 0; slot < INC_MAX_FC_DEVICES; slot++)
			if (!fc_dev[slot].registered)
				break;

		if (slot == INC_MAX_FC_DEVICES) {
			PERR("INC_MAX_FC_DEVICES (%d) exceeded by: %s\n",
					INC_MAX_FC_DEVICES, dev->name);
			break;
		}

		fc_dev[slot].registered = 1;
		fc_dev[slot].ifindex = dev->ifindex;
		for (ch = 0; ch < INC_MAX_CHANNELS; ch++) {
			fc_dev[slot].fc[ch].fc_state = 0;
			init_waitqueue_head(&fc_dev[slot].fc[ch].fc_waitq);
		}
		PDEBUG("fc_dev[%d] registered\n", slot);
		break;

	case NETDEV_UNREGISTER:
		PDEBUG("un-register dev %s ifindex %d\n",
				dev->name, dev->ifindex);

		/* look for the slot owned by this device */
		for (slot = 0; slot < INC_MAX_FC_DEVICES; slot++)
			if (fc_dev[slot].registered &&
					fc_dev[slot].ifindex == dev->ifindex)
				break;

		if (slot == INC_MAX_FC_DEVICES) {
			PERR("couldn't unregister dev %s\n", dev->name);
			break;
		}

		fc_dev[slot].registered = 0;
		PDEBUG("fc_dev[%d] unregistered\n", slot);
		break;

	case NETDEV_DOWN:
		PDEBUG("going-down dev %s ifindex %d\n",
				dev->name, dev->ifindex);

		for (ch = 0; ch < INC_MAX_CHANNELS; ch++) {
			inc_channel_setstate(dev->ifindex, ch, 0);
			wq = inc_lookup_waitq(dev->ifindex, ch);
			if (wq)
				wake_up_all(wq);
		}
		break;

	default:
		break;
	}

	return NOTIFY_DONE;
}

/*
 * af_inc module init/exit functions
 */

/*
 * Receive hook: frames with protocol ETH_P_INC are passed to inc_rcv().
 * .dev is left NULL, i.e. the hook is not tied to one particular device.
 */
static struct packet_type inc_packet __read_mostly = {
	.type = cpu_to_be16(ETH_P_INC),
	.dev  = NULL,
	.func = inc_rcv,
};

/* Socket family description: PF_INC sockets are created by inc_create(). */
static struct net_proto_family inc_family_ops __read_mostly = {
	.family = PF_INC,
	.create = inc_create,
	.owner  = THIS_MODULE,
};

/* notifier block for netdevice event */
static struct notifier_block inc_netdev_notifier __read_mostly = {
	.notifier_call = inc_notifier,
};


static __init int inc_init(void)
{
	printk(banner);
	init_sk_lookup();
	/* protocol register */
	sock_register(&inc_family_ops);
	register_netdevice_notifier(&inc_netdev_notifier);
	dev_add_pack(&inc_packet);

	return 0;
}

/*
 * Module exit: undo the registrations of inc_init() in reverse order
 * (receive hook, netdevice notifier, socket family).
 */
static __exit void inc_exit(void)
{
	/* protocol unregister */
	dev_remove_pack(&inc_packet);
	unregister_netdevice_notifier(&inc_netdev_notifier);
	sock_unregister(PF_INC);
	rcu_barrier(); /* Wait for completion of call_rcu()'s */
}

/* module entry and exit points */
module_init(inc_init);
module_exit(inc_exit);
