From 6dc878a8ca39e93f70c42f3dd7260bde10c1e0f1 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 4 Oct 2012 20:15:48 +0000 Subject: [PATCH 01/61] netlink: add reference of module in netlink_dump_start I get a panic when I use ss -a and rmmod inet_diag at the same time. It's because netlink_dump uses inet_diag_dump which belongs to module inet_diag. I search the codes and find many modules have the same problem. We need to add a reference to the module which the cb->dump belongs to. Thanks for all help from Stephen,Jan,Eric,Steffen and Pablo. Change From v3: change netlink_dump_start to inline,suggestion from Pablo and Eric. Change From v2: delete netlink_dump_done,and call module_put in netlink_dump and netlink_sock_destruct. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- include/linux/netlink.h | 20 ++++++++++++++++---- net/netlink/af_netlink.c | 29 +++++++++++++++++++++-------- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index f80c56ac4d8..6d3af05c107 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -245,6 +245,8 @@ struct netlink_callback { struct netlink_callback *cb); int (*done)(struct netlink_callback *cb); void *data; + /* the module that dump function belong to */ + struct module *module; u16 family; u16 min_dump_alloc; unsigned int prev_seq, seq; @@ -262,14 +264,24 @@ __nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int fla struct netlink_dump_control { int (*dump)(struct sk_buff *skb, struct netlink_callback *); - int (*done)(struct netlink_callback*); + int (*done)(struct netlink_callback *); void *data; + struct module *module; u16 min_dump_alloc; }; -extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, - const struct nlmsghdr *nlh, - struct netlink_dump_control *control); +extern int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct netlink_dump_control *control); +static inline int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct netlink_dump_control *control) +{ + if (!control->module) + control->module = THIS_MODULE; + + return __netlink_dump_start(ssk, skb, nlh, control); +} #endif /* __KERNEL__ */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0f2e3ad69c4..01e944a017a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -169,6 +169,8 @@ static void netlink_sock_destruct(struct sock *sk) if (nlk->cb) { if (nlk->cb->done) nlk->cb->done(nlk->cb); + + module_put(nlk->cb->module); netlink_destroy_callback(nlk->cb); } @@ -1758,6 +1760,7 @@ static int netlink_dump(struct sock *sk) nlk->cb = NULL; mutex_unlock(nlk->cb_mutex); + module_put(cb->module); netlink_consume_callback(cb); return 0; @@ -1767,9 +1770,9 @@ errout_skb: return err; } -int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, - const struct nlmsghdr *nlh, - struct netlink_dump_control *control) +int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, + const struct nlmsghdr *nlh, + struct netlink_dump_control *control) { struct netlink_callback *cb; struct sock *sk; @@ -1784,6 +1787,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb->done = control->done; cb->nlh = nlh; cb->data = control->data; + cb->module = control->module; cb->min_dump_alloc = control->min_dump_alloc; atomic_inc(&skb->users); cb->skb = skb; @@ -1794,19 +1798,28 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, return -ECONNREFUSED; } nlk = nlk_sk(sk); - /* A dump is in progress... */ + mutex_lock(nlk->cb_mutex); + /* A dump is in progress... */ if (nlk->cb) { mutex_unlock(nlk->cb_mutex); netlink_destroy_callback(cb); - sock_put(sk); - return -EBUSY; + ret = -EBUSY; + goto out; } + /* add reference of module which cb->dump belongs to */ + if (!try_module_get(cb->module)) { + mutex_unlock(nlk->cb_mutex); + netlink_destroy_callback(cb); + ret = -EPROTONOSUPPORT; + goto out; + } + nlk->cb = cb; mutex_unlock(nlk->cb_mutex); ret = netlink_dump(sk); - +out: sock_put(sk); if (ret) @@ -1817,7 +1830,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, */ return -EINTR; } -EXPORT_SYMBOL(netlink_dump_start); +EXPORT_SYMBOL(__netlink_dump_start); void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) { From 809d5fc9bf6589276a12bd4fd611e4c7ff9940c3 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Thu, 4 Oct 2012 20:15:49 +0000 Subject: [PATCH 02/61] infiniband: pass rdma_cm module to netlink_dump_start set netlink_dump_control.module to avoid panic. Signed-off-by: Gao feng Cc: Roland Dreier Cc: Sean Hefty Signed-off-by: David S. Miller --- drivers/infiniband/core/cma.c | 3 ++- drivers/infiniband/core/netlink.c | 1 + include/rdma/rdma_netlink.h | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 26b37603dcf..4fff27a7e37 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -3498,7 +3498,8 @@ out: } static const struct ibnl_client_cbs cma_cb_table[] = { - [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats }, + [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats, + .module = THIS_MODULE }, }; static int __init cma_init(void) diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index fe10a949aef..da06abde9e0 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -154,6 +154,7 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { struct netlink_dump_control c = { .dump = client->cb_table[op].dump, + .module = client->cb_table[op].module, }; return netlink_dump_start(nls, skb, nlh, &c); } diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 3c5363ab867..bd3d8b24b42 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -39,6 +39,7 @@ struct rdma_cm_id_stats { struct ibnl_client_cbs { int (*dump)(struct sk_buff *skb, struct netlink_callback *nlcb); + struct module *module; }; int ibnl_init(void); From acb600def2110b1310466c0e485c0d26299898ae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Oct 2012 06:23:55 +0000 Subject: [PATCH 03/61] net: remove skb recycling Over time, skb recycling infrastructure got litle interest and many bugs. Generic rx path skb allocation is now using page fragments for efficient GRO / TCP coalescing, and recyling a tx skb for rx path is not worth the pain. Last identified bug is that fat skbs can be recycled and it can endup using high order pages after few iterations. With help from Maxime Bizon, who pointed out that commit 87151b8689d (net: allow pskb_expand_head() to get maximum tailroom) introduced this regression for recycled skbs. Instead of fixing this bug, lets remove skb recycling. Drivers wanting really hot skbs should use build_skb() anyway, to allocate/populate sk_buff right before netif_receive_skb() Signed-off-by: Eric Dumazet Cc: Maxime Bizon Signed-off-by: David S. Miller --- drivers/net/ethernet/calxeda/xgmac.c | 19 +------- drivers/net/ethernet/freescale/gianfar.c | 27 ++--------- drivers/net/ethernet/freescale/gianfar.h | 2 - drivers/net/ethernet/freescale/ucc_geth.c | 29 +++--------- drivers/net/ethernet/freescale/ucc_geth.h | 2 - drivers/net/ethernet/marvell/mv643xx_eth.c | 18 +------ drivers/net/ethernet/stmicro/stmmac/stmmac.h | 1 - .../net/ethernet/stmicro/stmmac/stmmac_main.c | 20 +------- include/linux/skbuff.h | 24 ---------- net/core/skbuff.c | 47 ------------------- 10 files changed, 16 insertions(+), 173 deletions(-) diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c index 2b4b4f529ab..16814b34d4b 100644 --- a/drivers/net/ethernet/calxeda/xgmac.c +++ b/drivers/net/ethernet/calxeda/xgmac.c @@ -375,7 +375,6 @@ struct xgmac_priv { unsigned int tx_tail; void __iomem *base; - struct sk_buff_head rx_recycle; unsigned int dma_buf_sz; dma_addr_t dma_rx_phy; dma_addr_t dma_tx_phy; @@ -672,9 +671,7 @@ static void xgmac_rx_refill(struct xgmac_priv *priv) p = priv->dma_rx + entry; if (priv->rx_skbuff[entry] == NULL) { - skb = __skb_dequeue(&priv->rx_recycle); - if (skb == NULL) - skb = netdev_alloc_skb(priv->dev, priv->dma_buf_sz); + skb = netdev_alloc_skb(priv->dev, priv->dma_buf_sz); if (unlikely(skb == NULL)) break; @@ -887,17 +884,7 @@ static void xgmac_tx_complete(struct xgmac_priv *priv) desc_get_buf_len(p), DMA_TO_DEVICE); } - /* - * If there's room in the queue (limit it to size) - * we add this skb back into the pool, - * if it's the right size. - */ - if ((skb_queue_len(&priv->rx_recycle) < - DMA_RX_RING_SZ) && - skb_recycle_check(skb, priv->dma_buf_sz)) - __skb_queue_head(&priv->rx_recycle, skb); - else - dev_kfree_skb(skb); + dev_kfree_skb(skb); } if (dma_ring_space(priv->tx_head, priv->tx_tail, DMA_TX_RING_SZ) > @@ -1016,7 +1003,6 @@ static int xgmac_open(struct net_device *dev) dev->dev_addr); } - skb_queue_head_init(&priv->rx_recycle); memset(&priv->xstats, 0, sizeof(struct xgmac_extra_stats)); /* Initialize the XGMAC and descriptors */ @@ -1053,7 +1039,6 @@ static int xgmac_stop(struct net_device *dev) napi_disable(&priv->napi); writel(0, priv->base + XGMAC_DMA_INTR_ENA); - skb_queue_purge(&priv->rx_recycle); /* Disable the MAC core */ xgmac_mac_disable(priv->base); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index a1b52ec3b93..1d03dcdd5e5 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -1765,7 +1765,6 @@ static void free_skb_resources(struct gfar_private *priv) sizeof(struct rxbd8) * priv->total_rx_ring_size, priv->tx_queue[0]->tx_bd_base, priv->tx_queue[0]->tx_bd_dma_base); - skb_queue_purge(&priv->rx_recycle); } void gfar_start(struct net_device *dev) @@ -1943,8 +1942,6 @@ static int gfar_enet_open(struct net_device *dev) enable_napi(priv); - skb_queue_head_init(&priv->rx_recycle); - /* Initialize a bunch of registers */ init_registers(dev); @@ -2533,16 +2530,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) bytes_sent += skb->len; - /* If there's room in the queue (limit it to rx_buffer_size) - * we add this skb back into the pool, if it's the right size - */ - if (skb_queue_len(&priv->rx_recycle) < rx_queue->rx_ring_size && - skb_recycle_check(skb, priv->rx_buffer_size + - RXBUF_ALIGNMENT)) { - gfar_align_skb(skb); - skb_queue_head(&priv->rx_recycle, skb); - } else - dev_kfree_skb_any(skb); + dev_kfree_skb_any(skb); tx_queue->tx_skbuff[skb_dirtytx] = NULL; @@ -2608,7 +2596,7 @@ static void gfar_new_rxbdp(struct gfar_priv_rx_q *rx_queue, struct rxbd8 *bdp, static struct sk_buff *gfar_alloc_skb(struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); - struct sk_buff *skb = NULL; + struct sk_buff *skb; skb = netdev_alloc_skb(dev, priv->rx_buffer_size + RXBUF_ALIGNMENT); if (!skb) @@ -2621,14 +2609,7 @@ static struct sk_buff *gfar_alloc_skb(struct net_device *dev) struct sk_buff *gfar_new_skb(struct net_device *dev) { - struct gfar_private *priv = netdev_priv(dev); - struct sk_buff *skb = NULL; - - skb = skb_dequeue(&priv->rx_recycle); - if (!skb) - skb = gfar_alloc_skb(dev); - - return skb; + return gfar_alloc_skb(dev); } static inline void count_errors(unsigned short status, struct net_device *dev) @@ -2787,7 +2768,7 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit) if (unlikely(!newskb)) newskb = skb; else if (skb) - skb_queue_head(&priv->rx_recycle, skb); + dev_kfree_skb(skb); } else { /* Increment the number of packets */ rx_queue->stats.rx_packets++; diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h index 4141ef2ddaf..22eabc13ca9 100644 --- a/drivers/net/ethernet/freescale/gianfar.h +++ b/drivers/net/ethernet/freescale/gianfar.h @@ -1080,8 +1080,6 @@ struct gfar_private { u32 cur_filer_idx; - struct sk_buff_head rx_recycle; - /* RX queue filer rule set*/ struct ethtool_rx_list rx_list; struct mutex rx_queue_access; diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 16428843922..dfa0aaaab00 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -209,14 +209,12 @@ static struct list_head *dequeue(struct list_head *lh) static struct sk_buff *get_new_skb(struct ucc_geth_private *ugeth, u8 __iomem *bd) { - struct sk_buff *skb = NULL; + struct sk_buff *skb; - skb = __skb_dequeue(&ugeth->rx_recycle); + skb = netdev_alloc_skb(ugeth->ndev, + ugeth->ug_info->uf_info.max_rx_buf_length + + UCC_GETH_RX_DATA_BUF_ALIGNMENT); if (!skb) - skb = netdev_alloc_skb(ugeth->ndev, - ugeth->ug_info->uf_info.max_rx_buf_length + - UCC_GETH_RX_DATA_BUF_ALIGNMENT); - if (skb == NULL) return NULL; /* We need the data buffer to be aligned properly. We will reserve @@ -2020,8 +2018,6 @@ static void ucc_geth_memclean(struct ucc_geth_private *ugeth) iounmap(ugeth->ug_regs); ugeth->ug_regs = NULL; } - - skb_queue_purge(&ugeth->rx_recycle); } static void ucc_geth_set_multi(struct net_device *dev) @@ -2230,8 +2226,6 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth) return -ENOMEM; } - skb_queue_head_init(&ugeth->rx_recycle); - return 0; } @@ -3274,12 +3268,7 @@ static int ucc_geth_rx(struct ucc_geth_private *ugeth, u8 rxQ, int rx_work_limit if (netif_msg_rx_err(ugeth)) ugeth_err("%s, %d: ERROR!!! skb - 0x%08x", __func__, __LINE__, (u32) skb); - if (skb) { - skb->data = skb->head + NET_SKB_PAD; - skb->len = 0; - skb_reset_tail_pointer(skb); - __skb_queue_head(&ugeth->rx_recycle, skb); - } + dev_free_skb(skb); ugeth->rx_skbuff[rxQ][ugeth->skb_currx[rxQ]] = NULL; dev->stats.rx_dropped++; @@ -3349,13 +3338,7 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ) dev->stats.tx_packets++; - if (skb_queue_len(&ugeth->rx_recycle) < RX_BD_RING_LEN && - skb_recycle_check(skb, - ugeth->ug_info->uf_info.max_rx_buf_length + - UCC_GETH_RX_DATA_BUF_ALIGNMENT)) - __skb_queue_head(&ugeth->rx_recycle, skb); - else - dev_kfree_skb(skb); + dev_kfree_skb(skb); ugeth->tx_skbuff[txQ][ugeth->skb_dirtytx[txQ]] = NULL; ugeth->skb_dirtytx[txQ] = diff --git a/drivers/net/ethernet/freescale/ucc_geth.h b/drivers/net/ethernet/freescale/ucc_geth.h index f71b3e7b12d..75f337163ce 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.h +++ b/drivers/net/ethernet/freescale/ucc_geth.h @@ -1214,8 +1214,6 @@ struct ucc_geth_private { /* index of the first skb which hasn't been transmitted yet. */ u16 skb_dirtytx[NUM_TX_QUEUES]; - struct sk_buff_head rx_recycle; - struct ugeth_mii_info *mii_info; struct phy_device *phydev; phy_interface_t phy_interface; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 087b9e0669f..84c13263c51 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -412,7 +412,6 @@ struct mv643xx_eth_private { u8 work_rx_refill; int skb_size; - struct sk_buff_head rx_recycle; /* * RX state. @@ -673,9 +672,7 @@ static int rxq_refill(struct rx_queue *rxq, int budget) struct rx_desc *rx_desc; int size; - skb = __skb_dequeue(&mp->rx_recycle); - if (skb == NULL) - skb = netdev_alloc_skb(mp->dev, mp->skb_size); + skb = netdev_alloc_skb(mp->dev, mp->skb_size); if (skb == NULL) { mp->oom = 1; @@ -989,14 +986,7 @@ static int txq_reclaim(struct tx_queue *txq, int budget, int force) desc->byte_cnt, DMA_TO_DEVICE); } - if (skb != NULL) { - if (skb_queue_len(&mp->rx_recycle) < - mp->rx_ring_size && - skb_recycle_check(skb, mp->skb_size)) - __skb_queue_head(&mp->rx_recycle, skb); - else - dev_kfree_skb(skb); - } + dev_kfree_skb(skb); } __netif_tx_unlock(nq); @@ -2349,8 +2339,6 @@ static int mv643xx_eth_open(struct net_device *dev) napi_enable(&mp->napi); - skb_queue_head_init(&mp->rx_recycle); - mp->int_mask = INT_EXT; for (i = 0; i < mp->rxq_count; i++) { @@ -2445,8 +2433,6 @@ static int mv643xx_eth_stop(struct net_device *dev) mib_counters_update(mp); del_timer_sync(&mp->mib_counters_timer); - skb_queue_purge(&mp->rx_recycle); - for (i = 0; i < mp->rxq_count; i++) rxq_deinit(mp->rxq + i); for (i = 0; i < mp->txq_count; i++) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index e872e1da313..7d51a65ab09 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -50,7 +50,6 @@ struct stmmac_priv { unsigned int dirty_rx; struct sk_buff **rx_skbuff; dma_addr_t *rx_skbuff_dma; - struct sk_buff_head rx_recycle; struct net_device *dev; dma_addr_t dma_rx_phy; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3be88331d17..c6cdbc4eb05 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -747,18 +747,7 @@ static void stmmac_tx(struct stmmac_priv *priv) priv->hw->ring->clean_desc3(p); if (likely(skb != NULL)) { - /* - * If there's room in the queue (limit it to size) - * we add this skb back into the pool, - * if it's the right size. - */ - if ((skb_queue_len(&priv->rx_recycle) < - priv->dma_rx_size) && - skb_recycle_check(skb, priv->dma_buf_sz)) - __skb_queue_head(&priv->rx_recycle, skb); - else - dev_kfree_skb(skb); - + dev_kfree_skb(skb); priv->tx_skbuff[entry] = NULL; } @@ -1169,7 +1158,6 @@ static int stmmac_open(struct net_device *dev) priv->eee_enabled = stmmac_eee_init(priv); napi_enable(&priv->napi); - skb_queue_head_init(&priv->rx_recycle); netif_start_queue(dev); return 0; @@ -1222,7 +1210,6 @@ static int stmmac_release(struct net_device *dev) kfree(priv->tm); #endif napi_disable(&priv->napi); - skb_queue_purge(&priv->rx_recycle); /* Free the IRQ lines */ free_irq(dev->irq, dev); @@ -1388,10 +1375,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv) if (likely(priv->rx_skbuff[entry] == NULL)) { struct sk_buff *skb; - skb = __skb_dequeue(&priv->rx_recycle); - if (skb == NULL) - skb = netdev_alloc_skb_ip_align(priv->dev, - bfsize); + skb = netdev_alloc_skb_ip_align(priv->dev, bfsize); if (unlikely(skb == NULL)) break; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b33a3a1f205..6a2c34e6d96 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -589,9 +589,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } -extern void skb_recycle(struct sk_buff *skb); -extern bool skb_recycle_check(struct sk_buff *skb, int skb_size); - extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); extern int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); extern struct sk_buff *skb_clone(struct sk_buff *skb, @@ -2645,27 +2642,6 @@ static inline void skb_checksum_none_assert(const struct sk_buff *skb) bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); -static inline bool skb_is_recycleable(const struct sk_buff *skb, int skb_size) -{ - if (irqs_disabled()) - return false; - - if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) - return false; - - if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) - return false; - - skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); - if (skb_end_offset(skb) < skb_size) - return false; - - if (skb_shared(skb) || skb_cloned(skb)) - return false; - - return true; -} - /** * skb_head_is_locked - Determine if the skb->head is locked down * @skb: skb to check diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cdc28598f4e..6e04b1fa11f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -655,53 +655,6 @@ void consume_skb(struct sk_buff *skb) } EXPORT_SYMBOL(consume_skb); -/** - * skb_recycle - clean up an skb for reuse - * @skb: buffer - * - * Recycles the skb to be reused as a receive buffer. This - * function does any necessary reference count dropping, and - * cleans up the skbuff as if it just came from __alloc_skb(). - */ -void skb_recycle(struct sk_buff *skb) -{ - struct skb_shared_info *shinfo; - - skb_release_head_state(skb); - - shinfo = skb_shinfo(skb); - memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); - atomic_set(&shinfo->dataref, 1); - - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->data = skb->head + NET_SKB_PAD; - skb_reset_tail_pointer(skb); -} -EXPORT_SYMBOL(skb_recycle); - -/** - * skb_recycle_check - check if skb can be reused for receive - * @skb: buffer - * @skb_size: minimum receive buffer size - * - * Checks that the skb passed in is not shared or cloned, and - * that it is linear and its head portion at least as large as - * skb_size so that it can be recycled as a receive buffer. - * If these conditions are met, this function does any necessary - * reference count dropping and cleans up the skbuff as if it - * just came from __alloc_skb(). - */ -bool skb_recycle_check(struct sk_buff *skb, int skb_size) -{ - if (!skb_is_recycleable(skb, skb_size)) - return false; - - skb_recycle(skb); - - return true; -} -EXPORT_SYMBOL(skb_recycle_check); - static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { new->tstamp = old->tstamp; From 5b896029e9c4c9fd67eed80b7400dd68bdd93f52 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 10:41:03 +0000 Subject: [PATCH 04/61] drivers/net/ethernet/dec/tulip/dmfe.c: fix error return code The function dmfe_init_one() return 0 for success and negative value for most of its internal tests failures. There are three exceptions that are error cases going to err_out_*:. Fore this three cases the function abort its success execution path, but returns non negative value, making it dificult for a caller function to notice the error. This patch fixes the error cases that do not return negative values. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/ethernet/dec/tulip/dmfe.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c index 4d6fe604fa6..d23755ea9bc 100644 --- a/drivers/net/ethernet/dec/tulip/dmfe.c +++ b/drivers/net/ethernet/dec/tulip/dmfe.c @@ -446,13 +446,17 @@ static int __devinit dmfe_init_one (struct pci_dev *pdev, /* Allocate Tx/Rx descriptor memory */ db->desc_pool_ptr = pci_alloc_consistent(pdev, sizeof(struct tx_desc) * DESC_ALL_CNT + 0x20, &db->desc_pool_dma_ptr); - if (!db->desc_pool_ptr) + if (!db->desc_pool_ptr) { + err = -ENOMEM; goto err_out_res; + } db->buf_pool_ptr = pci_alloc_consistent(pdev, TX_BUF_ALLOC * TX_DESC_CNT + 4, &db->buf_pool_dma_ptr); - if (!db->buf_pool_ptr) + if (!db->buf_pool_ptr) { + err = -ENOMEM; goto err_out_free_desc; + } db->first_tx_desc = (struct tx_desc *) db->desc_pool_ptr; db->first_tx_desc_dma = db->desc_pool_dma_ptr; @@ -462,8 +466,10 @@ static int __devinit dmfe_init_one (struct pci_dev *pdev, db->chip_id = ent->driver_data; /* IO type range. */ db->ioaddr = pci_iomap(pdev, 0, 0); - if (!db->ioaddr) + if (!db->ioaddr) { + err = -ENOMEM; goto err_out_free_buf; + } db->chip_revision = pdev->revision; db->wol_mode = 0; From 52428d9169f8b4c26e806fb7473833bce3077de5 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 10:41:04 +0000 Subject: [PATCH 05/61] drivers/net/ethernet/natsemi/natsemi.c: fix error return code The function natsemi_probe1() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to err_create_file:. Fore this error case the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Acked-by: Francois Romieu Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/natsemi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index 5b61d12f8b9..dbaaa99a0d4 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -947,8 +947,8 @@ static int __devinit natsemi_probe1 (struct pci_dev *pdev, i = register_netdev(dev); if (i) goto err_register_netdev; - - if (NATSEMI_CREATE_FILE(pdev, dspcfg_workaround)) + i = NATSEMI_CREATE_FILE(pdev, dspcfg_workaround); + if (i) goto err_create_file; if (netif_msg_drv(np)) { From 0968a9d1c8f1d0d91e64faf49f6b1ef4f8775aa6 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 10:41:07 +0000 Subject: [PATCH 06/61] drivers/net/ethernet/sis/sis900.c: fix error return code The function sis900_probe() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to err_out_cleardev:. Fore this error case, the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis900.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 203d9c6ec23..fb9f6b38511 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -478,8 +478,10 @@ static int __devinit sis900_probe(struct pci_dev *pci_dev, /* IO region. */ ioaddr = pci_iomap(pci_dev, 0, 0); - if (!ioaddr) + if (!ioaddr) { + ret = -ENOMEM; goto err_out_cleardev; + } sis_priv = netdev_priv(net_dev); sis_priv->ioaddr = ioaddr; From 87286477c4688e5324e24916a1b2da4972ea84ba Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 11:33:03 +0000 Subject: [PATCH 07/61] drivers/net/irda/irtty-sir.c: fix error return code The function irtty_open() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to out_put:. For this error case, the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/irda/irtty-sir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index 30087ca23a0..6e4d4b62c9a 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -459,8 +459,10 @@ static int irtty_open(struct tty_struct *tty) /* allocate private device info block */ priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) + if (!priv) { + ret = -ENOMEM; goto out_put; + } priv->magic = IRTTY_MAGIC; priv->tty = tty; From 6734011f3c9a17070f157656f8b15e051e37aee1 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 11:33:04 +0000 Subject: [PATCH 08/61] drivers/net/irda/mcs7780.c: fix error return code The function mcs_probe() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to error2:. For this error case, the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/irda/mcs7780.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/irda/mcs7780.c b/drivers/net/irda/mcs7780.c index 1a00b5990cb..f07c340990d 100644 --- a/drivers/net/irda/mcs7780.c +++ b/drivers/net/irda/mcs7780.c @@ -920,8 +920,10 @@ static int mcs_probe(struct usb_interface *intf, ndev->netdev_ops = &mcs_netdev_ops; - if (!intf->cur_altsetting) + if (!intf->cur_altsetting) { + ret = -ENOMEM; goto error2; + } ret = mcs_find_endpoints(mcs, intf->cur_altsetting->endpoint, intf->cur_altsetting->desc.bNumEndpoints); From cbd841ca91b4f87d62263f8d86af1b14a745c413 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 11:33:05 +0000 Subject: [PATCH 09/61] drivers/net/irda/pxaficp_ir.c: fix error return code The function pxa_irda_probe() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to err_mem_3:. For this error case, the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/irda/pxaficp_ir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c index 002a442bf73..858de05bdb7 100644 --- a/drivers/net/irda/pxaficp_ir.c +++ b/drivers/net/irda/pxaficp_ir.c @@ -846,8 +846,10 @@ static int pxa_irda_probe(struct platform_device *pdev) goto err_mem_2; dev = alloc_irdadev(sizeof(struct pxa_irda)); - if (!dev) + if (!dev) { + err = -ENOMEM; goto err_mem_3; + } SET_NETDEV_DEV(dev, &pdev->dev); si = netdev_priv(dev); From cd9d11607ebe106acab6cf3ad578e72c956823a1 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 11:33:06 +0000 Subject: [PATCH 10/61] drivers/net/irda/sa1100_ir.c: fix error return code The function sa1100_irda_probe() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to err_mem_4:. For this error case, the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/irda/sa1100_ir.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/irda/sa1100_ir.c b/drivers/net/irda/sa1100_ir.c index e25067552b2..42fde9ed23e 100644 --- a/drivers/net/irda/sa1100_ir.c +++ b/drivers/net/irda/sa1100_ir.c @@ -940,8 +940,10 @@ static int sa1100_irda_probe(struct platform_device *pdev) goto err_mem_3; dev = alloc_irdadev(sizeof(struct sa1100_irda)); - if (!dev) + if (!dev) { + err = -ENOMEM; goto err_mem_4; + } SET_NETDEV_DEV(dev, &pdev->dev); From 812b074b5ba2937d2edc0e5b0019fa163ba86882 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 11:33:02 +0000 Subject: [PATCH 11/61] drivers/net/irda/sh_irda.c: fix error return code The function sh_irda_probe() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to err_mem_4:. For this error case, the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/irda/sh_irda.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/irda/sh_irda.c b/drivers/net/irda/sh_irda.c index eb315b8d07a..4b746d9bd8e 100644 --- a/drivers/net/irda/sh_irda.c +++ b/drivers/net/irda/sh_irda.c @@ -808,8 +808,8 @@ static int __devinit sh_irda_probe(struct platform_device *pdev) goto err_mem_4; platform_set_drvdata(pdev, ndev); - - if (request_irq(irq, sh_irda_irq, IRQF_DISABLED, "sh_irda", self)) { + err = request_irq(irq, sh_irda_irq, IRQF_DISABLED, "sh_irda", self); + if (err) { dev_warn(&pdev->dev, "Unable to attach sh_irda interrupt\n"); goto err_mem_4; } From 14834540cafff24944affd6290f01359799a48ce Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 12:10:49 +0000 Subject: [PATCH 12/61] drivers/net/irda/sh_sir.c: fix error return code The function sh_sir_probe() return 0 for success and negative value for most of its internal tests failures. There are two exceptions that are error cases going to err_mem_*:. For this two cases, the function abort its success execution path, but returns non negative value, making it dificult for a caller function to notice the error. This patch fixes the error cases that do not return negative values. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/irda/sh_sir.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/irda/sh_sir.c b/drivers/net/irda/sh_sir.c index 79510942556..624ac1939e8 100644 --- a/drivers/net/irda/sh_sir.c +++ b/drivers/net/irda/sh_sir.c @@ -741,6 +741,7 @@ static int __devinit sh_sir_probe(struct platform_device *pdev) self->clk = clk_get(&pdev->dev, clk_name); if (IS_ERR(self->clk)) { dev_err(&pdev->dev, "cannot get clock \"%s\"\n", clk_name); + err = -ENODEV; goto err_mem_3; } @@ -760,8 +761,8 @@ static int __devinit sh_sir_probe(struct platform_device *pdev) goto err_mem_4; platform_set_drvdata(pdev, ndev); - - if (request_irq(irq, sh_sir_irq, IRQF_DISABLED, "sh_sir", self)) { + err = request_irq(irq, sh_sir_irq, IRQF_DISABLED, "sh_sir", self); + if (err) { dev_warn(&pdev->dev, "Unable to attach sh_sir interrupt\n"); goto err_mem_4; } From 2dfc96719187d3cb632922a9455abd008db467eb Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 12:10:50 +0000 Subject: [PATCH 13/61] drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c: fix error return code The function qlcnic_probe() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to err_out_free_netdev:. For this error case, the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 473ce134ca6..24ad17ec7fc 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -1601,7 +1601,8 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->netdev = netdev; adapter->pdev = pdev; - if (qlcnic_alloc_adapter_resources(adapter)) + err = qlcnic_alloc_adapter_resources(adapter); + if (err) goto err_out_free_netdev; adapter->dev_rst_time = jiffies; From 86e506e39909d356ca1c76c00022f3c64f5b7665 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 12:10:51 +0000 Subject: [PATCH 14/61] drivers/net/ethernet/amd/amd8111e.c: fix error return code The function amd8111e_probe_one() return 0 for success and negative value for most of its internal tests failures. There are two exceptions that are error cases going to err_free_reg:. For this two cases, the function abort its success execution path, but returns non negative value, making it dificult for a caller function to notice the error. This patch fixes the error cases that do not return negative values. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/amd8111e.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 64d0d9c1afa..3491d4312fc 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1845,6 +1845,7 @@ static int __devinit amd8111e_probe_one(struct pci_dev *pdev, if((pm_cap = pci_find_capability(pdev, PCI_CAP_ID_PM))==0){ printk(KERN_ERR "amd8111e: No Power Management capability, " "exiting.\n"); + err = -ENODEV; goto err_free_reg; } @@ -1852,6 +1853,7 @@ static int __devinit amd8111e_probe_one(struct pci_dev *pdev, if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) < 0) { printk(KERN_ERR "amd8111e: DMA not supported," "exiting.\n"); + err = -ENODEV; goto err_free_reg; } From 691299201b1416e823bbada0df316c6408167aa7 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 12:10:52 +0000 Subject: [PATCH 15/61] drivers/net/ethernet/amd/au1000_eth.c: fix error return code The function au1000_probe() return 0 for success and negative value for most of its internal tests failures. There are exceptions that are error cases going to err_out:. For this cases, the function abort its success execution path, but returns non negative value, making it dificult for a caller function to notice the error. This patch fixes the error cases that do not return negative values. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/au1000_eth.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 397596b078d..f195acfa2df 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1174,8 +1174,10 @@ static int __devinit au1000_probe(struct platform_device *pdev) snprintf(aup->mii_bus->id, MII_BUS_ID_SIZE, "%s-%x", pdev->name, aup->mac_id); aup->mii_bus->irq = kmalloc(sizeof(int)*PHY_MAX_ADDR, GFP_KERNEL); - if (aup->mii_bus->irq == NULL) + if (aup->mii_bus->irq == NULL) { + err = -ENOMEM; goto err_out; + } for (i = 0; i < PHY_MAX_ADDR; ++i) aup->mii_bus->irq[i] = PHY_POLL; @@ -1190,7 +1192,8 @@ static int __devinit au1000_probe(struct platform_device *pdev) goto err_mdiobus_reg; } - if (au1000_mii_probe(dev) != 0) + err = au1000_mii_probe(dev); + if (err != 0) goto err_out; pDBfree = NULL; @@ -1205,6 +1208,7 @@ static int __devinit au1000_probe(struct platform_device *pdev) } aup->pDBfree = pDBfree; + err = -ENODEV; for (i = 0; i < NUM_RX_DMA; i++) { pDB = au1000_GetFreeDB(aup); if (!pDB) @@ -1213,6 +1217,8 @@ static int __devinit au1000_probe(struct platform_device *pdev) aup->rx_dma_ring[i]->buff_stat = (unsigned)pDB->dma_addr; aup->rx_db_inuse[i] = pDB; } + + err = -ENODEV; for (i = 0; i < NUM_TX_DMA; i++) { pDB = au1000_GetFreeDB(aup); if (!pDB) From 97db4b9d02a8fc2f06514f10f6b426b97f6f7a0d Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 12:10:53 +0000 Subject: [PATCH 16/61] drivers/net/ethernet/natsemi/xtsonic.c: fix error return code The function sonic_probe1() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to out:. For this error case, the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/ethernet/natsemi/xtsonic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index e01c0a07a93..7dfe88398d7 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -205,6 +205,7 @@ static int __init sonic_probe1(struct net_device *dev) if (lp->descriptors == NULL) { printk(KERN_ERR "%s: couldn't alloc DMA memory for " " descriptors.\n", dev_name(lp->device)); + err = -ENOMEM; goto out; } From 043c4789726e6abb5be6115cb054af4926481952 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 12:40:52 +0000 Subject: [PATCH 17/61] drivers/net/ethernet/renesas/sh_eth.c: fix error return code The function sh_eth_drv_probe() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to out_release:. For this error case, the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index bad8f2eec9b..c8bfea0524d 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2438,6 +2438,7 @@ static int sh_eth_drv_probe(struct platform_device *pdev) rtsu = platform_get_resource(pdev, IORESOURCE_MEM, 1); if (!rtsu) { dev_err(&pdev->dev, "Not found TSU resource\n"); + ret = -ENODEV; goto out_release; } mdp->tsu_addr = ioremap(rtsu->start, From 8c65ef4b8a21ab7edc7f35bb9a3892b0051b0158 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 12:40:53 +0000 Subject: [PATCH 18/61] drivers/net/ethernet/sun/niu.c: fix error return code The function niu_pci_init_one() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to err_out_free_res:. For this error case, the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/niu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 8419bf385e0..275b430aeb7 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -9788,6 +9788,7 @@ static int __devinit niu_pci_init_one(struct pci_dev *pdev, if (!pci_is_pcie(pdev)) { dev_err(&pdev->dev, "Cannot find PCI Express capability, aborting\n"); + err = -ENODEV; goto err_out_free_res; } From 4df128341ed5728bc8d8f75709258967f8cc2f5f Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 12:40:54 +0000 Subject: [PATCH 19/61] drivers/net/ethernet/sun/sungem.c: fix error return code The function gem_init_one() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to err_out_free_consistent:. For this error case, the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/ethernet/sun/sungem.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 9ae12d0c963..6c8695ec7cb 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -2963,7 +2963,8 @@ static int __devinit gem_init_one(struct pci_dev *pdev, goto err_out_iounmap; } - if (gem_get_device_address(gp)) + err = gem_get_device_address(gp); + if (err) goto err_out_free_consistent; dev->netdev_ops = &gem_netdev_ops; From bbcf61fb30279c99e51b9d20b231e8513c5e6b1d Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 12:40:55 +0000 Subject: [PATCH 20/61] drivers/net/ethernet/marvell/skge.c: fix error return code The function skge_probe() return 0 for success and negative value for most of its internal tests failures. There is one exception that is error case going to err_out_led_off:. For this error case, the function abort its success execution path, but returns non negative value, making it difficult for a caller function to notice the error. This patch fixes the error case that do not return negative value. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/skge.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 5a30bf82309..91836b54da3 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -3945,8 +3945,10 @@ static int __devinit skge_probe(struct pci_dev *pdev, skge_board_name(hw), hw->chip_rev); dev = skge_devinit(hw, 0, using_dac); - if (!dev) + if (!dev) { + err = -ENOMEM; goto err_out_led_off; + } /* Some motherboards are broken and has zero in ROM. */ if (!is_valid_ether_addr(dev->dev_addr)) From 0bd8ba18b9384e1c9104eee1d912fd5c8b4234c0 Mon Sep 17 00:00:00 2001 From: Peter Senna Tschudin Date: Fri, 5 Oct 2012 12:40:56 +0000 Subject: [PATCH 21/61] drivers/net/ethernet/marvell/sky2.c: fix error return code The function sky2_probe() return 0 for success and negative value for most of its internal tests failures. There are two exceptions that are error cases going to err_out*:. For this two cases, the function abort its success execution path, but returns non negative value, making it dificult for a caller function to notice the error. This patch fixes the error cases that do not return negative values. This was found by Coccinelle, but the code change was made by hand. This patch is not robot generated. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Peter Senna Tschudin Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/sky2.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 2b0748dba8b..78946feab4a 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4924,6 +4924,7 @@ static int __devinit sky2_probe(struct pci_dev *pdev, if (~reg == 0) { dev_err(&pdev->dev, "PCI configuration read error\n"); + err = -EIO; goto err_out; } @@ -4993,8 +4994,10 @@ static int __devinit sky2_probe(struct pci_dev *pdev, hw->st_size = hw->ports * roundup_pow_of_two(3*RX_MAX_PENDING + TX_MAX_PENDING); hw->st_le = pci_alloc_consistent(pdev, hw->st_size * sizeof(struct sky2_status_le), &hw->st_dma); - if (!hw->st_le) + if (!hw->st_le) { + err = -ENOMEM; goto err_out_reset; + } dev_info(&pdev->dev, "Yukon-2 %s chip revision %d\n", sky2_name(hw->chip_id, buf1, sizeof(buf1)), hw->chip_rev); From e1f165032c8bade3a6bdf546f8faf61fda4dd01c Mon Sep 17 00:00:00 2001 From: "ramesh.nagappa@gmail.com" Date: Fri, 5 Oct 2012 19:10:15 +0000 Subject: [PATCH 22/61] net: Fix skb_under_panic oops in neigh_resolve_output The retry loop in neigh_resolve_output() and neigh_connected_output() call dev_hard_header() with out reseting the skb to network_header. This causes the retry to fail with skb_under_panic. The fix is to reset the network_header within the retry loop. Signed-off-by: Ramesh Nagappa Reviewed-by: Shawn Lu Reviewed-by: Robert Coulson Reviewed-by: Billie Alsup Signed-off-by: David S. Miller --- net/core/neighbour.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index baca771caae..22571488730 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1301,8 +1301,6 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) if (!dst) goto discard; - __skb_pull(skb, skb_network_offset(skb)); - if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; @@ -1312,6 +1310,7 @@ int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb) neigh_hh_init(neigh, dst); do { + __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); @@ -1342,9 +1341,8 @@ int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb) unsigned int seq; int err; - __skb_pull(skb, skb_network_offset(skb)); - do { + __skb_pull(skb, skb_network_offset(skb)); seq = read_seqbegin(&neigh->ha_lock); err = dev_hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len); From 51ec04038c113a811b177baa85d293feff9ce995 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 5 Oct 2012 20:43:30 +0000 Subject: [PATCH 23/61] ipv6: GRO should be ECN friendly IPv4 side of the problem was addressed in commit a9e050f4e7f9d (net: tcp: GRO should be ECN friendly) This patch does the same, but for IPv6 : A Traffic Class mismatch doesnt mean flows are different, but instead should force a flush of previous packets. This patch removes artificial packet reordering problem. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e22e6d88bac..f757e3b7cfb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -880,22 +880,25 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, nlen = skb_network_header_len(skb); for (p = *head; p; p = p->next) { - struct ipv6hdr *iph2; + const struct ipv6hdr *iph2; + __be32 first_word; /* */ if (!NAPI_GRO_CB(p)->same_flow) continue; iph2 = ipv6_hdr(p); + first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; - /* All fields must match except length. */ + /* All fields must match except length and Traffic Class. */ if (nlen != skb_network_header_len(p) || - memcmp(iph, iph2, offsetof(struct ipv6hdr, payload_len)) || + (first_word & htonl(0xF00FFFFF)) || memcmp(&iph->nexthdr, &iph2->nexthdr, nlen - offsetof(struct ipv6hdr, nexthdr))) { NAPI_GRO_CB(p)->same_flow = 0; continue; } - + /* flush if Traffic Class fields are different */ + NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000)); NAPI_GRO_CB(p)->flush |= flush; } From ca07e43e288956a0ad5e6bd075f7aa1fca3bca00 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 6 Oct 2012 22:28:06 +0000 Subject: [PATCH 24/61] net: gro: fix a potential crash in skb_gro_reset_offset Before accessing skb first fragment, better make sure there is one. This is probably not needed for old kernels, since an ethernet frame cannot contain only an ethernet header, but the recent GRO addition to tunnels makes this patch needed. Also skb_gro_reset_offset() can be static, it actually allows compiler to inline it. Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - net/core/dev.c | 14 ++++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 01646aa53b0..a659fd0ba96 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1663,7 +1663,6 @@ extern int netpoll_trap(void); #endif extern int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb); -extern void skb_gro_reset_offset(struct sk_buff *skb); static inline unsigned int skb_gro_offset(const struct sk_buff *skb) { diff --git a/net/core/dev.c b/net/core/dev.c index 1e0a1847c3b..de2bad717d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3631,20 +3631,22 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) } EXPORT_SYMBOL(napi_skb_finish); -void skb_gro_reset_offset(struct sk_buff *skb) +static void skb_gro_reset_offset(struct sk_buff *skb) { + const struct skb_shared_info *pinfo = skb_shinfo(skb); + const skb_frag_t *frag0 = &pinfo->frags[0]; + NAPI_GRO_CB(skb)->data_offset = 0; NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; if (skb->mac_header == skb->tail && - !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) { - NAPI_GRO_CB(skb)->frag0 = - skb_frag_address(&skb_shinfo(skb)->frags[0]); - NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]); + pinfo->nr_frags && + !PageHighMem(skb_frag_page(frag0))) { + NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); + NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(frag0); } } -EXPORT_SYMBOL(skb_gro_reset_offset); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { From d717f14ee0ffb53cb59bd069a33ea36d03a5406e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 7 Oct 2012 03:23:06 +0000 Subject: [PATCH 25/61] vxlan: remove unused including Remove including that don't need it. dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 51de9edb55f..6f95580bf9d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include From 9545f4e2beb1b4faae5b57250f18b0047fa191fc Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Sun, 7 Oct 2012 03:41:50 +0000 Subject: [PATCH 26/61] ptp: use list_move instead of list_del/list_add Using list_move() instead of list_del() + list_add(). dpatch engine is used to auto generate this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ptp.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index 5b3dd028ce8..0767043f44a 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -640,8 +640,7 @@ static void efx_ptp_drop_time_expired_events(struct efx_nic *efx) evt = list_entry(cursor, struct efx_ptp_event_rx, link); if (time_after(jiffies, evt->expiry)) { - list_del(&evt->link); - list_add(&evt->link, &ptp->evt_free_list); + list_move(&evt->link, &ptp->evt_free_list); netif_warn(efx, hw, efx->net_dev, "PTP rx event dropped\n"); } @@ -684,8 +683,7 @@ static enum ptp_packet_state efx_ptp_match_rx(struct efx_nic *efx, match->state = PTP_PACKET_STATE_MATCHED; rc = PTP_PACKET_STATE_MATCHED; - list_del(&evt->link); - list_add(&evt->link, &ptp->evt_free_list); + list_move(&evt->link, &ptp->evt_free_list); break; } } @@ -820,8 +818,7 @@ static int efx_ptp_stop(struct efx_nic *efx) /* Drop any pending receive events */ spin_lock_bh(&efx->ptp_data->evt_lock); list_for_each_safe(cursor, next, &efx->ptp_data->evt_list) { - list_del(cursor); - list_add(cursor, &efx->ptp_data->evt_free_list); + list_move(cursor, &efx->ptp_data->evt_free_list); } spin_unlock_bh(&efx->ptp_data->evt_lock); From 594f88e96ebaf290e6509b37fff84b310ec1f155 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Oct 2012 10:12:11 +0300 Subject: [PATCH 27/61] cxgb4: allocate enough data in t4_memory_rw() MEMWIN0_APERTURE is the size in bytes. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 137a24438d9..e914c4113b8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -417,7 +417,7 @@ static int t4_memory_rw(struct adapter *adap, int mtype, u32 addr, u32 len, if ((addr & 0x3) || (len & 0x3)) return -EINVAL; - data = vmalloc(MEMWIN0_APERTURE/sizeof(__be32)); + data = vmalloc(MEMWIN0_APERTURE); if (!data) return -ENOMEM; From d851c12b60471188e15e5c8405b289073e8dd025 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sun, 7 Oct 2012 22:47:25 +0000 Subject: [PATCH 28/61] ipv4: Always invalidate or update the route on pmtu events Some protocols, like IPsec still cache routes. So we need to invalidate the old route on pmtu events to avoid the reuse of stale routes. We also need to update the mtu and expire time of the route if we already use a nh exception route, otherwise we ignore newly learned pmtu values after the first expiration. With this patch we always invalidate or update the route on pmtu events. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ff622069fce..90ba8358a89 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -904,22 +904,29 @@ out: kfree_skb(skb); return 0; } -static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) +static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { + struct dst_entry *dst = &rt->dst; struct fib_result res; if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + if (!rt->rt_pmtu) { + dst->obsolete = DST_OBSOLETE_KILL; + } else { + rt->rt_pmtu = mtu; + dst->expires = max(1UL, jiffies + ip_rt_mtu_expires); + } + rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { + if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); - return mtu; } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, @@ -929,14 +936,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); - mtu = __ip_rt_update_pmtu(rt, &fl4, mtu); - - if (!rt->rt_pmtu) { - dst->obsolete = DST_OBSOLETE_KILL; - } else { - rt->rt_pmtu = mtu; - rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires); - } + __ip_rt_update_pmtu(rt, &fl4, mtu); } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, From 7f92d334ba19a0d8e96f8f8f092219553367d921 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sun, 7 Oct 2012 22:48:18 +0000 Subject: [PATCH 29/61] ipv4: Don't create nh exeption when the device mtu is smaller than the reported pmtu When a local tool like tracepath tries to send packets bigger than the device mtu, we create a nh exeption and set the pmtu to device mtu. The device mtu does not expire, so check if the device mtu is smaller than the reported pmtu and don't crerate a nh exeption in that case. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 90ba8358a89..741df67a81e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -909,6 +909,9 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) struct dst_entry *dst = &rt->dst; struct fib_result res; + if (dst->dev->mtu < mtu) + return; + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; From ee9a8f7ab2edf801b8b514c310455c94acc232f6 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 8 Oct 2012 00:56:54 +0000 Subject: [PATCH 30/61] ipv4: Don't report stale pmtu values to userspace We report cached pmtu values even if they are already expired. Change this to not report these values after they are expired and fix a race in the expire time calculation, as suggested by Eric Dumazet. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/route.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 741df67a81e..132e0dfee53 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2187,8 +2187,18 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; + expires = rt->dst.expires; + if (expires) { + unsigned long now = jiffies; + + if (time_before(now, expires)) + expires -= now; + else + expires = 0; + } + memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); - if (rt->rt_pmtu) + if (rt->rt_pmtu && expires) metrics[RTAX_MTU - 1] = rt->rt_pmtu; if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; @@ -2198,13 +2208,6 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, goto nla_put_failure; error = rt->dst.error; - expires = rt->dst.expires; - if (expires) { - if (time_before(jiffies, expires)) - expires -= jiffies; - else - expires = 0; - } if (rt_is_input_route(rt)) { if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) From 404d9e3fc322023619cf0a9c5c6efbbaf7e14ee8 Mon Sep 17 00:00:00 2001 From: Vipul Pandya Date: Mon, 8 Oct 2012 02:59:43 +0000 Subject: [PATCH 31/61] cxgb4: Address various sparse warnings This patch fixes type assignment issues, function definition and symbol shadowing which triggered sparse warnings. Signed-off-by: Jay Hernandez Signed-off-by: Vipul Pandya Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + .../net/ethernet/chelsio/cxgb4/cxgb4_main.c | 54 ++++++++++--------- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 13 +++-- 3 files changed, 39 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 31752b24434..a4da893ac1e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -696,6 +696,7 @@ int t4_seeprom_wp(struct adapter *adapter, bool enable); int get_vpd_params(struct adapter *adapter, struct vpd_params *p); int t4_load_fw(struct adapter *adapter, const u8 *fw_data, unsigned int size); unsigned int t4_flash_cfg_addr(struct adapter *adapter); +int t4_load_cfg(struct adapter *adapter, const u8 *cfg_data, unsigned int size); int t4_check_fw_version(struct adapter *adapter); int t4_prep_adapter(struct adapter *adapter); int t4_port_init(struct adapter *adap, int mbox, int pf, int vf); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 6b9f6bb2f7e..604f4f87f55 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -443,7 +443,10 @@ int dbfifo_int_thresh = 10; /* 10 == 640 entry threshold */ module_param(dbfifo_int_thresh, int, 0644); MODULE_PARM_DESC(dbfifo_int_thresh, "doorbell fifo interrupt threshold"); -int dbfifo_drain_delay = 1000; /* usecs to sleep while draining the dbfifo */ +/* + * usecs to sleep while draining the dbfifo + */ +static int dbfifo_drain_delay = 1000; module_param(dbfifo_drain_delay, int, 0644); MODULE_PARM_DESC(dbfifo_drain_delay, "usecs to sleep while draining the dbfifo"); @@ -636,7 +639,7 @@ static void name_msix_vecs(struct adapter *adap) static int request_msix_queue_irqs(struct adapter *adap) { struct sge *s = &adap->sge; - int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, msi = 2; + int err, ethqidx, ofldqidx = 0, rdmaqidx = 0, msi_index = 2; err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0, adap->msix_info[1].desc, &s->fw_evtq); @@ -644,56 +647,60 @@ static int request_msix_queue_irqs(struct adapter *adap) return err; for_each_ethrxq(s, ethqidx) { - err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0, - adap->msix_info[msi].desc, + err = request_irq(adap->msix_info[msi_index].vec, + t4_sge_intr_msix, 0, + adap->msix_info[msi_index].desc, &s->ethrxq[ethqidx].rspq); if (err) goto unwind; - msi++; + msi_index++; } for_each_ofldrxq(s, ofldqidx) { - err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0, - adap->msix_info[msi].desc, + err = request_irq(adap->msix_info[msi_index].vec, + t4_sge_intr_msix, 0, + adap->msix_info[msi_index].desc, &s->ofldrxq[ofldqidx].rspq); if (err) goto unwind; - msi++; + msi_index++; } for_each_rdmarxq(s, rdmaqidx) { - err = request_irq(adap->msix_info[msi].vec, t4_sge_intr_msix, 0, - adap->msix_info[msi].desc, + err = request_irq(adap->msix_info[msi_index].vec, + t4_sge_intr_msix, 0, + adap->msix_info[msi_index].desc, &s->rdmarxq[rdmaqidx].rspq); if (err) goto unwind; - msi++; + msi_index++; } return 0; unwind: while (--rdmaqidx >= 0) - free_irq(adap->msix_info[--msi].vec, + free_irq(adap->msix_info[--msi_index].vec, &s->rdmarxq[rdmaqidx].rspq); while (--ofldqidx >= 0) - free_irq(adap->msix_info[--msi].vec, + free_irq(adap->msix_info[--msi_index].vec, &s->ofldrxq[ofldqidx].rspq); while (--ethqidx >= 0) - free_irq(adap->msix_info[--msi].vec, &s->ethrxq[ethqidx].rspq); + free_irq(adap->msix_info[--msi_index].vec, + &s->ethrxq[ethqidx].rspq); free_irq(adap->msix_info[1].vec, &s->fw_evtq); return err; } static void free_msix_queue_irqs(struct adapter *adap) { - int i, msi = 2; + int i, msi_index = 2; struct sge *s = &adap->sge; free_irq(adap->msix_info[1].vec, &s->fw_evtq); for_each_ethrxq(s, i) - free_irq(adap->msix_info[msi++].vec, &s->ethrxq[i].rspq); + free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq); for_each_ofldrxq(s, i) - free_irq(adap->msix_info[msi++].vec, &s->ofldrxq[i].rspq); + free_irq(adap->msix_info[msi_index++].vec, &s->ofldrxq[i].rspq); for_each_rdmarxq(s, i) - free_irq(adap->msix_info[msi++].vec, &s->rdmarxq[i].rspq); + free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq); } /** @@ -2535,9 +2542,8 @@ static int read_eq_indices(struct adapter *adap, u16 qid, u16 *pidx, u16 *cidx) ret = t4_mem_win_read_len(adap, addr, (__be32 *)&indices, 8); if (!ret) { - indices = be64_to_cpu(indices); - *cidx = (indices >> 25) & 0xffff; - *pidx = (indices >> 9) & 0xffff; + *cidx = (be64_to_cpu(indices) >> 25) & 0xffff; + *pidx = (be64_to_cpu(indices) >> 9) & 0xffff; } return ret; } @@ -3634,10 +3640,10 @@ static int adap_init0_no_config(struct adapter *adapter, int reset) * field selections will fit in the 36-bit budget. */ if (tp_vlan_pri_map != TP_VLAN_PRI_MAP_DEFAULT) { - int i, bits = 0; + int j, bits = 0; - for (i = TP_VLAN_PRI_MAP_FIRST; i <= TP_VLAN_PRI_MAP_LAST; i++) - switch (tp_vlan_pri_map & (1 << i)) { + for (j = TP_VLAN_PRI_MAP_FIRST; j <= TP_VLAN_PRI_MAP_LAST; j++) + switch (tp_vlan_pri_map & (1 << j)) { case 0: /* compressed filter field not enabled */ break; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index e914c4113b8..32eec15fe4c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -380,9 +380,11 @@ static int t4_mem_win_rw(struct adapter *adap, u32 addr, __be32 *data, int dir) /* Collecting data 4 bytes at a time upto MEMWIN0_APERTURE */ for (i = 0; i < MEMWIN0_APERTURE; i = i+0x4) { if (dir) - *data++ = t4_read_reg(adap, (MEMWIN0_BASE + i)); + *data++ = (__force __be32) t4_read_reg(adap, + (MEMWIN0_BASE + i)); else - t4_write_reg(adap, (MEMWIN0_BASE + i), *data++); + t4_write_reg(adap, (MEMWIN0_BASE + i), + (__force u32) *data++); } return 0; @@ -744,7 +746,7 @@ static int t4_read_flash(struct adapter *adapter, unsigned int addr, if (ret) return ret; if (byte_oriented) - *data = htonl(*data); + *data = (__force __u32) (htonl(*data)); } return 0; } @@ -992,7 +994,7 @@ int t4_load_fw(struct adapter *adap, const u8 *fw_data, unsigned int size) int ret, addr; unsigned int i; u8 first_page[SF_PAGE_SIZE]; - const u32 *p = (const u32 *)fw_data; + const __be32 *p = (const __be32 *)fw_data; const struct fw_hdr *hdr = (const struct fw_hdr *)fw_data; unsigned int sf_sec_size = adap->params.sf_size / adap->params.sf_nsec; unsigned int fw_img_start = adap->params.sf_fw_start; @@ -2315,7 +2317,8 @@ int t4_mem_win_read_len(struct adapter *adap, u32 addr, __be32 *data, int len) t4_read_reg(adap, PCIE_MEM_ACCESS_OFFSET); for (i = 0; i < len; i += 4) - *data++ = t4_read_reg(adap, (MEMWIN0_BASE + off + i)); + *data++ = (__force __be32) t4_read_reg(adap, + (MEMWIN0_BASE + off + i)); return 0; } From a2af139ff1cd85df586690ff626619ab1ee88b0a Mon Sep 17 00:00:00 2001 From: Graham Gower Date: Mon, 8 Oct 2012 08:34:50 +0000 Subject: [PATCH 32/61] skge: Add DMA mask quirk for Marvell 88E8001 on ASUS P5NSLI motherboard Marvell 88E8001 on an ASUS P5NSLI motherboard is unable to send/receive packets on a system with >4gb ram unless a 32bit DMA mask is used. This issue has been around for years and a fix was sent 3.5 years ago, but there was some debate as to whether it should instead be fixed as a PCI quirk. http://www.spinics.net/lists/netdev/msg88670.html However, 18 months later a similar workaround was introduced for another chipset exhibiting the same problem. http://www.spinics.net/lists/netdev/msg142287.html Signed-off-by: Graham Gower Signed-off-by: Jan Ceuleers Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/skge.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 91836b54da3..3f7dab46626 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -4155,6 +4155,13 @@ static struct dmi_system_id skge_32bit_dma_boards[] = { DMI_MATCH(DMI_BOARD_NAME, "nForce"), }, }, + { + .ident = "ASUS P5NSLI", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P5NSLI") + }, + }, {} }; From 2e71a6f8084e7ac87166dd77d99c44190fb844fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 6 Oct 2012 08:08:49 +0000 Subject: [PATCH 33/61] net: gro: selective flush of packets Current GRO can hold packets in gro_list for almost unlimited time, in case napi->poll() handler consumes its budget over and over. In this case, napi_complete()/napi_gro_flush() are not called. Another problem is that gro_list is flushed in non friendly way : We scan the list and complete packets in the reverse order. (youngest packets first, oldest packets last) This defeats priorities that sender could have cooked. Since GRO currently only store TCP packets, we dont really notice the bug because of retransmits, but this behavior can add unexpected latencies, particularly on mice flows clamped by elephant flows. This patch makes sure no packet can stay more than 1 ms in queue, and only in stress situations. It also complete packets in the right order to minimize latencies. Signed-off-by: Eric Dumazet Cc: Herbert Xu Cc: Jesse Gross Cc: Tom Herbert Cc: Yuchung Cheng Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/skge.c | 2 +- drivers/net/ethernet/realtek/8139cp.c | 2 +- include/linux/netdevice.h | 15 ++++++---- net/core/dev.c | 42 +++++++++++++++++++++------ 4 files changed, 44 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 3f7dab46626..9b9c2ac5c4c 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -3189,7 +3189,7 @@ static int skge_poll(struct napi_struct *napi, int to_do) if (work_done < to_do) { unsigned long flags; - napi_gro_flush(napi); + napi_gro_flush(napi, false); spin_lock_irqsave(&hw->hw_lock, flags); __napi_complete(napi); hw->intr_mask |= napimask[skge->port]; diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 995d0cfc4c0..1c818254b7b 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -563,7 +563,7 @@ rx_next: if (cpr16(IntrStatus) & cp_rx_intr_mask) goto rx_status_loop; - napi_gro_flush(napi); + napi_gro_flush(napi, false); spin_lock_irqsave(&cp->lock, flags); __napi_complete(napi); cpw16_f(IntrMask, cp_intr_mask); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a659fd0ba96..0a36fff75bd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1497,19 +1497,22 @@ struct napi_gro_cb { /* This indicates where we are processing relative to skb->data. */ int data_offset; - /* This is non-zero if the packet may be of the same flow. */ - int same_flow; - /* This is non-zero if the packet cannot be merged with the new skb. */ int flush; /* Number of segments aggregated. */ - int count; + u16 count; + + /* This is non-zero if the packet may be of the same flow. */ + u8 same_flow; /* Free the skb? */ - int free; + u8 free; #define NAPI_GRO_FREE 1 #define NAPI_GRO_FREE_STOLEN_HEAD 2 + + /* jiffies when first packet was created/queued */ + unsigned long age; }; #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) @@ -2156,7 +2159,7 @@ extern gro_result_t dev_gro_receive(struct napi_struct *napi, extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); extern gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); -extern void napi_gro_flush(struct napi_struct *napi); +extern void napi_gro_flush(struct napi_struct *napi, bool flush_old); extern struct sk_buff * napi_get_frags(struct napi_struct *napi); extern gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, diff --git a/net/core/dev.c b/net/core/dev.c index de2bad717d5..d44668f63c8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3471,17 +3471,31 @@ out: return netif_receive_skb(skb); } -inline void napi_gro_flush(struct napi_struct *napi) +/* napi->gro_list contains packets ordered by age. + * youngest packets at the head of it. + * Complete skbs in reverse order to reduce latencies. + */ +void napi_gro_flush(struct napi_struct *napi, bool flush_old) { - struct sk_buff *skb, *next; + struct sk_buff *skb, *prev = NULL; - for (skb = napi->gro_list; skb; skb = next) { - next = skb->next; - skb->next = NULL; - napi_gro_complete(skb); + /* scan list and build reverse chain */ + for (skb = napi->gro_list; skb != NULL; skb = skb->next) { + skb->prev = prev; + prev = skb; + } + + for (skb = prev; skb; skb = prev) { + skb->next = NULL; + + if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) + return; + + prev = skb->prev; + napi_gro_complete(skb); + napi->gro_count--; } - napi->gro_count = 0; napi->gro_list = NULL; } EXPORT_SYMBOL(napi_gro_flush); @@ -3542,6 +3556,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) napi->gro_count++; NAPI_GRO_CB(skb)->count = 1; + NAPI_GRO_CB(skb)->age = jiffies; skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; @@ -3878,7 +3893,7 @@ void napi_complete(struct napi_struct *n) if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state))) return; - napi_gro_flush(n); + napi_gro_flush(n, false); local_irq_save(flags); __napi_complete(n); local_irq_restore(flags); @@ -3983,8 +3998,17 @@ static void net_rx_action(struct softirq_action *h) local_irq_enable(); napi_complete(n); local_irq_disable(); - } else + } else { + if (n->gro_list) { + /* flush too old packets + * If HZ < 1000, flush all packets. + */ + local_irq_enable(); + napi_gro_flush(n, HZ >= 1000); + local_irq_disable(); + } list_move_tail(&n->poll_list, &sd->poll_list); + } } netpoll_poll_unlock(have); From 48cc32d38a52d0b68f91a171a8d00531edc6a46e Mon Sep 17 00:00:00 2001 From: Florian Zumbiehl Date: Sun, 7 Oct 2012 15:51:58 +0000 Subject: [PATCH 34/61] vlan: don't deliver frames for unknown vlans to protocols 6a32e4f9dd9219261f8856f817e6655114cfec2f made the vlan code skip marking vlan-tagged frames for not locally configured vlans as PACKET_OTHERHOST if there was an rx_handler, as the rx_handler could cause the frame to be received on a different (virtual) vlan-capable interface where that vlan might be configured. As rx_handlers do not necessarily return RX_HANDLER_ANOTHER, this could cause frames for unknown vlans to be delivered to the protocol stack as if they had been received untagged. For example, if an ipv6 router advertisement that's tagged for a locally not configured vlan is received on an interface with macvlan interfaces attached, macvlan's rx_handler returns RX_HANDLER_PASS after delivering the frame to the macvlan interfaces, which caused it to be passed to the protocol stack, leading to ipv6 addresses for the announced prefix being configured even though those are completely unusable on the underlying interface. The fix moves marking as PACKET_OTHERHOST after the rx_handler so the rx_handler, if there is one, sees the frame unchanged, but afterwards, before the frame is delivered to the protocol stack, it gets marked whether there is an rx_handler or not. Signed-off-by: Florian Zumbiehl Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 8 ++++---- net/8021q/vlan_core.c | 10 ++-------- net/core/dev.c | 7 +++++-- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index e6ff12dd717..c0ff748d0aa 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -80,6 +80,8 @@ static inline int is_vlan_dev(struct net_device *dev) } #define vlan_tx_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) +#define vlan_tx_nonzero_tag_present(__skb) \ + (vlan_tx_tag_present(__skb) && ((__skb)->vlan_tci & VLAN_VID_MASK)) #define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) @@ -89,7 +91,7 @@ extern struct net_device *__vlan_find_dev_deep(struct net_device *real_dev, extern struct net_device *vlan_dev_real_dev(const struct net_device *dev); extern u16 vlan_dev_vlan_id(const struct net_device *dev); -extern bool vlan_do_receive(struct sk_buff **skb, bool last_handler); +extern bool vlan_do_receive(struct sk_buff **skb); extern struct sk_buff *vlan_untag(struct sk_buff *skb); extern int vlan_vid_add(struct net_device *dev, unsigned short vid); @@ -120,10 +122,8 @@ static inline u16 vlan_dev_vlan_id(const struct net_device *dev) return 0; } -static inline bool vlan_do_receive(struct sk_buff **skb, bool last_handler) +static inline bool vlan_do_receive(struct sk_buff **skb) { - if (((*skb)->vlan_tci & VLAN_VID_MASK) && last_handler) - (*skb)->pkt_type = PACKET_OTHERHOST; return false; } diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index add69d0fd99..fbbf1fa0094 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -5,7 +5,7 @@ #include #include "vlan.h" -bool vlan_do_receive(struct sk_buff **skbp, bool last_handler) +bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; @@ -13,14 +13,8 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler) struct vlan_pcpu_stats *rx_stats; vlan_dev = vlan_find_dev(skb->dev, vlan_id); - if (!vlan_dev) { - /* Only the last call to vlan_do_receive() should change - * pkt_type to PACKET_OTHERHOST - */ - if (vlan_id && last_handler) - skb->pkt_type = PACKET_OTHERHOST; + if (!vlan_dev) return false; - } skb = *skbp = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) diff --git a/net/core/dev.c b/net/core/dev.c index d44668f63c8..09cb3f6dc40 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3300,18 +3300,18 @@ ncls: && !skb_pfmemalloc_protocol(skb)) goto drop; - rx_handler = rcu_dereference(skb->dev->rx_handler); if (vlan_tx_tag_present(skb)) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - if (vlan_do_receive(&skb, !rx_handler)) + if (vlan_do_receive(&skb)) goto another_round; else if (unlikely(!skb)) goto unlock; } + rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); @@ -3331,6 +3331,9 @@ ncls: } } + if (vlan_tx_nonzero_tag_present(skb)) + skb->pkt_type = PACKET_OTHERHOST; + /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; From 863472454ce50d4ef0929c6aa738cc5d64b84679 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Oct 2012 21:38:50 +0200 Subject: [PATCH 35/61] ipv6: gro: fix PV6_GRO_CB(skb)->proto problem It seems IPV6_GRO_CB(skb)->proto can be destroyed in skb_gro_receive() if a new skb is allocated (to serve as an anchor for frag_list) We copy NAPI_GRO_CB() only (not the IPV6 specific part) in : *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); So we leave IPV6_GRO_CB(nskb)->proto to 0 (fresh skb allocation) instead of IPPROTO_TCP (6) ipv6_gro_complete() isnt able to call ops->gro_complete() [ tcp6_gro_complete() ] Fix this by moving proto in NAPI_GRO_CB() and getting rid of IPV6_GRO_CB Signed-off-by: Eric Dumazet Cc: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/ipv6/af_inet6.c | 11 ++--------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0a36fff75bd..561c8bc8976 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1513,6 +1513,9 @@ struct napi_gro_cb { /* jiffies when first packet was created/queued */ unsigned long age; + + /* Used in ipv6_gro_receive() */ + int proto; }; #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index f757e3b7cfb..a974247a9ae 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -822,13 +822,6 @@ out: return segs; } -struct ipv6_gro_cb { - struct napi_gro_cb napi; - int proto; -}; - -#define IPV6_GRO_CB(skb) ((struct ipv6_gro_cb *)(skb)->cb) - static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -874,7 +867,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, iph = ipv6_hdr(skb); } - IPV6_GRO_CB(skb)->proto = proto; + NAPI_GRO_CB(skb)->proto = proto; flush--; nlen = skb_network_header_len(skb); @@ -930,7 +923,7 @@ static int ipv6_gro_complete(struct sk_buff *skb) sizeof(*iph)); rcu_read_lock(); - ops = rcu_dereference(inet6_protos[IPV6_GRO_CB(skb)->proto]); + ops = rcu_dereference(inet6_protos[NAPI_GRO_CB(skb)->proto]); if (WARN_ON(!ops || !ops->gro_complete)) goto out_unlock; From e81da0e113a1b7fc7449ae6213f65f89ccac6d06 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 8 Oct 2012 11:41:15 +0000 Subject: [PATCH 36/61] ipv4: fix sending of redirects After "Cache input routes in fib_info nexthops" (commit d2d68ba9fe) and "Elide fib_validate_source() completely when possible" (commit 7a9bc9b81a) we can not send ICMP redirects. It seems we should not cache the RTCF_DOREDIRECT flag in nh_rth_input because the same fib_info can be used for traffic that is not redirected, eg. from other input devices or from sources that are not in same subnet. As result, we have to disable the caching of RTCF_DOREDIRECT flag and to force source validation for the case when forwarding traffic to the input device. If traffic comes from directly connected source we allow redirection as it was done before both changes. Avoid setting RTCF_DOREDIRECT if IN_DEV_TX_REDIRECTS is disabled, this can avoid source address validation and to help caching the routes. After the change "Adjust semantics of rt->rt_gateway" (commit f8126f1d51) we should make sure our ICMP_REDIR_HOST messages contain daddr instead of 0.0.0.0 when target is directly connected. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 3 ++- net/ipv4/route.c | 30 ++++++++++++++++-------------- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 68c93d1bb03..825c608826d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -322,7 +322,8 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); - if (!r && !fib_num_tclassid_users(dev_net(dev))) { + if (!r && !fib_num_tclassid_users(dev_net(dev)) && + (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { *itag = 0; return 0; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 132e0dfee53..b90da1bc270 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -802,7 +802,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) net = dev_net(rt->dst.dev); peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1); if (!peer) { - icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, + rt_nexthop(rt, ip_hdr(skb)->daddr)); return; } @@ -827,7 +828,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) time_after(jiffies, (peer->rate_last + (ip_rt_redirect_load << peer->rate_tokens)))) { - icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); + __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr); + + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->rate_tokens; #ifdef CONFIG_IP_ROUTE_VERBOSE @@ -835,7 +838,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) peer->rate_tokens == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), - &ip_hdr(skb)->daddr, &rt->rt_gateway); + &ip_hdr(skb)->daddr, &gw); #endif } out_put_peer: @@ -1442,10 +1445,13 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - if (out_dev == in_dev && err && + do_cache = res->fi && !itag; + if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && (IN_DEV_SHARED_MEDIA(out_dev) || - inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) + inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) { flags |= RTCF_DOREDIRECT; + do_cache = false; + } if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is @@ -1462,15 +1468,11 @@ static int __mkroute_input(struct sk_buff *skb, } } - do_cache = false; - if (res->fi) { - if (!itag) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); - if (rt_cache_valid(rth)) { - skb_dst_set_noref(skb, &rth->dst); - goto out; - } - do_cache = true; + if (do_cache) { + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rt_cache_valid(rth)) { + skb_dst_set_noref(skb, &rth->dst); + goto out; } } From e0adef0f7456d5d3a3bfe8ea61c7dddf146b40e1 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 8 Oct 2012 11:41:16 +0000 Subject: [PATCH 37/61] ipv4: fix forwarding for strict source routes After the change "Adjust semantics of rt->rt_gateway" (commit f8126f1d51) rt_gateway can be 0 but ip_forward() compares it directly with nexthop. What we want here is to check if traffic is to directly connected nexthop and to fail if using gateway. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index ab09b126423..7f35ac26a71 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -85,7 +85,7 @@ int ip_forward(struct sk_buff *skb) rt = skb_rtable(skb); - if (opt->is_strictroute && opt->nexthop != rt->rt_gateway) + if (opt->is_strictroute && rt->rt_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && From f8a17175c63fd3e8b573719f7538816f8c96abf4 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 8 Oct 2012 11:41:17 +0000 Subject: [PATCH 38/61] ipv4: make sure nh_pcpu_rth_output is always allocated Avoid checking nh_pcpu_rth_output in fast path, abort fib_info creation on alloc_percpu failure. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 2 ++ net/ipv4/route.c | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 267753060ff..71b125cd5db 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -840,6 +840,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) change_nexthops(fi) { nexthop_nh->nh_parent = fi; nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); + if (!nexthop_nh->nh_pcpu_rth_output) + goto failure; } endfor_nexthops(fi) if (cfg->fc_mx) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b90da1bc270..5b0180f11b2 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1207,8 +1207,6 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) if (rt_is_input_route(rt)) { p = (struct rtable **)&nh->nh_rth_input; } else { - if (!nh->nh_pcpu_rth_output) - goto nocache; p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output); } orig = *p; @@ -1223,7 +1221,6 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) * unsuccessful at storing this route into the cache * we really need to set it. */ -nocache: rt->dst.flags |= DST_NOCACHE; ret = false; } From 155e8336c373d14d87a7f91e356d85ef4b93b8f9 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 8 Oct 2012 11:41:18 +0000 Subject: [PATCH 39/61] ipv4: introduce rt_uses_gateway Add new flag to remember when route is via gateway. We will use it to allow rt_gateway to contain address of directly connected host for the cases when DST_NOCACHE is used or when the NH exception caches per-destination route without DST_NOCACHE flag, i.e. when routes are not used for other destinations. By this way we force the neighbour resolving to work with the routed destination but we can use different address in the packet, feature needed for IPVS-DR where original packet for virtual IP is routed via route to real IP. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/route.h | 3 ++- net/ipv4/inet_connection_sock.c | 4 +-- net/ipv4/ip_forward.c | 2 +- net/ipv4/ip_output.c | 4 +-- net/ipv4/route.c | 48 ++++++++++++++++++--------------- net/ipv4/xfrm4_policy.c | 1 + 6 files changed, 34 insertions(+), 28 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index da22243d276..bc40b633a5c 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -48,7 +48,8 @@ struct rtable { int rt_genid; unsigned int rt_flags; __u16 rt_type; - __u16 rt_is_input; + __u8 rt_is_input; + __u8 rt_uses_gateway; int rt_iif; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f0c5b9c1a95..d34ce2972c8 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -406,7 +406,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; return &rt->dst; @@ -442,7 +442,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; rcu_read_unlock(); return &rt->dst; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 7f35ac26a71..694de3b7aeb 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -85,7 +85,7 @@ int ip_forward(struct sk_buff *skb) rt = skb_rtable(skb); - if (opt->is_strictroute && rt->rt_gateway) + if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 24a29a39e9a..6537a408a4f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -193,7 +193,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr; + nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); @@ -371,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) skb_dst_set_noref(skb, &rt->dst); packet_routed: - if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway) + if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway) goto no_route; /* OK, we know where to send it, allocate and build IP header. */ diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 5b0180f11b2..3a116cb0991 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1126,7 +1126,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { - if (rt->rt_gateway && mtu > 576) + if (rt->rt_uses_gateway && mtu > 576) mtu = 576; } @@ -1177,7 +1177,9 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, if (fnhe->fnhe_gw) { rt->rt_flags |= RTCF_REDIRECTED; rt->rt_gateway = fnhe->fnhe_gw; - } + rt->rt_uses_gateway = 1; + } else if (!rt->rt_gateway) + rt->rt_gateway = daddr; orig = rcu_dereference(fnhe->fnhe_rth); rcu_assign_pointer(fnhe->fnhe_rth, rt); @@ -1186,13 +1188,6 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, fnhe->fnhe_stamp = jiffies; ret = true; - } else { - /* Routes we intend to cache in nexthop exception have - * the DST_NOCACHE bit clear. However, if we are - * unsuccessful at storing this route into the cache - * we really need to set it. - */ - rt->dst.flags |= DST_NOCACHE; } spin_unlock_bh(&fnhe_lock); @@ -1215,15 +1210,8 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) if (prev == orig) { if (orig) rt_free(orig); - } else { - /* Routes we intend to cache in the FIB nexthop have - * the DST_NOCACHE bit clear. However, if we are - * unsuccessful at storing this route into the cache - * we really need to set it. - */ - rt->dst.flags |= DST_NOCACHE; + } else ret = false; - } return ret; } @@ -1284,8 +1272,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, if (fi) { struct fib_nh *nh = &FIB_RES_NH(*res); - if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) + if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) { rt->rt_gateway = nh->nh_gw; + rt->rt_uses_gateway = 1; + } dst_init_metrics(&rt->dst, fi->fib_metrics, true); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; @@ -1294,8 +1284,18 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, cached = rt_bind_exception(rt, fnhe, daddr); else if (!(rt->dst.flags & DST_NOCACHE)) cached = rt_cache_route(nh, rt); - } - if (unlikely(!cached)) + if (unlikely(!cached)) { + /* Routes we intend to cache in nexthop exception or + * FIB nexthop have the DST_NOCACHE bit clear. + * However, if we are unsuccessful at storing this + * route into the cache we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; + if (!rt->rt_gateway) + rt->rt_gateway = daddr; + rt_add_uncached_list(rt); + } + } else rt_add_uncached_list(rt); #ifdef CONFIG_IP_ROUTE_CLASSID @@ -1363,6 +1363,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); if (our) { rth->dst.input= ip_local_deliver; @@ -1432,7 +1433,6 @@ static int __mkroute_input(struct sk_buff *skb, return -EINVAL; } - err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), in_dev->dev, in_dev, &itag); if (err < 0) { @@ -1488,6 +1488,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); rth->dst.input = ip_forward; @@ -1658,6 +1659,7 @@ local_input: rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -1826,6 +1828,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); RT_CACHE_STAT_INC(out_slow_tot); @@ -2104,6 +2107,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_gateway = ort->rt_gateway; + rt->rt_uses_gateway = ort->rt_uses_gateway; INIT_LIST_HEAD(&rt->rt_uncached); @@ -2182,7 +2186,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } - if (rt->rt_gateway && + if (rt->rt_uses_gateway && nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 681ea2f413e..05c5ab8d983 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,6 +91,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; xdst->u.rt.rt_gateway = rt->rt_gateway; + xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); From c92b96553a80c1dbe2ebe128bbe37c8f98f148bf Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 8 Oct 2012 11:41:19 +0000 Subject: [PATCH 40/61] ipv4: Add FLOWI_FLAG_KNOWN_NH Add flag to request that output route should be returned with known rt_gateway, in case we want to use it as nexthop for neighbour resolving. The returned route can be cached as follows: - in NH exception: because the cached routes are not shared with other destinations - in FIB NH: when using gateway because all destinations for NH share same gateway As last option, to return rt_gateway!=0 we have to set DST_NOCACHE. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/flow.h | 1 + net/ipv4/route.c | 21 +++++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/include/net/flow.h b/include/net/flow.h index e1dd5082ec7..628e11b98c5 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -21,6 +21,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_CAN_SLEEP 0x02 +#define FLOWI_FLAG_KNOWN_NH 0x04 __u32 flowic_secid; }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3a116cb0991..1a0da8dc818 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1762,6 +1762,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, struct in_device *in_dev; u16 type = res->type; struct rtable *rth; + bool do_cache; in_dev = __in_dev_get_rcu(dev_out); if (!in_dev) @@ -1798,24 +1799,36 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } fnhe = NULL; + do_cache = fi != NULL; if (fi) { struct rtable __rcu **prth; + struct fib_nh *nh = &FIB_RES_NH(*res); - fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); + fnhe = find_exception(nh, fl4->daddr); if (fnhe) prth = &fnhe->fnhe_rth; - else - prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); + else { + if (unlikely(fl4->flowi4_flags & + FLOWI_FLAG_KNOWN_NH && + !(nh->nh_gw && + nh->nh_scope == RT_SCOPE_LINK))) { + do_cache = false; + goto add; + } + prth = __this_cpu_ptr(nh->nh_pcpu_rth_output); + } rth = rcu_dereference(*prth); if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; } } + +add: rth = rt_dst_alloc(dev_out, IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOXFRM), - fi); + do_cache); if (!rth) return ERR_PTR(-ENOBUFS); From ad4d3ef8b7eb527cca478dc08c02c10936e64115 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Mon, 8 Oct 2012 11:41:20 +0000 Subject: [PATCH 41/61] ipvs: fix ARP resolving for direct routing mode After the change "Make neigh lookups directly in output packet path" (commit a263b30936) IPVS can not reach the real server for DR mode because we resolve the destination address from IP header, not from route neighbour. Use the new FLOWI_FLAG_KNOWN_NH flag to request output routes with known nexthop, so that it has preference on resolving. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_xmit.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 56f6d5d81a7..cc4c8095681 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -50,6 +50,7 @@ enum { * local */ IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */ + IP_VS_RT_MODE_KNOWN_NH = 16,/* Route via remote addr */ }; /* @@ -113,6 +114,8 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr, fl4.daddr = daddr; fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0; fl4.flowi4_tos = rtos; + fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ? + FLOWI_FLAG_KNOWN_NH : 0; retry: rt = ip_route_output_key(net, &fl4); @@ -1061,7 +1064,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(iph->tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL, NULL))) + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_KNOWN_NH, NULL))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); From 7c41c42c5d9db9efd8caab4764e912458a1a1a79 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 8 Oct 2012 14:55:30 -0700 Subject: [PATCH 42/61] vxlan: fix more sparse warnings Fix a couple harmless sparse warnings reported by Fengguang Wu. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6f95580bf9d..8be9bf07bd3 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1083,13 +1083,13 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u32(skb, IFLA_VXLAN_ID, vxlan->vni)) goto nla_put_failure; - if (vxlan->gaddr && nla_put_u32(skb, IFLA_VXLAN_GROUP, vxlan->gaddr)) + if (vxlan->gaddr && nla_put_be32(skb, IFLA_VXLAN_GROUP, vxlan->gaddr)) goto nla_put_failure; if (vxlan->link && nla_put_u32(skb, IFLA_VXLAN_LINK, vxlan->link)) goto nla_put_failure; - if (vxlan->saddr && nla_put_u32(skb, IFLA_VXLAN_LOCAL, vxlan->saddr)) + if (vxlan->saddr && nla_put_be32(skb, IFLA_VXLAN_LOCAL, vxlan->saddr)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->ttl) || From 66eef59f22275002f621ff9d951886b513d011b3 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 9 Oct 2012 10:52:25 +1100 Subject: [PATCH 43/61] net: fix typo in freescale/ucc_geth.c The following patch: acb600d net: remove skb recycling added dev_free_skb() to drivers/net/ethernet/freescale/ucc_geth.c This is a typo and should be dev_kfree_skb(). This fixes this. Signed-off-by: Michael Neuling Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/ucc_geth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index dfa0aaaab00..0a70bb55d1b 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3268,7 +3268,7 @@ static int ucc_geth_rx(struct ucc_geth_private *ugeth, u8 rxQ, int rx_work_limit if (netif_msg_rx_err(ugeth)) ugeth_err("%s, %d: ERROR!!! skb - 0x%08x", __func__, __LINE__, (u32) skb); - dev_free_skb(skb); + dev_kfree_skb(skb); ugeth->rx_skbuff[rxQ][ugeth->skb_currx[rxQ]] = NULL; dev->stats.rx_dropped++; From 27a3aadcdc4f07c55f4d04e71268b6653ab4a4cf Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:48:40 +0100 Subject: [PATCH 44/61] UAPI: (Scripted) Disintegrate include/linux/caif Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones --- include/linux/caif/Kbuild | 2 -- include/uapi/linux/caif/Kbuild | 2 ++ include/{ => uapi}/linux/caif/caif_socket.h | 0 include/{ => uapi}/linux/caif/if_caif.h | 0 4 files changed, 2 insertions(+), 2 deletions(-) rename include/{ => uapi}/linux/caif/caif_socket.h (100%) rename include/{ => uapi}/linux/caif/if_caif.h (100%) diff --git a/include/linux/caif/Kbuild b/include/linux/caif/Kbuild index a9cf250689d..e69de29bb2d 100644 --- a/include/linux/caif/Kbuild +++ b/include/linux/caif/Kbuild @@ -1,2 +0,0 @@ -header-y += caif_socket.h -header-y += if_caif.h diff --git a/include/uapi/linux/caif/Kbuild b/include/uapi/linux/caif/Kbuild index aafaa5aa54d..43396612d3a 100644 --- a/include/uapi/linux/caif/Kbuild +++ b/include/uapi/linux/caif/Kbuild @@ -1 +1,3 @@ # UAPI Header export list +header-y += caif_socket.h +header-y += if_caif.h diff --git a/include/linux/caif/caif_socket.h b/include/uapi/linux/caif/caif_socket.h similarity index 100% rename from include/linux/caif/caif_socket.h rename to include/uapi/linux/caif/caif_socket.h diff --git a/include/linux/caif/if_caif.h b/include/uapi/linux/caif/if_caif.h similarity index 100% rename from include/linux/caif/if_caif.h rename to include/uapi/linux/caif/if_caif.h From 1855f1b1074ea8a76ced0f3ef25bbecd6b2a7222 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:48:45 +0100 Subject: [PATCH 45/61] UAPI: (Scripted) Disintegrate include/linux/isdn Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones --- include/linux/isdn/Kbuild | 1 - include/uapi/linux/isdn/Kbuild | 1 + include/{ => uapi}/linux/isdn/capicmd.h | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename include/{ => uapi}/linux/isdn/capicmd.h (100%) diff --git a/include/linux/isdn/Kbuild b/include/linux/isdn/Kbuild index 991cdb29ab2..e69de29bb2d 100644 --- a/include/linux/isdn/Kbuild +++ b/include/linux/isdn/Kbuild @@ -1 +0,0 @@ -header-y += capicmd.h diff --git a/include/uapi/linux/isdn/Kbuild b/include/uapi/linux/isdn/Kbuild index aafaa5aa54d..89e52850bf2 100644 --- a/include/uapi/linux/isdn/Kbuild +++ b/include/uapi/linux/isdn/Kbuild @@ -1 +1,2 @@ # UAPI Header export list +header-y += capicmd.h diff --git a/include/linux/isdn/capicmd.h b/include/uapi/linux/isdn/capicmd.h similarity index 100% rename from include/linux/isdn/capicmd.h rename to include/uapi/linux/isdn/capicmd.h From 94d0ec58e63159ce5bcdfe612ee220eaeefa3b2a Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:48:54 +0100 Subject: [PATCH 46/61] UAPI: (Scripted) Disintegrate include/linux/netfilter Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones --- include/linux/netfilter/Kbuild | 77 -------- include/linux/netfilter/nf_conntrack_common.h | 115 +---------- include/linux/netfilter/nf_conntrack_ftp.h | 16 +- include/linux/netfilter/nf_conntrack_tcp.h | 49 +---- include/linux/netfilter/nfnetlink.h | 55 +----- include/linux/netfilter/nfnetlink_acct.h | 25 +-- include/linux/netfilter/x_tables.h | 186 +---------------- include/linux/netfilter/xt_hashlimit.h | 71 +------ include/linux/netfilter/xt_physdev.h | 21 +- include/uapi/linux/netfilter/Kbuild | 76 +++++++ .../linux/netfilter/nf_conntrack_common.h | 117 +++++++++++ .../uapi/linux/netfilter/nf_conntrack_ftp.h | 18 ++ .../linux/netfilter/nf_conntrack_sctp.h | 0 .../uapi/linux/netfilter/nf_conntrack_tcp.h | 51 +++++ .../netfilter/nf_conntrack_tuple_common.h | 0 include/{ => uapi}/linux/netfilter/nf_nat.h | 0 include/uapi/linux/netfilter/nfnetlink.h | 56 ++++++ include/uapi/linux/netfilter/nfnetlink_acct.h | 27 +++ .../linux/netfilter/nfnetlink_compat.h | 0 .../linux/netfilter/nfnetlink_conntrack.h | 0 .../linux/netfilter/nfnetlink_cthelper.h | 0 .../linux/netfilter/nfnetlink_cttimeout.h | 0 .../linux/netfilter/nfnetlink_log.h | 0 .../linux/netfilter/nfnetlink_queue.h | 0 include/uapi/linux/netfilter/x_tables.h | 187 ++++++++++++++++++ include/{ => uapi}/linux/netfilter/xt_AUDIT.h | 0 .../{ => uapi}/linux/netfilter/xt_CHECKSUM.h | 0 .../{ => uapi}/linux/netfilter/xt_CLASSIFY.h | 0 .../{ => uapi}/linux/netfilter/xt_CONNMARK.h | 0 .../linux/netfilter/xt_CONNSECMARK.h | 0 include/{ => uapi}/linux/netfilter/xt_CT.h | 0 include/{ => uapi}/linux/netfilter/xt_DSCP.h | 0 .../{ => uapi}/linux/netfilter/xt_IDLETIMER.h | 0 include/{ => uapi}/linux/netfilter/xt_LED.h | 0 include/{ => uapi}/linux/netfilter/xt_LOG.h | 0 include/{ => uapi}/linux/netfilter/xt_MARK.h | 0 include/{ => uapi}/linux/netfilter/xt_NFLOG.h | 0 .../{ => uapi}/linux/netfilter/xt_NFQUEUE.h | 0 .../{ => uapi}/linux/netfilter/xt_RATEEST.h | 0 .../{ => uapi}/linux/netfilter/xt_SECMARK.h | 0 .../{ => uapi}/linux/netfilter/xt_TCPMSS.h | 0 .../linux/netfilter/xt_TCPOPTSTRIP.h | 0 include/{ => uapi}/linux/netfilter/xt_TEE.h | 0 .../{ => uapi}/linux/netfilter/xt_TPROXY.h | 0 .../{ => uapi}/linux/netfilter/xt_addrtype.h | 0 .../{ => uapi}/linux/netfilter/xt_cluster.h | 0 .../{ => uapi}/linux/netfilter/xt_comment.h | 0 .../{ => uapi}/linux/netfilter/xt_connbytes.h | 0 .../{ => uapi}/linux/netfilter/xt_connlimit.h | 0 .../{ => uapi}/linux/netfilter/xt_connmark.h | 0 .../{ => uapi}/linux/netfilter/xt_conntrack.h | 0 include/{ => uapi}/linux/netfilter/xt_cpu.h | 0 include/{ => uapi}/linux/netfilter/xt_dccp.h | 0 .../{ => uapi}/linux/netfilter/xt_devgroup.h | 0 include/{ => uapi}/linux/netfilter/xt_dscp.h | 0 include/{ => uapi}/linux/netfilter/xt_ecn.h | 0 include/{ => uapi}/linux/netfilter/xt_esp.h | 0 include/uapi/linux/netfilter/xt_hashlimit.h | 73 +++++++ .../{ => uapi}/linux/netfilter/xt_helper.h | 0 .../{ => uapi}/linux/netfilter/xt_iprange.h | 0 include/{ => uapi}/linux/netfilter/xt_ipvs.h | 0 .../{ => uapi}/linux/netfilter/xt_length.h | 0 include/{ => uapi}/linux/netfilter/xt_limit.h | 0 include/{ => uapi}/linux/netfilter/xt_mac.h | 0 include/{ => uapi}/linux/netfilter/xt_mark.h | 0 .../{ => uapi}/linux/netfilter/xt_multiport.h | 0 .../{ => uapi}/linux/netfilter/xt_nfacct.h | 0 include/{ => uapi}/linux/netfilter/xt_osf.h | 0 include/{ => uapi}/linux/netfilter/xt_owner.h | 0 include/uapi/linux/netfilter/xt_physdev.h | 23 +++ .../{ => uapi}/linux/netfilter/xt_pkttype.h | 0 .../{ => uapi}/linux/netfilter/xt_policy.h | 0 include/{ => uapi}/linux/netfilter/xt_quota.h | 0 .../{ => uapi}/linux/netfilter/xt_rateest.h | 0 include/{ => uapi}/linux/netfilter/xt_realm.h | 0 .../{ => uapi}/linux/netfilter/xt_recent.h | 0 include/{ => uapi}/linux/netfilter/xt_sctp.h | 0 include/{ => uapi}/linux/netfilter/xt_set.h | 0 .../{ => uapi}/linux/netfilter/xt_socket.h | 0 include/{ => uapi}/linux/netfilter/xt_state.h | 0 .../{ => uapi}/linux/netfilter/xt_statistic.h | 0 .../{ => uapi}/linux/netfilter/xt_string.h | 0 .../{ => uapi}/linux/netfilter/xt_tcpmss.h | 0 .../{ => uapi}/linux/netfilter/xt_tcpudp.h | 0 include/{ => uapi}/linux/netfilter/xt_time.h | 0 include/{ => uapi}/linux/netfilter/xt_u32.h | 0 86 files changed, 636 insertions(+), 607 deletions(-) create mode 100644 include/uapi/linux/netfilter/nf_conntrack_common.h create mode 100644 include/uapi/linux/netfilter/nf_conntrack_ftp.h rename include/{ => uapi}/linux/netfilter/nf_conntrack_sctp.h (100%) create mode 100644 include/uapi/linux/netfilter/nf_conntrack_tcp.h rename include/{ => uapi}/linux/netfilter/nf_conntrack_tuple_common.h (100%) rename include/{ => uapi}/linux/netfilter/nf_nat.h (100%) create mode 100644 include/uapi/linux/netfilter/nfnetlink.h create mode 100644 include/uapi/linux/netfilter/nfnetlink_acct.h rename include/{ => uapi}/linux/netfilter/nfnetlink_compat.h (100%) rename include/{ => uapi}/linux/netfilter/nfnetlink_conntrack.h (100%) rename include/{ => uapi}/linux/netfilter/nfnetlink_cthelper.h (100%) rename include/{ => uapi}/linux/netfilter/nfnetlink_cttimeout.h (100%) rename include/{ => uapi}/linux/netfilter/nfnetlink_log.h (100%) rename include/{ => uapi}/linux/netfilter/nfnetlink_queue.h (100%) create mode 100644 include/uapi/linux/netfilter/x_tables.h rename include/{ => uapi}/linux/netfilter/xt_AUDIT.h (100%) rename include/{ => uapi}/linux/netfilter/xt_CHECKSUM.h (100%) rename include/{ => uapi}/linux/netfilter/xt_CLASSIFY.h (100%) rename include/{ => uapi}/linux/netfilter/xt_CONNMARK.h (100%) rename include/{ => uapi}/linux/netfilter/xt_CONNSECMARK.h (100%) rename include/{ => uapi}/linux/netfilter/xt_CT.h (100%) rename include/{ => uapi}/linux/netfilter/xt_DSCP.h (100%) rename include/{ => uapi}/linux/netfilter/xt_IDLETIMER.h (100%) rename include/{ => uapi}/linux/netfilter/xt_LED.h (100%) rename include/{ => uapi}/linux/netfilter/xt_LOG.h (100%) rename include/{ => uapi}/linux/netfilter/xt_MARK.h (100%) rename include/{ => uapi}/linux/netfilter/xt_NFLOG.h (100%) rename include/{ => uapi}/linux/netfilter/xt_NFQUEUE.h (100%) rename include/{ => uapi}/linux/netfilter/xt_RATEEST.h (100%) rename include/{ => uapi}/linux/netfilter/xt_SECMARK.h (100%) rename include/{ => uapi}/linux/netfilter/xt_TCPMSS.h (100%) rename include/{ => uapi}/linux/netfilter/xt_TCPOPTSTRIP.h (100%) rename include/{ => uapi}/linux/netfilter/xt_TEE.h (100%) rename include/{ => uapi}/linux/netfilter/xt_TPROXY.h (100%) rename include/{ => uapi}/linux/netfilter/xt_addrtype.h (100%) rename include/{ => uapi}/linux/netfilter/xt_cluster.h (100%) rename include/{ => uapi}/linux/netfilter/xt_comment.h (100%) rename include/{ => uapi}/linux/netfilter/xt_connbytes.h (100%) rename include/{ => uapi}/linux/netfilter/xt_connlimit.h (100%) rename include/{ => uapi}/linux/netfilter/xt_connmark.h (100%) rename include/{ => uapi}/linux/netfilter/xt_conntrack.h (100%) rename include/{ => uapi}/linux/netfilter/xt_cpu.h (100%) rename include/{ => uapi}/linux/netfilter/xt_dccp.h (100%) rename include/{ => uapi}/linux/netfilter/xt_devgroup.h (100%) rename include/{ => uapi}/linux/netfilter/xt_dscp.h (100%) rename include/{ => uapi}/linux/netfilter/xt_ecn.h (100%) rename include/{ => uapi}/linux/netfilter/xt_esp.h (100%) create mode 100644 include/uapi/linux/netfilter/xt_hashlimit.h rename include/{ => uapi}/linux/netfilter/xt_helper.h (100%) rename include/{ => uapi}/linux/netfilter/xt_iprange.h (100%) rename include/{ => uapi}/linux/netfilter/xt_ipvs.h (100%) rename include/{ => uapi}/linux/netfilter/xt_length.h (100%) rename include/{ => uapi}/linux/netfilter/xt_limit.h (100%) rename include/{ => uapi}/linux/netfilter/xt_mac.h (100%) rename include/{ => uapi}/linux/netfilter/xt_mark.h (100%) rename include/{ => uapi}/linux/netfilter/xt_multiport.h (100%) rename include/{ => uapi}/linux/netfilter/xt_nfacct.h (100%) rename include/{ => uapi}/linux/netfilter/xt_osf.h (100%) rename include/{ => uapi}/linux/netfilter/xt_owner.h (100%) create mode 100644 include/uapi/linux/netfilter/xt_physdev.h rename include/{ => uapi}/linux/netfilter/xt_pkttype.h (100%) rename include/{ => uapi}/linux/netfilter/xt_policy.h (100%) rename include/{ => uapi}/linux/netfilter/xt_quota.h (100%) rename include/{ => uapi}/linux/netfilter/xt_rateest.h (100%) rename include/{ => uapi}/linux/netfilter/xt_realm.h (100%) rename include/{ => uapi}/linux/netfilter/xt_recent.h (100%) rename include/{ => uapi}/linux/netfilter/xt_sctp.h (100%) rename include/{ => uapi}/linux/netfilter/xt_set.h (100%) rename include/{ => uapi}/linux/netfilter/xt_socket.h (100%) rename include/{ => uapi}/linux/netfilter/xt_state.h (100%) rename include/{ => uapi}/linux/netfilter/xt_statistic.h (100%) rename include/{ => uapi}/linux/netfilter/xt_string.h (100%) rename include/{ => uapi}/linux/netfilter/xt_tcpmss.h (100%) rename include/{ => uapi}/linux/netfilter/xt_tcpudp.h (100%) rename include/{ => uapi}/linux/netfilter/xt_time.h (100%) rename include/{ => uapi}/linux/netfilter/xt_u32.h (100%) diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 874ae8f2706..b3322023e9a 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -1,78 +1 @@ header-y += ipset/ - -header-y += nf_conntrack_common.h -header-y += nf_conntrack_ftp.h -header-y += nf_conntrack_sctp.h -header-y += nf_conntrack_tcp.h -header-y += nf_conntrack_tuple_common.h -header-y += nf_nat.h -header-y += nfnetlink.h -header-y += nfnetlink_acct.h -header-y += nfnetlink_compat.h -header-y += nfnetlink_conntrack.h -header-y += nfnetlink_cthelper.h -header-y += nfnetlink_cttimeout.h -header-y += nfnetlink_log.h -header-y += nfnetlink_queue.h -header-y += x_tables.h -header-y += xt_AUDIT.h -header-y += xt_CHECKSUM.h -header-y += xt_CLASSIFY.h -header-y += xt_CONNMARK.h -header-y += xt_CONNSECMARK.h -header-y += xt_CT.h -header-y += xt_DSCP.h -header-y += xt_IDLETIMER.h -header-y += xt_LED.h -header-y += xt_LOG.h -header-y += xt_MARK.h -header-y += xt_nfacct.h -header-y += xt_NFLOG.h -header-y += xt_NFQUEUE.h -header-y += xt_RATEEST.h -header-y += xt_SECMARK.h -header-y += xt_TCPMSS.h -header-y += xt_TCPOPTSTRIP.h -header-y += xt_TEE.h -header-y += xt_TPROXY.h -header-y += xt_addrtype.h -header-y += xt_cluster.h -header-y += xt_comment.h -header-y += xt_connbytes.h -header-y += xt_connlimit.h -header-y += xt_connmark.h -header-y += xt_conntrack.h -header-y += xt_cpu.h -header-y += xt_dccp.h -header-y += xt_devgroup.h -header-y += xt_dscp.h -header-y += xt_ecn.h -header-y += xt_esp.h -header-y += xt_hashlimit.h -header-y += xt_helper.h -header-y += xt_iprange.h -header-y += xt_ipvs.h -header-y += xt_length.h -header-y += xt_limit.h -header-y += xt_mac.h -header-y += xt_mark.h -header-y += xt_multiport.h -header-y += xt_osf.h -header-y += xt_owner.h -header-y += xt_physdev.h -header-y += xt_pkttype.h -header-y += xt_policy.h -header-y += xt_quota.h -header-y += xt_rateest.h -header-y += xt_realm.h -header-y += xt_recent.h -header-y += xt_set.h -header-y += xt_sctp.h -header-y += xt_socket.h -header-y += xt_state.h -header-y += xt_statistic.h -header-y += xt_string.h -header-y += xt_tcpmss.h -header-y += xt_tcpudp.h -header-y += xt_time.h -header-y += xt_u32.h diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index d146872a0b9..127d0b90604 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -1,119 +1,8 @@ #ifndef _NF_CONNTRACK_COMMON_H #define _NF_CONNTRACK_COMMON_H -/* Connection state tracking for netfilter. This is separated from, - but required by, the NAT layer; it can also be used by an iptables - extension. */ -enum ip_conntrack_info { - /* Part of an established connection (either direction). */ - IP_CT_ESTABLISHED, - /* Like NEW, but related to an existing connection, or ICMP error - (in either direction). */ - IP_CT_RELATED, +#include - /* Started a new connection to track (only - IP_CT_DIR_ORIGINAL); may be a retransmission. */ - IP_CT_NEW, - - /* >= this indicates reply direction */ - IP_CT_IS_REPLY, - - IP_CT_ESTABLISHED_REPLY = IP_CT_ESTABLISHED + IP_CT_IS_REPLY, - IP_CT_RELATED_REPLY = IP_CT_RELATED + IP_CT_IS_REPLY, - IP_CT_NEW_REPLY = IP_CT_NEW + IP_CT_IS_REPLY, - /* Number of distinct IP_CT types (no NEW in reply dirn). */ - IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 -}; - -/* Bitset representing status of connection. */ -enum ip_conntrack_status { - /* It's an expected connection: bit 0 set. This bit never changed */ - IPS_EXPECTED_BIT = 0, - IPS_EXPECTED = (1 << IPS_EXPECTED_BIT), - - /* We've seen packets both ways: bit 1 set. Can be set, not unset. */ - IPS_SEEN_REPLY_BIT = 1, - IPS_SEEN_REPLY = (1 << IPS_SEEN_REPLY_BIT), - - /* Conntrack should never be early-expired. */ - IPS_ASSURED_BIT = 2, - IPS_ASSURED = (1 << IPS_ASSURED_BIT), - - /* Connection is confirmed: originating packet has left box */ - IPS_CONFIRMED_BIT = 3, - IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT), - - /* Connection needs src nat in orig dir. This bit never changed. */ - IPS_SRC_NAT_BIT = 4, - IPS_SRC_NAT = (1 << IPS_SRC_NAT_BIT), - - /* Connection needs dst nat in orig dir. This bit never changed. */ - IPS_DST_NAT_BIT = 5, - IPS_DST_NAT = (1 << IPS_DST_NAT_BIT), - - /* Both together. */ - IPS_NAT_MASK = (IPS_DST_NAT | IPS_SRC_NAT), - - /* Connection needs TCP sequence adjusted. */ - IPS_SEQ_ADJUST_BIT = 6, - IPS_SEQ_ADJUST = (1 << IPS_SEQ_ADJUST_BIT), - - /* NAT initialization bits. */ - IPS_SRC_NAT_DONE_BIT = 7, - IPS_SRC_NAT_DONE = (1 << IPS_SRC_NAT_DONE_BIT), - - IPS_DST_NAT_DONE_BIT = 8, - IPS_DST_NAT_DONE = (1 << IPS_DST_NAT_DONE_BIT), - - /* Both together */ - IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE), - - /* Connection is dying (removed from lists), can not be unset. */ - IPS_DYING_BIT = 9, - IPS_DYING = (1 << IPS_DYING_BIT), - - /* Connection has fixed timeout. */ - IPS_FIXED_TIMEOUT_BIT = 10, - IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), - - /* Conntrack is a template */ - IPS_TEMPLATE_BIT = 11, - IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT), - - /* Conntrack is a fake untracked entry */ - IPS_UNTRACKED_BIT = 12, - IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), - - /* Conntrack got a helper explicitly attached via CT target. */ - IPS_HELPER_BIT = 13, - IPS_HELPER = (1 << IPS_HELPER_BIT), -}; - -/* Connection tracking event types */ -enum ip_conntrack_events { - IPCT_NEW, /* new conntrack */ - IPCT_RELATED, /* related conntrack */ - IPCT_DESTROY, /* destroyed conntrack */ - IPCT_REPLY, /* connection has seen two-way traffic */ - IPCT_ASSURED, /* connection status has changed to assured */ - IPCT_PROTOINFO, /* protocol information has changed */ - IPCT_HELPER, /* new helper has been set */ - IPCT_MARK, /* new mark has been set */ - IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ - IPCT_SECMARK, /* new security mark has been set */ -}; - -enum ip_conntrack_expect_events { - IPEXP_NEW, /* new expectation */ - IPEXP_DESTROY, /* destroyed expectation */ -}; - -/* expectation flags */ -#define NF_CT_EXPECT_PERMANENT 0x1 -#define NF_CT_EXPECT_INACTIVE 0x2 -#define NF_CT_EXPECT_USERSPACE 0x4 - -#ifdef __KERNEL__ struct ip_conntrack_stat { unsigned int searched; unsigned int found; @@ -136,6 +25,4 @@ struct ip_conntrack_stat { /* call to create an explicit dependency on nf_conntrack. */ extern void need_conntrack(void); -#endif /* __KERNEL__ */ - #endif /* _NF_CONNTRACK_COMMON_H */ diff --git a/include/linux/netfilter/nf_conntrack_ftp.h b/include/linux/netfilter/nf_conntrack_ftp.h index 8faf3f792d1..5f818b01e03 100644 --- a/include/linux/netfilter/nf_conntrack_ftp.h +++ b/include/linux/netfilter/nf_conntrack_ftp.h @@ -1,20 +1,8 @@ #ifndef _NF_CONNTRACK_FTP_H #define _NF_CONNTRACK_FTP_H -/* FTP tracking. */ -/* This enum is exposed to userspace */ -enum nf_ct_ftp_type { - /* PORT command from client */ - NF_CT_FTP_PORT, - /* PASV response from server */ - NF_CT_FTP_PASV, - /* EPRT command from client */ - NF_CT_FTP_EPRT, - /* EPSV response from server */ - NF_CT_FTP_EPSV, -}; +#include -#ifdef __KERNEL__ #define FTP_PORT 21 @@ -42,6 +30,4 @@ extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, unsigned int matchoff, unsigned int matchlen, struct nf_conntrack_expect *exp); -#endif /* __KERNEL__ */ - #endif /* _NF_CONNTRACK_FTP_H */ diff --git a/include/linux/netfilter/nf_conntrack_tcp.h b/include/linux/netfilter/nf_conntrack_tcp.h index e59868ae12d..22db9614b58 100644 --- a/include/linux/netfilter/nf_conntrack_tcp.h +++ b/include/linux/netfilter/nf_conntrack_tcp.h @@ -1,53 +1,8 @@ #ifndef _NF_CONNTRACK_TCP_H #define _NF_CONNTRACK_TCP_H -/* TCP tracking. */ -#include +#include -/* This is exposed to userspace (ctnetlink) */ -enum tcp_conntrack { - TCP_CONNTRACK_NONE, - TCP_CONNTRACK_SYN_SENT, - TCP_CONNTRACK_SYN_RECV, - TCP_CONNTRACK_ESTABLISHED, - TCP_CONNTRACK_FIN_WAIT, - TCP_CONNTRACK_CLOSE_WAIT, - TCP_CONNTRACK_LAST_ACK, - TCP_CONNTRACK_TIME_WAIT, - TCP_CONNTRACK_CLOSE, - TCP_CONNTRACK_LISTEN, /* obsolete */ -#define TCP_CONNTRACK_SYN_SENT2 TCP_CONNTRACK_LISTEN - TCP_CONNTRACK_MAX, - TCP_CONNTRACK_IGNORE, - TCP_CONNTRACK_RETRANS, - TCP_CONNTRACK_UNACK, - TCP_CONNTRACK_TIMEOUT_MAX -}; - -/* Window scaling is advertised by the sender */ -#define IP_CT_TCP_FLAG_WINDOW_SCALE 0x01 - -/* SACK is permitted by the sender */ -#define IP_CT_TCP_FLAG_SACK_PERM 0x02 - -/* This sender sent FIN first */ -#define IP_CT_TCP_FLAG_CLOSE_INIT 0x04 - -/* Be liberal in window checking */ -#define IP_CT_TCP_FLAG_BE_LIBERAL 0x08 - -/* Has unacknowledged data */ -#define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED 0x10 - -/* The field td_maxack has been set */ -#define IP_CT_TCP_FLAG_MAXACK_SET 0x20 - -struct nf_ct_tcp_flags { - __u8 flags; - __u8 mask; -}; - -#ifdef __KERNEL__ struct ip_ct_tcp_state { u_int32_t td_end; /* max of seq + len */ @@ -74,6 +29,4 @@ struct ip_ct_tcp { u_int8_t last_flags; /* Last flags set */ }; -#endif /* __KERNEL__ */ - #endif /* _NF_CONNTRACK_TCP_H */ diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 18341cdb244..4966ddec039 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -1,63 +1,11 @@ #ifndef _NFNETLINK_H #define _NFNETLINK_H -#include -#include -enum nfnetlink_groups { - NFNLGRP_NONE, -#define NFNLGRP_NONE NFNLGRP_NONE - NFNLGRP_CONNTRACK_NEW, -#define NFNLGRP_CONNTRACK_NEW NFNLGRP_CONNTRACK_NEW - NFNLGRP_CONNTRACK_UPDATE, -#define NFNLGRP_CONNTRACK_UPDATE NFNLGRP_CONNTRACK_UPDATE - NFNLGRP_CONNTRACK_DESTROY, -#define NFNLGRP_CONNTRACK_DESTROY NFNLGRP_CONNTRACK_DESTROY - NFNLGRP_CONNTRACK_EXP_NEW, -#define NFNLGRP_CONNTRACK_EXP_NEW NFNLGRP_CONNTRACK_EXP_NEW - NFNLGRP_CONNTRACK_EXP_UPDATE, -#define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE - NFNLGRP_CONNTRACK_EXP_DESTROY, -#define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY - __NFNLGRP_MAX, -}; -#define NFNLGRP_MAX (__NFNLGRP_MAX - 1) - -/* General form of address family dependent message. - */ -struct nfgenmsg { - __u8 nfgen_family; /* AF_xxx */ - __u8 version; /* nfnetlink version */ - __be16 res_id; /* resource id */ -}; - -#define NFNETLINK_V0 0 - -/* netfilter netlink message types are split in two pieces: - * 8 bit subsystem, 8bit operation. - */ - -#define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) -#define NFNL_MSG_TYPE(x) (x & 0x00ff) - -/* No enum here, otherwise __stringify() trick of MODULE_ALIAS_NFNL_SUBSYS() - * won't work anymore */ -#define NFNL_SUBSYS_NONE 0 -#define NFNL_SUBSYS_CTNETLINK 1 -#define NFNL_SUBSYS_CTNETLINK_EXP 2 -#define NFNL_SUBSYS_QUEUE 3 -#define NFNL_SUBSYS_ULOG 4 -#define NFNL_SUBSYS_OSF 5 -#define NFNL_SUBSYS_IPSET 6 -#define NFNL_SUBSYS_ACCT 7 -#define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 -#define NFNL_SUBSYS_CTHELPER 9 -#define NFNL_SUBSYS_COUNT 10 - -#ifdef __KERNEL__ #include #include #include +#include struct nfnl_callback { int (*call)(struct sock *nl, struct sk_buff *skb, @@ -92,5 +40,4 @@ extern void nfnl_unlock(void); #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) -#endif /* __KERNEL__ */ #endif /* _NFNETLINK_H */ diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h index 7c4279b4ae7..bb4bbc9b7a1 100644 --- a/include/linux/netfilter/nfnetlink_acct.h +++ b/include/linux/netfilter/nfnetlink_acct.h @@ -1,29 +1,8 @@ #ifndef _NFNL_ACCT_H_ #define _NFNL_ACCT_H_ -#ifndef NFACCT_NAME_MAX -#define NFACCT_NAME_MAX 32 -#endif +#include -enum nfnl_acct_msg_types { - NFNL_MSG_ACCT_NEW, - NFNL_MSG_ACCT_GET, - NFNL_MSG_ACCT_GET_CTRZERO, - NFNL_MSG_ACCT_DEL, - NFNL_MSG_ACCT_MAX -}; - -enum nfnl_acct_type { - NFACCT_UNSPEC, - NFACCT_NAME, - NFACCT_PKTS, - NFACCT_BYTES, - NFACCT_USE, - __NFACCT_MAX -}; -#define NFACCT_MAX (__NFACCT_MAX - 1) - -#ifdef __KERNEL__ struct nf_acct; @@ -31,6 +10,4 @@ extern struct nf_acct *nfnl_acct_find_get(const char *filter_name); extern void nfnl_acct_put(struct nf_acct *acct); extern void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct); -#endif /* __KERNEL__ */ - #endif /* _NFNL_ACCT_H */ diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8d674a78674..dd49566315c 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -1,191 +1,9 @@ #ifndef _X_TABLES_H #define _X_TABLES_H -#include -#include -#define XT_FUNCTION_MAXNAMELEN 30 -#define XT_EXTENSION_MAXNAMELEN 29 -#define XT_TABLE_MAXNAMELEN 32 - -struct xt_entry_match { - union { - struct { - __u16 match_size; - - /* Used by userspace */ - char name[XT_EXTENSION_MAXNAMELEN]; - __u8 revision; - } user; - struct { - __u16 match_size; - - /* Used inside the kernel */ - struct xt_match *match; - } kernel; - - /* Total length */ - __u16 match_size; - } u; - - unsigned char data[0]; -}; - -struct xt_entry_target { - union { - struct { - __u16 target_size; - - /* Used by userspace */ - char name[XT_EXTENSION_MAXNAMELEN]; - __u8 revision; - } user; - struct { - __u16 target_size; - - /* Used inside the kernel */ - struct xt_target *target; - } kernel; - - /* Total length */ - __u16 target_size; - } u; - - unsigned char data[0]; -}; - -#define XT_TARGET_INIT(__name, __size) \ -{ \ - .target.u.user = { \ - .target_size = XT_ALIGN(__size), \ - .name = __name, \ - }, \ -} - -struct xt_standard_target { - struct xt_entry_target target; - int verdict; -}; - -struct xt_error_target { - struct xt_entry_target target; - char errorname[XT_FUNCTION_MAXNAMELEN]; -}; - -/* The argument to IPT_SO_GET_REVISION_*. Returns highest revision - * kernel supports, if >= revision. */ -struct xt_get_revision { - char name[XT_EXTENSION_MAXNAMELEN]; - __u8 revision; -}; - -/* CONTINUE verdict for targets */ -#define XT_CONTINUE 0xFFFFFFFF - -/* For standard target */ -#define XT_RETURN (-NF_REPEAT - 1) - -/* this is a dummy structure to find out the alignment requirement for a struct - * containing all the fundamental data types that are used in ipt_entry, - * ip6t_entry and arpt_entry. This sucks, and it is a hack. It will be my - * personal pleasure to remove it -HW - */ -struct _xt_align { - __u8 u8; - __u16 u16; - __u32 u32; - __u64 u64; -}; - -#define XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _xt_align)) - -/* Standard return verdict, or do jump. */ -#define XT_STANDARD_TARGET "" -/* Error verdict. */ -#define XT_ERROR_TARGET "ERROR" - -#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) -#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) - -struct xt_counters { - __u64 pcnt, bcnt; /* Packet and byte counters */ -}; - -/* The argument to IPT_SO_ADD_COUNTERS. */ -struct xt_counters_info { - /* Which table. */ - char name[XT_TABLE_MAXNAMELEN]; - - unsigned int num_counters; - - /* The counters (actually `number' of these). */ - struct xt_counters counters[0]; -}; - -#define XT_INV_PROTO 0x40 /* Invert the sense of PROTO. */ - -#ifndef __KERNEL__ -/* fn returns 0 to continue iteration */ -#define XT_MATCH_ITERATE(type, e, fn, args...) \ -({ \ - unsigned int __i; \ - int __ret = 0; \ - struct xt_entry_match *__m; \ - \ - for (__i = sizeof(type); \ - __i < (e)->target_offset; \ - __i += __m->u.match_size) { \ - __m = (void *)e + __i; \ - \ - __ret = fn(__m , ## args); \ - if (__ret != 0) \ - break; \ - } \ - __ret; \ -}) - -/* fn returns 0 to continue iteration */ -#define XT_ENTRY_ITERATE_CONTINUE(type, entries, size, n, fn, args...) \ -({ \ - unsigned int __i, __n; \ - int __ret = 0; \ - type *__entry; \ - \ - for (__i = 0, __n = 0; __i < (size); \ - __i += __entry->next_offset, __n++) { \ - __entry = (void *)(entries) + __i; \ - if (__n < n) \ - continue; \ - \ - __ret = fn(__entry , ## args); \ - if (__ret != 0) \ - break; \ - } \ - __ret; \ -}) - -/* fn returns 0 to continue iteration */ -#define XT_ENTRY_ITERATE(type, entries, size, fn, args...) \ - XT_ENTRY_ITERATE_CONTINUE(type, entries, size, 0, fn, args) - -#endif /* !__KERNEL__ */ - -/* pos is normally a struct ipt_entry/ip6t_entry/etc. */ -#define xt_entry_foreach(pos, ehead, esize) \ - for ((pos) = (typeof(pos))(ehead); \ - (pos) < (typeof(pos))((char *)(ehead) + (esize)); \ - (pos) = (typeof(pos))((char *)(pos) + (pos)->next_offset)) - -/* can only be xt_entry_match, so no use of typeof here */ -#define xt_ematch_foreach(pos, entry) \ - for ((pos) = (struct xt_entry_match *)entry->elems; \ - (pos) < (struct xt_entry_match *)((char *)(entry) + \ - (entry)->target_offset); \ - (pos) = (struct xt_entry_match *)((char *)(pos) + \ - (pos)->u.match_size)) - -#ifdef __KERNEL__ #include +#include /** * struct xt_action_param - parameters for matches/targets @@ -617,6 +435,4 @@ extern int xt_compat_target_to_user(const struct xt_entry_target *t, void __user **dstptr, unsigned int *size); #endif /* CONFIG_COMPAT */ -#endif /* __KERNEL__ */ - #endif /* _X_TABLES_H */ diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h index c42e52f39f8..074790c0cf7 100644 --- a/include/linux/netfilter/xt_hashlimit.h +++ b/include/linux/netfilter/xt_hashlimit.h @@ -1,78 +1,9 @@ #ifndef _XT_HASHLIMIT_H #define _XT_HASHLIMIT_H -#include +#include -/* timings are in milliseconds. */ -#define XT_HASHLIMIT_SCALE 10000 -/* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 - * seconds, or one packet every 59 hours. - */ - -/* packet length accounting is done in 16-byte steps */ -#define XT_HASHLIMIT_BYTE_SHIFT 4 - -/* details of this structure hidden by the implementation */ -struct xt_hashlimit_htable; - -enum { - XT_HASHLIMIT_HASH_DIP = 1 << 0, - XT_HASHLIMIT_HASH_DPT = 1 << 1, - XT_HASHLIMIT_HASH_SIP = 1 << 2, - XT_HASHLIMIT_HASH_SPT = 1 << 3, - XT_HASHLIMIT_INVERT = 1 << 4, - XT_HASHLIMIT_BYTES = 1 << 5, -}; -#ifdef __KERNEL__ #define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \ XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \ XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES) -#endif - -struct hashlimit_cfg { - __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */ - __u32 avg; /* Average secs between packets * scale */ - __u32 burst; /* Period multiplier for upper limit. */ - - /* user specified */ - __u32 size; /* how many buckets */ - __u32 max; /* max number of entries */ - __u32 gc_interval; /* gc interval */ - __u32 expire; /* when do entries expire? */ -}; - -struct xt_hashlimit_info { - char name [IFNAMSIZ]; /* name */ - struct hashlimit_cfg cfg; - - /* Used internally by the kernel */ - struct xt_hashlimit_htable *hinfo; - union { - void *ptr; - struct xt_hashlimit_info *master; - } u; -}; - -struct hashlimit_cfg1 { - __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */ - __u32 avg; /* Average secs between packets * scale */ - __u32 burst; /* Period multiplier for upper limit. */ - - /* user specified */ - __u32 size; /* how many buckets */ - __u32 max; /* max number of entries */ - __u32 gc_interval; /* gc interval */ - __u32 expire; /* when do entries expire? */ - - __u8 srcmask, dstmask; -}; - -struct xt_hashlimit_mtinfo1 { - char name[IFNAMSIZ]; - struct hashlimit_cfg1 cfg; - - /* Used internally by the kernel */ - struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); -}; - #endif /*_XT_HASHLIMIT_H*/ diff --git a/include/linux/netfilter/xt_physdev.h b/include/linux/netfilter/xt_physdev.h index 8555e399886..5b5e41716d6 100644 --- a/include/linux/netfilter/xt_physdev.h +++ b/include/linux/netfilter/xt_physdev.h @@ -1,26 +1,7 @@ #ifndef _XT_PHYSDEV_H #define _XT_PHYSDEV_H -#include - -#ifdef __KERNEL__ #include -#endif - -#define XT_PHYSDEV_OP_IN 0x01 -#define XT_PHYSDEV_OP_OUT 0x02 -#define XT_PHYSDEV_OP_BRIDGED 0x04 -#define XT_PHYSDEV_OP_ISIN 0x08 -#define XT_PHYSDEV_OP_ISOUT 0x10 -#define XT_PHYSDEV_OP_MASK (0x20 - 1) - -struct xt_physdev_info { - char physindev[IFNAMSIZ]; - char in_mask[IFNAMSIZ]; - char physoutdev[IFNAMSIZ]; - char out_mask[IFNAMSIZ]; - __u8 invert; - __u8 bitmask; -}; +#include #endif /*_XT_PHYSDEV_H*/ diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 4afbace8e86..08f555fef13 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -1,2 +1,78 @@ # UAPI Header export list header-y += ipset/ +header-y += nf_conntrack_common.h +header-y += nf_conntrack_ftp.h +header-y += nf_conntrack_sctp.h +header-y += nf_conntrack_tcp.h +header-y += nf_conntrack_tuple_common.h +header-y += nf_nat.h +header-y += nfnetlink.h +header-y += nfnetlink_acct.h +header-y += nfnetlink_compat.h +header-y += nfnetlink_conntrack.h +header-y += nfnetlink_cthelper.h +header-y += nfnetlink_cttimeout.h +header-y += nfnetlink_log.h +header-y += nfnetlink_queue.h +header-y += x_tables.h +header-y += xt_AUDIT.h +header-y += xt_CHECKSUM.h +header-y += xt_CLASSIFY.h +header-y += xt_CONNMARK.h +header-y += xt_CONNSECMARK.h +header-y += xt_CT.h +header-y += xt_DSCP.h +header-y += xt_IDLETIMER.h +header-y += xt_LED.h +header-y += xt_LOG.h +header-y += xt_MARK.h +header-y += xt_NFLOG.h +header-y += xt_NFQUEUE.h +header-y += xt_RATEEST.h +header-y += xt_SECMARK.h +header-y += xt_TCPMSS.h +header-y += xt_TCPOPTSTRIP.h +header-y += xt_TEE.h +header-y += xt_TPROXY.h +header-y += xt_addrtype.h +header-y += xt_cluster.h +header-y += xt_comment.h +header-y += xt_connbytes.h +header-y += xt_connlimit.h +header-y += xt_connmark.h +header-y += xt_conntrack.h +header-y += xt_cpu.h +header-y += xt_dccp.h +header-y += xt_devgroup.h +header-y += xt_dscp.h +header-y += xt_ecn.h +header-y += xt_esp.h +header-y += xt_hashlimit.h +header-y += xt_helper.h +header-y += xt_iprange.h +header-y += xt_ipvs.h +header-y += xt_length.h +header-y += xt_limit.h +header-y += xt_mac.h +header-y += xt_mark.h +header-y += xt_multiport.h +header-y += xt_nfacct.h +header-y += xt_osf.h +header-y += xt_owner.h +header-y += xt_physdev.h +header-y += xt_pkttype.h +header-y += xt_policy.h +header-y += xt_quota.h +header-y += xt_rateest.h +header-y += xt_realm.h +header-y += xt_recent.h +header-y += xt_sctp.h +header-y += xt_set.h +header-y += xt_socket.h +header-y += xt_state.h +header-y += xt_statistic.h +header-y += xt_string.h +header-y += xt_tcpmss.h +header-y += xt_tcpudp.h +header-y += xt_time.h +header-y += xt_u32.h diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h new file mode 100644 index 00000000000..1644cdd8be9 --- /dev/null +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -0,0 +1,117 @@ +#ifndef _UAPI_NF_CONNTRACK_COMMON_H +#define _UAPI_NF_CONNTRACK_COMMON_H +/* Connection state tracking for netfilter. This is separated from, + but required by, the NAT layer; it can also be used by an iptables + extension. */ +enum ip_conntrack_info { + /* Part of an established connection (either direction). */ + IP_CT_ESTABLISHED, + + /* Like NEW, but related to an existing connection, or ICMP error + (in either direction). */ + IP_CT_RELATED, + + /* Started a new connection to track (only + IP_CT_DIR_ORIGINAL); may be a retransmission. */ + IP_CT_NEW, + + /* >= this indicates reply direction */ + IP_CT_IS_REPLY, + + IP_CT_ESTABLISHED_REPLY = IP_CT_ESTABLISHED + IP_CT_IS_REPLY, + IP_CT_RELATED_REPLY = IP_CT_RELATED + IP_CT_IS_REPLY, + IP_CT_NEW_REPLY = IP_CT_NEW + IP_CT_IS_REPLY, + /* Number of distinct IP_CT types (no NEW in reply dirn). */ + IP_CT_NUMBER = IP_CT_IS_REPLY * 2 - 1 +}; + +/* Bitset representing status of connection. */ +enum ip_conntrack_status { + /* It's an expected connection: bit 0 set. This bit never changed */ + IPS_EXPECTED_BIT = 0, + IPS_EXPECTED = (1 << IPS_EXPECTED_BIT), + + /* We've seen packets both ways: bit 1 set. Can be set, not unset. */ + IPS_SEEN_REPLY_BIT = 1, + IPS_SEEN_REPLY = (1 << IPS_SEEN_REPLY_BIT), + + /* Conntrack should never be early-expired. */ + IPS_ASSURED_BIT = 2, + IPS_ASSURED = (1 << IPS_ASSURED_BIT), + + /* Connection is confirmed: originating packet has left box */ + IPS_CONFIRMED_BIT = 3, + IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT), + + /* Connection needs src nat in orig dir. This bit never changed. */ + IPS_SRC_NAT_BIT = 4, + IPS_SRC_NAT = (1 << IPS_SRC_NAT_BIT), + + /* Connection needs dst nat in orig dir. This bit never changed. */ + IPS_DST_NAT_BIT = 5, + IPS_DST_NAT = (1 << IPS_DST_NAT_BIT), + + /* Both together. */ + IPS_NAT_MASK = (IPS_DST_NAT | IPS_SRC_NAT), + + /* Connection needs TCP sequence adjusted. */ + IPS_SEQ_ADJUST_BIT = 6, + IPS_SEQ_ADJUST = (1 << IPS_SEQ_ADJUST_BIT), + + /* NAT initialization bits. */ + IPS_SRC_NAT_DONE_BIT = 7, + IPS_SRC_NAT_DONE = (1 << IPS_SRC_NAT_DONE_BIT), + + IPS_DST_NAT_DONE_BIT = 8, + IPS_DST_NAT_DONE = (1 << IPS_DST_NAT_DONE_BIT), + + /* Both together */ + IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE), + + /* Connection is dying (removed from lists), can not be unset. */ + IPS_DYING_BIT = 9, + IPS_DYING = (1 << IPS_DYING_BIT), + + /* Connection has fixed timeout. */ + IPS_FIXED_TIMEOUT_BIT = 10, + IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), + + /* Conntrack is a template */ + IPS_TEMPLATE_BIT = 11, + IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT), + + /* Conntrack is a fake untracked entry */ + IPS_UNTRACKED_BIT = 12, + IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT), + + /* Conntrack got a helper explicitly attached via CT target. */ + IPS_HELPER_BIT = 13, + IPS_HELPER = (1 << IPS_HELPER_BIT), +}; + +/* Connection tracking event types */ +enum ip_conntrack_events { + IPCT_NEW, /* new conntrack */ + IPCT_RELATED, /* related conntrack */ + IPCT_DESTROY, /* destroyed conntrack */ + IPCT_REPLY, /* connection has seen two-way traffic */ + IPCT_ASSURED, /* connection status has changed to assured */ + IPCT_PROTOINFO, /* protocol information has changed */ + IPCT_HELPER, /* new helper has been set */ + IPCT_MARK, /* new mark has been set */ + IPCT_NATSEQADJ, /* NAT is doing sequence adjustment */ + IPCT_SECMARK, /* new security mark has been set */ +}; + +enum ip_conntrack_expect_events { + IPEXP_NEW, /* new expectation */ + IPEXP_DESTROY, /* destroyed expectation */ +}; + +/* expectation flags */ +#define NF_CT_EXPECT_PERMANENT 0x1 +#define NF_CT_EXPECT_INACTIVE 0x2 +#define NF_CT_EXPECT_USERSPACE 0x4 + + +#endif /* _UAPI_NF_CONNTRACK_COMMON_H */ diff --git a/include/uapi/linux/netfilter/nf_conntrack_ftp.h b/include/uapi/linux/netfilter/nf_conntrack_ftp.h new file mode 100644 index 00000000000..1030315a41b --- /dev/null +++ b/include/uapi/linux/netfilter/nf_conntrack_ftp.h @@ -0,0 +1,18 @@ +#ifndef _UAPI_NF_CONNTRACK_FTP_H +#define _UAPI_NF_CONNTRACK_FTP_H +/* FTP tracking. */ + +/* This enum is exposed to userspace */ +enum nf_ct_ftp_type { + /* PORT command from client */ + NF_CT_FTP_PORT, + /* PASV response from server */ + NF_CT_FTP_PASV, + /* EPRT command from client */ + NF_CT_FTP_EPRT, + /* EPSV response from server */ + NF_CT_FTP_EPSV, +}; + + +#endif /* _UAPI_NF_CONNTRACK_FTP_H */ diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h similarity index 100% rename from include/linux/netfilter/nf_conntrack_sctp.h rename to include/uapi/linux/netfilter/nf_conntrack_sctp.h diff --git a/include/uapi/linux/netfilter/nf_conntrack_tcp.h b/include/uapi/linux/netfilter/nf_conntrack_tcp.h new file mode 100644 index 00000000000..9993a421201 --- /dev/null +++ b/include/uapi/linux/netfilter/nf_conntrack_tcp.h @@ -0,0 +1,51 @@ +#ifndef _UAPI_NF_CONNTRACK_TCP_H +#define _UAPI_NF_CONNTRACK_TCP_H +/* TCP tracking. */ + +#include + +/* This is exposed to userspace (ctnetlink) */ +enum tcp_conntrack { + TCP_CONNTRACK_NONE, + TCP_CONNTRACK_SYN_SENT, + TCP_CONNTRACK_SYN_RECV, + TCP_CONNTRACK_ESTABLISHED, + TCP_CONNTRACK_FIN_WAIT, + TCP_CONNTRACK_CLOSE_WAIT, + TCP_CONNTRACK_LAST_ACK, + TCP_CONNTRACK_TIME_WAIT, + TCP_CONNTRACK_CLOSE, + TCP_CONNTRACK_LISTEN, /* obsolete */ +#define TCP_CONNTRACK_SYN_SENT2 TCP_CONNTRACK_LISTEN + TCP_CONNTRACK_MAX, + TCP_CONNTRACK_IGNORE, + TCP_CONNTRACK_RETRANS, + TCP_CONNTRACK_UNACK, + TCP_CONNTRACK_TIMEOUT_MAX +}; + +/* Window scaling is advertised by the sender */ +#define IP_CT_TCP_FLAG_WINDOW_SCALE 0x01 + +/* SACK is permitted by the sender */ +#define IP_CT_TCP_FLAG_SACK_PERM 0x02 + +/* This sender sent FIN first */ +#define IP_CT_TCP_FLAG_CLOSE_INIT 0x04 + +/* Be liberal in window checking */ +#define IP_CT_TCP_FLAG_BE_LIBERAL 0x08 + +/* Has unacknowledged data */ +#define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED 0x10 + +/* The field td_maxack has been set */ +#define IP_CT_TCP_FLAG_MAXACK_SET 0x20 + +struct nf_ct_tcp_flags { + __u8 flags; + __u8 mask; +}; + + +#endif /* _UAPI_NF_CONNTRACK_TCP_H */ diff --git a/include/linux/netfilter/nf_conntrack_tuple_common.h b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h similarity index 100% rename from include/linux/netfilter/nf_conntrack_tuple_common.h rename to include/uapi/linux/netfilter/nf_conntrack_tuple_common.h diff --git a/include/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h similarity index 100% rename from include/linux/netfilter/nf_nat.h rename to include/uapi/linux/netfilter/nf_nat.h diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h new file mode 100644 index 00000000000..4a4efafad5f --- /dev/null +++ b/include/uapi/linux/netfilter/nfnetlink.h @@ -0,0 +1,56 @@ +#ifndef _UAPI_NFNETLINK_H +#define _UAPI_NFNETLINK_H +#include +#include + +enum nfnetlink_groups { + NFNLGRP_NONE, +#define NFNLGRP_NONE NFNLGRP_NONE + NFNLGRP_CONNTRACK_NEW, +#define NFNLGRP_CONNTRACK_NEW NFNLGRP_CONNTRACK_NEW + NFNLGRP_CONNTRACK_UPDATE, +#define NFNLGRP_CONNTRACK_UPDATE NFNLGRP_CONNTRACK_UPDATE + NFNLGRP_CONNTRACK_DESTROY, +#define NFNLGRP_CONNTRACK_DESTROY NFNLGRP_CONNTRACK_DESTROY + NFNLGRP_CONNTRACK_EXP_NEW, +#define NFNLGRP_CONNTRACK_EXP_NEW NFNLGRP_CONNTRACK_EXP_NEW + NFNLGRP_CONNTRACK_EXP_UPDATE, +#define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE + NFNLGRP_CONNTRACK_EXP_DESTROY, +#define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY + __NFNLGRP_MAX, +}; +#define NFNLGRP_MAX (__NFNLGRP_MAX - 1) + +/* General form of address family dependent message. + */ +struct nfgenmsg { + __u8 nfgen_family; /* AF_xxx */ + __u8 version; /* nfnetlink version */ + __be16 res_id; /* resource id */ +}; + +#define NFNETLINK_V0 0 + +/* netfilter netlink message types are split in two pieces: + * 8 bit subsystem, 8bit operation. + */ + +#define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) +#define NFNL_MSG_TYPE(x) (x & 0x00ff) + +/* No enum here, otherwise __stringify() trick of MODULE_ALIAS_NFNL_SUBSYS() + * won't work anymore */ +#define NFNL_SUBSYS_NONE 0 +#define NFNL_SUBSYS_CTNETLINK 1 +#define NFNL_SUBSYS_CTNETLINK_EXP 2 +#define NFNL_SUBSYS_QUEUE 3 +#define NFNL_SUBSYS_ULOG 4 +#define NFNL_SUBSYS_OSF 5 +#define NFNL_SUBSYS_IPSET 6 +#define NFNL_SUBSYS_ACCT 7 +#define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 +#define NFNL_SUBSYS_CTHELPER 9 +#define NFNL_SUBSYS_COUNT 10 + +#endif /* _UAPI_NFNETLINK_H */ diff --git a/include/uapi/linux/netfilter/nfnetlink_acct.h b/include/uapi/linux/netfilter/nfnetlink_acct.h new file mode 100644 index 00000000000..c7b6269e760 --- /dev/null +++ b/include/uapi/linux/netfilter/nfnetlink_acct.h @@ -0,0 +1,27 @@ +#ifndef _UAPI_NFNL_ACCT_H_ +#define _UAPI_NFNL_ACCT_H_ + +#ifndef NFACCT_NAME_MAX +#define NFACCT_NAME_MAX 32 +#endif + +enum nfnl_acct_msg_types { + NFNL_MSG_ACCT_NEW, + NFNL_MSG_ACCT_GET, + NFNL_MSG_ACCT_GET_CTRZERO, + NFNL_MSG_ACCT_DEL, + NFNL_MSG_ACCT_MAX +}; + +enum nfnl_acct_type { + NFACCT_UNSPEC, + NFACCT_NAME, + NFACCT_PKTS, + NFACCT_BYTES, + NFACCT_USE, + __NFACCT_MAX +}; +#define NFACCT_MAX (__NFACCT_MAX - 1) + + +#endif /* _UAPI_NFNL_ACCT_H_ */ diff --git a/include/linux/netfilter/nfnetlink_compat.h b/include/uapi/linux/netfilter/nfnetlink_compat.h similarity index 100% rename from include/linux/netfilter/nfnetlink_compat.h rename to include/uapi/linux/netfilter/nfnetlink_compat.h diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h similarity index 100% rename from include/linux/netfilter/nfnetlink_conntrack.h rename to include/uapi/linux/netfilter/nfnetlink_conntrack.h diff --git a/include/linux/netfilter/nfnetlink_cthelper.h b/include/uapi/linux/netfilter/nfnetlink_cthelper.h similarity index 100% rename from include/linux/netfilter/nfnetlink_cthelper.h rename to include/uapi/linux/netfilter/nfnetlink_cthelper.h diff --git a/include/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h similarity index 100% rename from include/linux/netfilter/nfnetlink_cttimeout.h rename to include/uapi/linux/netfilter/nfnetlink_cttimeout.h diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/uapi/linux/netfilter/nfnetlink_log.h similarity index 100% rename from include/linux/netfilter/nfnetlink_log.h rename to include/uapi/linux/netfilter/nfnetlink_log.h diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h similarity index 100% rename from include/linux/netfilter/nfnetlink_queue.h rename to include/uapi/linux/netfilter/nfnetlink_queue.h diff --git a/include/uapi/linux/netfilter/x_tables.h b/include/uapi/linux/netfilter/x_tables.h new file mode 100644 index 00000000000..c36969b9153 --- /dev/null +++ b/include/uapi/linux/netfilter/x_tables.h @@ -0,0 +1,187 @@ +#ifndef _UAPI_X_TABLES_H +#define _UAPI_X_TABLES_H +#include +#include + +#define XT_FUNCTION_MAXNAMELEN 30 +#define XT_EXTENSION_MAXNAMELEN 29 +#define XT_TABLE_MAXNAMELEN 32 + +struct xt_entry_match { + union { + struct { + __u16 match_size; + + /* Used by userspace */ + char name[XT_EXTENSION_MAXNAMELEN]; + __u8 revision; + } user; + struct { + __u16 match_size; + + /* Used inside the kernel */ + struct xt_match *match; + } kernel; + + /* Total length */ + __u16 match_size; + } u; + + unsigned char data[0]; +}; + +struct xt_entry_target { + union { + struct { + __u16 target_size; + + /* Used by userspace */ + char name[XT_EXTENSION_MAXNAMELEN]; + __u8 revision; + } user; + struct { + __u16 target_size; + + /* Used inside the kernel */ + struct xt_target *target; + } kernel; + + /* Total length */ + __u16 target_size; + } u; + + unsigned char data[0]; +}; + +#define XT_TARGET_INIT(__name, __size) \ +{ \ + .target.u.user = { \ + .target_size = XT_ALIGN(__size), \ + .name = __name, \ + }, \ +} + +struct xt_standard_target { + struct xt_entry_target target; + int verdict; +}; + +struct xt_error_target { + struct xt_entry_target target; + char errorname[XT_FUNCTION_MAXNAMELEN]; +}; + +/* The argument to IPT_SO_GET_REVISION_*. Returns highest revision + * kernel supports, if >= revision. */ +struct xt_get_revision { + char name[XT_EXTENSION_MAXNAMELEN]; + __u8 revision; +}; + +/* CONTINUE verdict for targets */ +#define XT_CONTINUE 0xFFFFFFFF + +/* For standard target */ +#define XT_RETURN (-NF_REPEAT - 1) + +/* this is a dummy structure to find out the alignment requirement for a struct + * containing all the fundamental data types that are used in ipt_entry, + * ip6t_entry and arpt_entry. This sucks, and it is a hack. It will be my + * personal pleasure to remove it -HW + */ +struct _xt_align { + __u8 u8; + __u16 u16; + __u32 u32; + __u64 u64; +}; + +#define XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _xt_align)) + +/* Standard return verdict, or do jump. */ +#define XT_STANDARD_TARGET "" +/* Error verdict. */ +#define XT_ERROR_TARGET "ERROR" + +#define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) +#define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) + +struct xt_counters { + __u64 pcnt, bcnt; /* Packet and byte counters */ +}; + +/* The argument to IPT_SO_ADD_COUNTERS. */ +struct xt_counters_info { + /* Which table. */ + char name[XT_TABLE_MAXNAMELEN]; + + unsigned int num_counters; + + /* The counters (actually `number' of these). */ + struct xt_counters counters[0]; +}; + +#define XT_INV_PROTO 0x40 /* Invert the sense of PROTO. */ + +#ifndef __KERNEL__ +/* fn returns 0 to continue iteration */ +#define XT_MATCH_ITERATE(type, e, fn, args...) \ +({ \ + unsigned int __i; \ + int __ret = 0; \ + struct xt_entry_match *__m; \ + \ + for (__i = sizeof(type); \ + __i < (e)->target_offset; \ + __i += __m->u.match_size) { \ + __m = (void *)e + __i; \ + \ + __ret = fn(__m , ## args); \ + if (__ret != 0) \ + break; \ + } \ + __ret; \ +}) + +/* fn returns 0 to continue iteration */ +#define XT_ENTRY_ITERATE_CONTINUE(type, entries, size, n, fn, args...) \ +({ \ + unsigned int __i, __n; \ + int __ret = 0; \ + type *__entry; \ + \ + for (__i = 0, __n = 0; __i < (size); \ + __i += __entry->next_offset, __n++) { \ + __entry = (void *)(entries) + __i; \ + if (__n < n) \ + continue; \ + \ + __ret = fn(__entry , ## args); \ + if (__ret != 0) \ + break; \ + } \ + __ret; \ +}) + +/* fn returns 0 to continue iteration */ +#define XT_ENTRY_ITERATE(type, entries, size, fn, args...) \ + XT_ENTRY_ITERATE_CONTINUE(type, entries, size, 0, fn, args) + +#endif /* !__KERNEL__ */ + +/* pos is normally a struct ipt_entry/ip6t_entry/etc. */ +#define xt_entry_foreach(pos, ehead, esize) \ + for ((pos) = (typeof(pos))(ehead); \ + (pos) < (typeof(pos))((char *)(ehead) + (esize)); \ + (pos) = (typeof(pos))((char *)(pos) + (pos)->next_offset)) + +/* can only be xt_entry_match, so no use of typeof here */ +#define xt_ematch_foreach(pos, entry) \ + for ((pos) = (struct xt_entry_match *)entry->elems; \ + (pos) < (struct xt_entry_match *)((char *)(entry) + \ + (entry)->target_offset); \ + (pos) = (struct xt_entry_match *)((char *)(pos) + \ + (pos)->u.match_size)) + + +#endif /* _UAPI_X_TABLES_H */ diff --git a/include/linux/netfilter/xt_AUDIT.h b/include/uapi/linux/netfilter/xt_AUDIT.h similarity index 100% rename from include/linux/netfilter/xt_AUDIT.h rename to include/uapi/linux/netfilter/xt_AUDIT.h diff --git a/include/linux/netfilter/xt_CHECKSUM.h b/include/uapi/linux/netfilter/xt_CHECKSUM.h similarity index 100% rename from include/linux/netfilter/xt_CHECKSUM.h rename to include/uapi/linux/netfilter/xt_CHECKSUM.h diff --git a/include/linux/netfilter/xt_CLASSIFY.h b/include/uapi/linux/netfilter/xt_CLASSIFY.h similarity index 100% rename from include/linux/netfilter/xt_CLASSIFY.h rename to include/uapi/linux/netfilter/xt_CLASSIFY.h diff --git a/include/linux/netfilter/xt_CONNMARK.h b/include/uapi/linux/netfilter/xt_CONNMARK.h similarity index 100% rename from include/linux/netfilter/xt_CONNMARK.h rename to include/uapi/linux/netfilter/xt_CONNMARK.h diff --git a/include/linux/netfilter/xt_CONNSECMARK.h b/include/uapi/linux/netfilter/xt_CONNSECMARK.h similarity index 100% rename from include/linux/netfilter/xt_CONNSECMARK.h rename to include/uapi/linux/netfilter/xt_CONNSECMARK.h diff --git a/include/linux/netfilter/xt_CT.h b/include/uapi/linux/netfilter/xt_CT.h similarity index 100% rename from include/linux/netfilter/xt_CT.h rename to include/uapi/linux/netfilter/xt_CT.h diff --git a/include/linux/netfilter/xt_DSCP.h b/include/uapi/linux/netfilter/xt_DSCP.h similarity index 100% rename from include/linux/netfilter/xt_DSCP.h rename to include/uapi/linux/netfilter/xt_DSCP.h diff --git a/include/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h similarity index 100% rename from include/linux/netfilter/xt_IDLETIMER.h rename to include/uapi/linux/netfilter/xt_IDLETIMER.h diff --git a/include/linux/netfilter/xt_LED.h b/include/uapi/linux/netfilter/xt_LED.h similarity index 100% rename from include/linux/netfilter/xt_LED.h rename to include/uapi/linux/netfilter/xt_LED.h diff --git a/include/linux/netfilter/xt_LOG.h b/include/uapi/linux/netfilter/xt_LOG.h similarity index 100% rename from include/linux/netfilter/xt_LOG.h rename to include/uapi/linux/netfilter/xt_LOG.h diff --git a/include/linux/netfilter/xt_MARK.h b/include/uapi/linux/netfilter/xt_MARK.h similarity index 100% rename from include/linux/netfilter/xt_MARK.h rename to include/uapi/linux/netfilter/xt_MARK.h diff --git a/include/linux/netfilter/xt_NFLOG.h b/include/uapi/linux/netfilter/xt_NFLOG.h similarity index 100% rename from include/linux/netfilter/xt_NFLOG.h rename to include/uapi/linux/netfilter/xt_NFLOG.h diff --git a/include/linux/netfilter/xt_NFQUEUE.h b/include/uapi/linux/netfilter/xt_NFQUEUE.h similarity index 100% rename from include/linux/netfilter/xt_NFQUEUE.h rename to include/uapi/linux/netfilter/xt_NFQUEUE.h diff --git a/include/linux/netfilter/xt_RATEEST.h b/include/uapi/linux/netfilter/xt_RATEEST.h similarity index 100% rename from include/linux/netfilter/xt_RATEEST.h rename to include/uapi/linux/netfilter/xt_RATEEST.h diff --git a/include/linux/netfilter/xt_SECMARK.h b/include/uapi/linux/netfilter/xt_SECMARK.h similarity index 100% rename from include/linux/netfilter/xt_SECMARK.h rename to include/uapi/linux/netfilter/xt_SECMARK.h diff --git a/include/linux/netfilter/xt_TCPMSS.h b/include/uapi/linux/netfilter/xt_TCPMSS.h similarity index 100% rename from include/linux/netfilter/xt_TCPMSS.h rename to include/uapi/linux/netfilter/xt_TCPMSS.h diff --git a/include/linux/netfilter/xt_TCPOPTSTRIP.h b/include/uapi/linux/netfilter/xt_TCPOPTSTRIP.h similarity index 100% rename from include/linux/netfilter/xt_TCPOPTSTRIP.h rename to include/uapi/linux/netfilter/xt_TCPOPTSTRIP.h diff --git a/include/linux/netfilter/xt_TEE.h b/include/uapi/linux/netfilter/xt_TEE.h similarity index 100% rename from include/linux/netfilter/xt_TEE.h rename to include/uapi/linux/netfilter/xt_TEE.h diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/uapi/linux/netfilter/xt_TPROXY.h similarity index 100% rename from include/linux/netfilter/xt_TPROXY.h rename to include/uapi/linux/netfilter/xt_TPROXY.h diff --git a/include/linux/netfilter/xt_addrtype.h b/include/uapi/linux/netfilter/xt_addrtype.h similarity index 100% rename from include/linux/netfilter/xt_addrtype.h rename to include/uapi/linux/netfilter/xt_addrtype.h diff --git a/include/linux/netfilter/xt_cluster.h b/include/uapi/linux/netfilter/xt_cluster.h similarity index 100% rename from include/linux/netfilter/xt_cluster.h rename to include/uapi/linux/netfilter/xt_cluster.h diff --git a/include/linux/netfilter/xt_comment.h b/include/uapi/linux/netfilter/xt_comment.h similarity index 100% rename from include/linux/netfilter/xt_comment.h rename to include/uapi/linux/netfilter/xt_comment.h diff --git a/include/linux/netfilter/xt_connbytes.h b/include/uapi/linux/netfilter/xt_connbytes.h similarity index 100% rename from include/linux/netfilter/xt_connbytes.h rename to include/uapi/linux/netfilter/xt_connbytes.h diff --git a/include/linux/netfilter/xt_connlimit.h b/include/uapi/linux/netfilter/xt_connlimit.h similarity index 100% rename from include/linux/netfilter/xt_connlimit.h rename to include/uapi/linux/netfilter/xt_connlimit.h diff --git a/include/linux/netfilter/xt_connmark.h b/include/uapi/linux/netfilter/xt_connmark.h similarity index 100% rename from include/linux/netfilter/xt_connmark.h rename to include/uapi/linux/netfilter/xt_connmark.h diff --git a/include/linux/netfilter/xt_conntrack.h b/include/uapi/linux/netfilter/xt_conntrack.h similarity index 100% rename from include/linux/netfilter/xt_conntrack.h rename to include/uapi/linux/netfilter/xt_conntrack.h diff --git a/include/linux/netfilter/xt_cpu.h b/include/uapi/linux/netfilter/xt_cpu.h similarity index 100% rename from include/linux/netfilter/xt_cpu.h rename to include/uapi/linux/netfilter/xt_cpu.h diff --git a/include/linux/netfilter/xt_dccp.h b/include/uapi/linux/netfilter/xt_dccp.h similarity index 100% rename from include/linux/netfilter/xt_dccp.h rename to include/uapi/linux/netfilter/xt_dccp.h diff --git a/include/linux/netfilter/xt_devgroup.h b/include/uapi/linux/netfilter/xt_devgroup.h similarity index 100% rename from include/linux/netfilter/xt_devgroup.h rename to include/uapi/linux/netfilter/xt_devgroup.h diff --git a/include/linux/netfilter/xt_dscp.h b/include/uapi/linux/netfilter/xt_dscp.h similarity index 100% rename from include/linux/netfilter/xt_dscp.h rename to include/uapi/linux/netfilter/xt_dscp.h diff --git a/include/linux/netfilter/xt_ecn.h b/include/uapi/linux/netfilter/xt_ecn.h similarity index 100% rename from include/linux/netfilter/xt_ecn.h rename to include/uapi/linux/netfilter/xt_ecn.h diff --git a/include/linux/netfilter/xt_esp.h b/include/uapi/linux/netfilter/xt_esp.h similarity index 100% rename from include/linux/netfilter/xt_esp.h rename to include/uapi/linux/netfilter/xt_esp.h diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h new file mode 100644 index 00000000000..cbfc43d1af6 --- /dev/null +++ b/include/uapi/linux/netfilter/xt_hashlimit.h @@ -0,0 +1,73 @@ +#ifndef _UAPI_XT_HASHLIMIT_H +#define _UAPI_XT_HASHLIMIT_H + +#include + +/* timings are in milliseconds. */ +#define XT_HASHLIMIT_SCALE 10000 +/* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 + * seconds, or one packet every 59 hours. + */ + +/* packet length accounting is done in 16-byte steps */ +#define XT_HASHLIMIT_BYTE_SHIFT 4 + +/* details of this structure hidden by the implementation */ +struct xt_hashlimit_htable; + +enum { + XT_HASHLIMIT_HASH_DIP = 1 << 0, + XT_HASHLIMIT_HASH_DPT = 1 << 1, + XT_HASHLIMIT_HASH_SIP = 1 << 2, + XT_HASHLIMIT_HASH_SPT = 1 << 3, + XT_HASHLIMIT_INVERT = 1 << 4, + XT_HASHLIMIT_BYTES = 1 << 5, +}; + +struct hashlimit_cfg { + __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */ + __u32 avg; /* Average secs between packets * scale */ + __u32 burst; /* Period multiplier for upper limit. */ + + /* user specified */ + __u32 size; /* how many buckets */ + __u32 max; /* max number of entries */ + __u32 gc_interval; /* gc interval */ + __u32 expire; /* when do entries expire? */ +}; + +struct xt_hashlimit_info { + char name [IFNAMSIZ]; /* name */ + struct hashlimit_cfg cfg; + + /* Used internally by the kernel */ + struct xt_hashlimit_htable *hinfo; + union { + void *ptr; + struct xt_hashlimit_info *master; + } u; +}; + +struct hashlimit_cfg1 { + __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */ + __u32 avg; /* Average secs between packets * scale */ + __u32 burst; /* Period multiplier for upper limit. */ + + /* user specified */ + __u32 size; /* how many buckets */ + __u32 max; /* max number of entries */ + __u32 gc_interval; /* gc interval */ + __u32 expire; /* when do entries expire? */ + + __u8 srcmask, dstmask; +}; + +struct xt_hashlimit_mtinfo1 { + char name[IFNAMSIZ]; + struct hashlimit_cfg1 cfg; + + /* Used internally by the kernel */ + struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); +}; + +#endif /* _UAPI_XT_HASHLIMIT_H */ diff --git a/include/linux/netfilter/xt_helper.h b/include/uapi/linux/netfilter/xt_helper.h similarity index 100% rename from include/linux/netfilter/xt_helper.h rename to include/uapi/linux/netfilter/xt_helper.h diff --git a/include/linux/netfilter/xt_iprange.h b/include/uapi/linux/netfilter/xt_iprange.h similarity index 100% rename from include/linux/netfilter/xt_iprange.h rename to include/uapi/linux/netfilter/xt_iprange.h diff --git a/include/linux/netfilter/xt_ipvs.h b/include/uapi/linux/netfilter/xt_ipvs.h similarity index 100% rename from include/linux/netfilter/xt_ipvs.h rename to include/uapi/linux/netfilter/xt_ipvs.h diff --git a/include/linux/netfilter/xt_length.h b/include/uapi/linux/netfilter/xt_length.h similarity index 100% rename from include/linux/netfilter/xt_length.h rename to include/uapi/linux/netfilter/xt_length.h diff --git a/include/linux/netfilter/xt_limit.h b/include/uapi/linux/netfilter/xt_limit.h similarity index 100% rename from include/linux/netfilter/xt_limit.h rename to include/uapi/linux/netfilter/xt_limit.h diff --git a/include/linux/netfilter/xt_mac.h b/include/uapi/linux/netfilter/xt_mac.h similarity index 100% rename from include/linux/netfilter/xt_mac.h rename to include/uapi/linux/netfilter/xt_mac.h diff --git a/include/linux/netfilter/xt_mark.h b/include/uapi/linux/netfilter/xt_mark.h similarity index 100% rename from include/linux/netfilter/xt_mark.h rename to include/uapi/linux/netfilter/xt_mark.h diff --git a/include/linux/netfilter/xt_multiport.h b/include/uapi/linux/netfilter/xt_multiport.h similarity index 100% rename from include/linux/netfilter/xt_multiport.h rename to include/uapi/linux/netfilter/xt_multiport.h diff --git a/include/linux/netfilter/xt_nfacct.h b/include/uapi/linux/netfilter/xt_nfacct.h similarity index 100% rename from include/linux/netfilter/xt_nfacct.h rename to include/uapi/linux/netfilter/xt_nfacct.h diff --git a/include/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h similarity index 100% rename from include/linux/netfilter/xt_osf.h rename to include/uapi/linux/netfilter/xt_osf.h diff --git a/include/linux/netfilter/xt_owner.h b/include/uapi/linux/netfilter/xt_owner.h similarity index 100% rename from include/linux/netfilter/xt_owner.h rename to include/uapi/linux/netfilter/xt_owner.h diff --git a/include/uapi/linux/netfilter/xt_physdev.h b/include/uapi/linux/netfilter/xt_physdev.h new file mode 100644 index 00000000000..db7a2982e9c --- /dev/null +++ b/include/uapi/linux/netfilter/xt_physdev.h @@ -0,0 +1,23 @@ +#ifndef _UAPI_XT_PHYSDEV_H +#define _UAPI_XT_PHYSDEV_H + +#include + + +#define XT_PHYSDEV_OP_IN 0x01 +#define XT_PHYSDEV_OP_OUT 0x02 +#define XT_PHYSDEV_OP_BRIDGED 0x04 +#define XT_PHYSDEV_OP_ISIN 0x08 +#define XT_PHYSDEV_OP_ISOUT 0x10 +#define XT_PHYSDEV_OP_MASK (0x20 - 1) + +struct xt_physdev_info { + char physindev[IFNAMSIZ]; + char in_mask[IFNAMSIZ]; + char physoutdev[IFNAMSIZ]; + char out_mask[IFNAMSIZ]; + __u8 invert; + __u8 bitmask; +}; + +#endif /* _UAPI_XT_PHYSDEV_H */ diff --git a/include/linux/netfilter/xt_pkttype.h b/include/uapi/linux/netfilter/xt_pkttype.h similarity index 100% rename from include/linux/netfilter/xt_pkttype.h rename to include/uapi/linux/netfilter/xt_pkttype.h diff --git a/include/linux/netfilter/xt_policy.h b/include/uapi/linux/netfilter/xt_policy.h similarity index 100% rename from include/linux/netfilter/xt_policy.h rename to include/uapi/linux/netfilter/xt_policy.h diff --git a/include/linux/netfilter/xt_quota.h b/include/uapi/linux/netfilter/xt_quota.h similarity index 100% rename from include/linux/netfilter/xt_quota.h rename to include/uapi/linux/netfilter/xt_quota.h diff --git a/include/linux/netfilter/xt_rateest.h b/include/uapi/linux/netfilter/xt_rateest.h similarity index 100% rename from include/linux/netfilter/xt_rateest.h rename to include/uapi/linux/netfilter/xt_rateest.h diff --git a/include/linux/netfilter/xt_realm.h b/include/uapi/linux/netfilter/xt_realm.h similarity index 100% rename from include/linux/netfilter/xt_realm.h rename to include/uapi/linux/netfilter/xt_realm.h diff --git a/include/linux/netfilter/xt_recent.h b/include/uapi/linux/netfilter/xt_recent.h similarity index 100% rename from include/linux/netfilter/xt_recent.h rename to include/uapi/linux/netfilter/xt_recent.h diff --git a/include/linux/netfilter/xt_sctp.h b/include/uapi/linux/netfilter/xt_sctp.h similarity index 100% rename from include/linux/netfilter/xt_sctp.h rename to include/uapi/linux/netfilter/xt_sctp.h diff --git a/include/linux/netfilter/xt_set.h b/include/uapi/linux/netfilter/xt_set.h similarity index 100% rename from include/linux/netfilter/xt_set.h rename to include/uapi/linux/netfilter/xt_set.h diff --git a/include/linux/netfilter/xt_socket.h b/include/uapi/linux/netfilter/xt_socket.h similarity index 100% rename from include/linux/netfilter/xt_socket.h rename to include/uapi/linux/netfilter/xt_socket.h diff --git a/include/linux/netfilter/xt_state.h b/include/uapi/linux/netfilter/xt_state.h similarity index 100% rename from include/linux/netfilter/xt_state.h rename to include/uapi/linux/netfilter/xt_state.h diff --git a/include/linux/netfilter/xt_statistic.h b/include/uapi/linux/netfilter/xt_statistic.h similarity index 100% rename from include/linux/netfilter/xt_statistic.h rename to include/uapi/linux/netfilter/xt_statistic.h diff --git a/include/linux/netfilter/xt_string.h b/include/uapi/linux/netfilter/xt_string.h similarity index 100% rename from include/linux/netfilter/xt_string.h rename to include/uapi/linux/netfilter/xt_string.h diff --git a/include/linux/netfilter/xt_tcpmss.h b/include/uapi/linux/netfilter/xt_tcpmss.h similarity index 100% rename from include/linux/netfilter/xt_tcpmss.h rename to include/uapi/linux/netfilter/xt_tcpmss.h diff --git a/include/linux/netfilter/xt_tcpudp.h b/include/uapi/linux/netfilter/xt_tcpudp.h similarity index 100% rename from include/linux/netfilter/xt_tcpudp.h rename to include/uapi/linux/netfilter/xt_tcpudp.h diff --git a/include/linux/netfilter/xt_time.h b/include/uapi/linux/netfilter/xt_time.h similarity index 100% rename from include/linux/netfilter/xt_time.h rename to include/uapi/linux/netfilter/xt_time.h diff --git a/include/linux/netfilter/xt_u32.h b/include/uapi/linux/netfilter/xt_u32.h similarity index 100% rename from include/linux/netfilter/xt_u32.h rename to include/uapi/linux/netfilter/xt_u32.h From a82014149becc68695e7f1d62a8cc1e4ae062318 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:48:55 +0100 Subject: [PATCH 47/61] UAPI: (Scripted) Disintegrate include/linux/netfilter/ipset Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones --- include/linux/netfilter/ipset/Kbuild | 4 - include/linux/netfilter/ipset/ip_set.h | 225 +---------------- include/linux/netfilter/ipset/ip_set_bitmap.h | 11 +- include/linux/netfilter/ipset/ip_set_hash.h | 19 +- include/linux/netfilter/ipset/ip_set_list.h | 19 +- include/uapi/linux/netfilter/ipset/Kbuild | 4 + include/uapi/linux/netfilter/ipset/ip_set.h | 231 ++++++++++++++++++ .../linux/netfilter/ipset/ip_set_bitmap.h | 13 + .../uapi/linux/netfilter/ipset/ip_set_hash.h | 21 ++ .../uapi/linux/netfilter/ipset/ip_set_list.h | 21 ++ 10 files changed, 296 insertions(+), 272 deletions(-) create mode 100644 include/uapi/linux/netfilter/ipset/ip_set.h create mode 100644 include/uapi/linux/netfilter/ipset/ip_set_bitmap.h create mode 100644 include/uapi/linux/netfilter/ipset/ip_set_hash.h create mode 100644 include/uapi/linux/netfilter/ipset/ip_set_list.h diff --git a/include/linux/netfilter/ipset/Kbuild b/include/linux/netfilter/ipset/Kbuild index 601fe71d34d..e69de29bb2d 100644 --- a/include/linux/netfilter/ipset/Kbuild +++ b/include/linux/netfilter/ipset/Kbuild @@ -1,4 +0,0 @@ -header-y += ip_set.h -header-y += ip_set_bitmap.h -header-y += ip_set_hash.h -header-y += ip_set_list.h diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 528697b3c15..7958e84a65a 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -1,6 +1,3 @@ -#ifndef _IP_SET_H -#define _IP_SET_H - /* Copyright (C) 2000-2002 Joakim Axelsson * Patrick Schaaf * Martin Josefsson @@ -10,199 +7,9 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#ifndef _IP_SET_H +#define _IP_SET_H -#include - -/* The protocol version */ -#define IPSET_PROTOCOL 6 - -/* The max length of strings including NUL: set and type identifiers */ -#define IPSET_MAXNAMELEN 32 - -/* Message types and commands */ -enum ipset_cmd { - IPSET_CMD_NONE, - IPSET_CMD_PROTOCOL, /* 1: Return protocol version */ - IPSET_CMD_CREATE, /* 2: Create a new (empty) set */ - IPSET_CMD_DESTROY, /* 3: Destroy a (empty) set */ - IPSET_CMD_FLUSH, /* 4: Remove all elements from a set */ - IPSET_CMD_RENAME, /* 5: Rename a set */ - IPSET_CMD_SWAP, /* 6: Swap two sets */ - IPSET_CMD_LIST, /* 7: List sets */ - IPSET_CMD_SAVE, /* 8: Save sets */ - IPSET_CMD_ADD, /* 9: Add an element to a set */ - IPSET_CMD_DEL, /* 10: Delete an element from a set */ - IPSET_CMD_TEST, /* 11: Test an element in a set */ - IPSET_CMD_HEADER, /* 12: Get set header data only */ - IPSET_CMD_TYPE, /* 13: Get set type */ - IPSET_MSG_MAX, /* Netlink message commands */ - - /* Commands in userspace: */ - IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */ - IPSET_CMD_HELP, /* 15: Get help */ - IPSET_CMD_VERSION, /* 16: Get program version */ - IPSET_CMD_QUIT, /* 17: Quit from interactive mode */ - - IPSET_CMD_MAX, - - IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */ -}; - -/* Attributes at command level */ -enum { - IPSET_ATTR_UNSPEC, - IPSET_ATTR_PROTOCOL, /* 1: Protocol version */ - IPSET_ATTR_SETNAME, /* 2: Name of the set */ - IPSET_ATTR_TYPENAME, /* 3: Typename */ - IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME, /* Setname at rename/swap */ - IPSET_ATTR_REVISION, /* 4: Settype revision */ - IPSET_ATTR_FAMILY, /* 5: Settype family */ - IPSET_ATTR_FLAGS, /* 6: Flags at command level */ - IPSET_ATTR_DATA, /* 7: Nested attributes */ - IPSET_ATTR_ADT, /* 8: Multiple data containers */ - IPSET_ATTR_LINENO, /* 9: Restore lineno */ - IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */ - IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */ - __IPSET_ATTR_CMD_MAX, -}; -#define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1) - -/* CADT specific attributes */ -enum { - IPSET_ATTR_IP = IPSET_ATTR_UNSPEC + 1, - IPSET_ATTR_IP_FROM = IPSET_ATTR_IP, - IPSET_ATTR_IP_TO, /* 2 */ - IPSET_ATTR_CIDR, /* 3 */ - IPSET_ATTR_PORT, /* 4 */ - IPSET_ATTR_PORT_FROM = IPSET_ATTR_PORT, - IPSET_ATTR_PORT_TO, /* 5 */ - IPSET_ATTR_TIMEOUT, /* 6 */ - IPSET_ATTR_PROTO, /* 7 */ - IPSET_ATTR_CADT_FLAGS, /* 8 */ - IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */ - /* Reserve empty slots */ - IPSET_ATTR_CADT_MAX = 16, - /* Create-only specific attributes */ - IPSET_ATTR_GC, - IPSET_ATTR_HASHSIZE, - IPSET_ATTR_MAXELEM, - IPSET_ATTR_NETMASK, - IPSET_ATTR_PROBES, - IPSET_ATTR_RESIZE, - IPSET_ATTR_SIZE, - /* Kernel-only */ - IPSET_ATTR_ELEMENTS, - IPSET_ATTR_REFERENCES, - IPSET_ATTR_MEMSIZE, - - __IPSET_ATTR_CREATE_MAX, -}; -#define IPSET_ATTR_CREATE_MAX (__IPSET_ATTR_CREATE_MAX - 1) - -/* ADT specific attributes */ -enum { - IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + 1, - IPSET_ATTR_NAME, - IPSET_ATTR_NAMEREF, - IPSET_ATTR_IP2, - IPSET_ATTR_CIDR2, - IPSET_ATTR_IP2_TO, - IPSET_ATTR_IFACE, - __IPSET_ATTR_ADT_MAX, -}; -#define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) - -/* IP specific attributes */ -enum { - IPSET_ATTR_IPADDR_IPV4 = IPSET_ATTR_UNSPEC + 1, - IPSET_ATTR_IPADDR_IPV6, - __IPSET_ATTR_IPADDR_MAX, -}; -#define IPSET_ATTR_IPADDR_MAX (__IPSET_ATTR_IPADDR_MAX - 1) - -/* Error codes */ -enum ipset_errno { - IPSET_ERR_PRIVATE = 4096, - IPSET_ERR_PROTOCOL, - IPSET_ERR_FIND_TYPE, - IPSET_ERR_MAX_SETS, - IPSET_ERR_BUSY, - IPSET_ERR_EXIST_SETNAME2, - IPSET_ERR_TYPE_MISMATCH, - IPSET_ERR_EXIST, - IPSET_ERR_INVALID_CIDR, - IPSET_ERR_INVALID_NETMASK, - IPSET_ERR_INVALID_FAMILY, - IPSET_ERR_TIMEOUT, - IPSET_ERR_REFERENCED, - IPSET_ERR_IPADDR_IPV4, - IPSET_ERR_IPADDR_IPV6, - - /* Type specific error codes */ - IPSET_ERR_TYPE_SPECIFIC = 4352, -}; - -/* Flags at command level */ -enum ipset_cmd_flags { - IPSET_FLAG_BIT_EXIST = 0, - IPSET_FLAG_EXIST = (1 << IPSET_FLAG_BIT_EXIST), - IPSET_FLAG_BIT_LIST_SETNAME = 1, - IPSET_FLAG_LIST_SETNAME = (1 << IPSET_FLAG_BIT_LIST_SETNAME), - IPSET_FLAG_BIT_LIST_HEADER = 2, - IPSET_FLAG_LIST_HEADER = (1 << IPSET_FLAG_BIT_LIST_HEADER), - IPSET_FLAG_CMD_MAX = 15, /* Lower half */ -}; - -/* Flags at CADT attribute level */ -enum ipset_cadt_flags { - IPSET_FLAG_BIT_BEFORE = 0, - IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE), - IPSET_FLAG_BIT_PHYSDEV = 1, - IPSET_FLAG_PHYSDEV = (1 << IPSET_FLAG_BIT_PHYSDEV), - IPSET_FLAG_BIT_NOMATCH = 2, - IPSET_FLAG_NOMATCH = (1 << IPSET_FLAG_BIT_NOMATCH), - IPSET_FLAG_CADT_MAX = 15, /* Upper half */ -}; - -/* Commands with settype-specific attributes */ -enum ipset_adt { - IPSET_ADD, - IPSET_DEL, - IPSET_TEST, - IPSET_ADT_MAX, - IPSET_CREATE = IPSET_ADT_MAX, - IPSET_CADT_MAX, -}; - -/* Sets are identified by an index in kernel space. Tweak with ip_set_id_t - * and IPSET_INVALID_ID if you want to increase the max number of sets. - */ -typedef __u16 ip_set_id_t; - -#define IPSET_INVALID_ID 65535 - -enum ip_set_dim { - IPSET_DIM_ZERO = 0, - IPSET_DIM_ONE, - IPSET_DIM_TWO, - IPSET_DIM_THREE, - /* Max dimension in elements. - * If changed, new revision of iptables match/target is required. - */ - IPSET_DIM_MAX = 6, - IPSET_BIT_RETURN_NOMATCH = 7, -}; - -/* Option flags for kernel operations */ -enum ip_set_kopt { - IPSET_INV_MATCH = (1 << IPSET_DIM_ZERO), - IPSET_DIM_ONE_SRC = (1 << IPSET_DIM_ONE), - IPSET_DIM_TWO_SRC = (1 << IPSET_DIM_TWO), - IPSET_DIM_THREE_SRC = (1 << IPSET_DIM_THREE), - IPSET_RETURN_NOMATCH = (1 << IPSET_BIT_RETURN_NOMATCH), -}; - -#ifdef __KERNEL__ #include #include #include @@ -211,6 +18,7 @@ enum ip_set_kopt { #include #include #include +#include #define _IP_SET_MODULE_DESC(a, b, c) \ MODULE_DESCRIPTION(a " type of IP sets, revisions " b "-" c) @@ -476,31 +284,4 @@ bitmap_bytes(u32 a, u32 b) return 4 * ((((b - a + 8) / 8) + 3) / 4); } -#endif /* __KERNEL__ */ - -/* Interface to iptables/ip6tables */ - -#define SO_IP_SET 83 - -union ip_set_name_index { - char name[IPSET_MAXNAMELEN]; - ip_set_id_t index; -}; - -#define IP_SET_OP_GET_BYNAME 0x00000006 /* Get set index by name */ -struct ip_set_req_get_set { - unsigned int op; - unsigned int version; - union ip_set_name_index set; -}; - -#define IP_SET_OP_GET_BYINDEX 0x00000007 /* Get set name by index */ -/* Uses ip_set_req_get_set */ - -#define IP_SET_OP_VERSION 0x00000100 /* Ask kernel version */ -struct ip_set_req_version { - unsigned int op; - unsigned int version; -}; - #endif /*_IP_SET_H */ diff --git a/include/linux/netfilter/ipset/ip_set_bitmap.h b/include/linux/netfilter/ipset/ip_set_bitmap.h index 61a9e8746c8..1a30646d5be 100644 --- a/include/linux/netfilter/ipset/ip_set_bitmap.h +++ b/include/linux/netfilter/ipset/ip_set_bitmap.h @@ -1,15 +1,8 @@ #ifndef __IP_SET_BITMAP_H #define __IP_SET_BITMAP_H -/* Bitmap type specific error codes */ -enum { - /* The element is out of the range of the set */ - IPSET_ERR_BITMAP_RANGE = IPSET_ERR_TYPE_SPECIFIC, - /* The range exceeds the size limit of the set type */ - IPSET_ERR_BITMAP_RANGE_SIZE, -}; +#include -#ifdef __KERNEL__ #define IPSET_BITMAP_MAX_RANGE 0x0000FFFF /* Common functions */ @@ -26,6 +19,4 @@ range_to_mask(u32 from, u32 to, u8 *bits) return mask; } -#endif /* __KERNEL__ */ - #endif /* __IP_SET_BITMAP_H */ diff --git a/include/linux/netfilter/ipset/ip_set_hash.h b/include/linux/netfilter/ipset/ip_set_hash.h index e2a9fae767f..f98ddfb094c 100644 --- a/include/linux/netfilter/ipset/ip_set_hash.h +++ b/include/linux/netfilter/ipset/ip_set_hash.h @@ -1,23 +1,8 @@ #ifndef __IP_SET_HASH_H #define __IP_SET_HASH_H -/* Hash type specific error codes */ -enum { - /* Hash is full */ - IPSET_ERR_HASH_FULL = IPSET_ERR_TYPE_SPECIFIC, - /* Null-valued element */ - IPSET_ERR_HASH_ELEM, - /* Invalid protocol */ - IPSET_ERR_INVALID_PROTO, - /* Protocol missing but must be specified */ - IPSET_ERR_MISSING_PROTO, - /* Range not supported */ - IPSET_ERR_HASH_RANGE_UNSUPPORTED, - /* Invalid range */ - IPSET_ERR_HASH_RANGE, -}; +#include -#ifdef __KERNEL__ #define IPSET_DEFAULT_HASHSIZE 1024 #define IPSET_MIMINAL_HASHSIZE 64 @@ -25,6 +10,4 @@ enum { #define IPSET_DEFAULT_PROBES 4 #define IPSET_DEFAULT_RESIZE 100 -#endif /* __KERNEL__ */ - #endif /* __IP_SET_HASH_H */ diff --git a/include/linux/netfilter/ipset/ip_set_list.h b/include/linux/netfilter/ipset/ip_set_list.h index 40a63f30261..68c2aea897f 100644 --- a/include/linux/netfilter/ipset/ip_set_list.h +++ b/include/linux/netfilter/ipset/ip_set_list.h @@ -1,27 +1,10 @@ #ifndef __IP_SET_LIST_H #define __IP_SET_LIST_H -/* List type specific error codes */ -enum { - /* Set name to be added/deleted/tested does not exist. */ - IPSET_ERR_NAME = IPSET_ERR_TYPE_SPECIFIC, - /* list:set type is not permitted to add */ - IPSET_ERR_LOOP, - /* Missing reference set */ - IPSET_ERR_BEFORE, - /* Reference set does not exist */ - IPSET_ERR_NAMEREF, - /* Set is full */ - IPSET_ERR_LIST_FULL, - /* Reference set is not added to the set */ - IPSET_ERR_REF_EXIST, -}; +#include -#ifdef __KERNEL__ #define IP_SET_LIST_DEFAULT_SIZE 8 #define IP_SET_LIST_MIN_SIZE 4 -#endif /* __KERNEL__ */ - #endif /* __IP_SET_LIST_H */ diff --git a/include/uapi/linux/netfilter/ipset/Kbuild b/include/uapi/linux/netfilter/ipset/Kbuild index aafaa5aa54d..d2680423d9a 100644 --- a/include/uapi/linux/netfilter/ipset/Kbuild +++ b/include/uapi/linux/netfilter/ipset/Kbuild @@ -1 +1,5 @@ # UAPI Header export list +header-y += ip_set.h +header-y += ip_set_bitmap.h +header-y += ip_set_hash.h +header-y += ip_set_list.h diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h new file mode 100644 index 00000000000..fbee42807a1 --- /dev/null +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -0,0 +1,231 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Martin Josefsson + * Copyright (C) 2003-2011 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _UAPI_IP_SET_H +#define _UAPI_IP_SET_H + + +#include + +/* The protocol version */ +#define IPSET_PROTOCOL 6 + +/* The max length of strings including NUL: set and type identifiers */ +#define IPSET_MAXNAMELEN 32 + +/* Message types and commands */ +enum ipset_cmd { + IPSET_CMD_NONE, + IPSET_CMD_PROTOCOL, /* 1: Return protocol version */ + IPSET_CMD_CREATE, /* 2: Create a new (empty) set */ + IPSET_CMD_DESTROY, /* 3: Destroy a (empty) set */ + IPSET_CMD_FLUSH, /* 4: Remove all elements from a set */ + IPSET_CMD_RENAME, /* 5: Rename a set */ + IPSET_CMD_SWAP, /* 6: Swap two sets */ + IPSET_CMD_LIST, /* 7: List sets */ + IPSET_CMD_SAVE, /* 8: Save sets */ + IPSET_CMD_ADD, /* 9: Add an element to a set */ + IPSET_CMD_DEL, /* 10: Delete an element from a set */ + IPSET_CMD_TEST, /* 11: Test an element in a set */ + IPSET_CMD_HEADER, /* 12: Get set header data only */ + IPSET_CMD_TYPE, /* 13: Get set type */ + IPSET_MSG_MAX, /* Netlink message commands */ + + /* Commands in userspace: */ + IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */ + IPSET_CMD_HELP, /* 15: Get help */ + IPSET_CMD_VERSION, /* 16: Get program version */ + IPSET_CMD_QUIT, /* 17: Quit from interactive mode */ + + IPSET_CMD_MAX, + + IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */ +}; + +/* Attributes at command level */ +enum { + IPSET_ATTR_UNSPEC, + IPSET_ATTR_PROTOCOL, /* 1: Protocol version */ + IPSET_ATTR_SETNAME, /* 2: Name of the set */ + IPSET_ATTR_TYPENAME, /* 3: Typename */ + IPSET_ATTR_SETNAME2 = IPSET_ATTR_TYPENAME, /* Setname at rename/swap */ + IPSET_ATTR_REVISION, /* 4: Settype revision */ + IPSET_ATTR_FAMILY, /* 5: Settype family */ + IPSET_ATTR_FLAGS, /* 6: Flags at command level */ + IPSET_ATTR_DATA, /* 7: Nested attributes */ + IPSET_ATTR_ADT, /* 8: Multiple data containers */ + IPSET_ATTR_LINENO, /* 9: Restore lineno */ + IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */ + IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */ + __IPSET_ATTR_CMD_MAX, +}; +#define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1) + +/* CADT specific attributes */ +enum { + IPSET_ATTR_IP = IPSET_ATTR_UNSPEC + 1, + IPSET_ATTR_IP_FROM = IPSET_ATTR_IP, + IPSET_ATTR_IP_TO, /* 2 */ + IPSET_ATTR_CIDR, /* 3 */ + IPSET_ATTR_PORT, /* 4 */ + IPSET_ATTR_PORT_FROM = IPSET_ATTR_PORT, + IPSET_ATTR_PORT_TO, /* 5 */ + IPSET_ATTR_TIMEOUT, /* 6 */ + IPSET_ATTR_PROTO, /* 7 */ + IPSET_ATTR_CADT_FLAGS, /* 8 */ + IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */ + /* Reserve empty slots */ + IPSET_ATTR_CADT_MAX = 16, + /* Create-only specific attributes */ + IPSET_ATTR_GC, + IPSET_ATTR_HASHSIZE, + IPSET_ATTR_MAXELEM, + IPSET_ATTR_NETMASK, + IPSET_ATTR_PROBES, + IPSET_ATTR_RESIZE, + IPSET_ATTR_SIZE, + /* Kernel-only */ + IPSET_ATTR_ELEMENTS, + IPSET_ATTR_REFERENCES, + IPSET_ATTR_MEMSIZE, + + __IPSET_ATTR_CREATE_MAX, +}; +#define IPSET_ATTR_CREATE_MAX (__IPSET_ATTR_CREATE_MAX - 1) + +/* ADT specific attributes */ +enum { + IPSET_ATTR_ETHER = IPSET_ATTR_CADT_MAX + 1, + IPSET_ATTR_NAME, + IPSET_ATTR_NAMEREF, + IPSET_ATTR_IP2, + IPSET_ATTR_CIDR2, + IPSET_ATTR_IP2_TO, + IPSET_ATTR_IFACE, + __IPSET_ATTR_ADT_MAX, +}; +#define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1) + +/* IP specific attributes */ +enum { + IPSET_ATTR_IPADDR_IPV4 = IPSET_ATTR_UNSPEC + 1, + IPSET_ATTR_IPADDR_IPV6, + __IPSET_ATTR_IPADDR_MAX, +}; +#define IPSET_ATTR_IPADDR_MAX (__IPSET_ATTR_IPADDR_MAX - 1) + +/* Error codes */ +enum ipset_errno { + IPSET_ERR_PRIVATE = 4096, + IPSET_ERR_PROTOCOL, + IPSET_ERR_FIND_TYPE, + IPSET_ERR_MAX_SETS, + IPSET_ERR_BUSY, + IPSET_ERR_EXIST_SETNAME2, + IPSET_ERR_TYPE_MISMATCH, + IPSET_ERR_EXIST, + IPSET_ERR_INVALID_CIDR, + IPSET_ERR_INVALID_NETMASK, + IPSET_ERR_INVALID_FAMILY, + IPSET_ERR_TIMEOUT, + IPSET_ERR_REFERENCED, + IPSET_ERR_IPADDR_IPV4, + IPSET_ERR_IPADDR_IPV6, + + /* Type specific error codes */ + IPSET_ERR_TYPE_SPECIFIC = 4352, +}; + +/* Flags at command level */ +enum ipset_cmd_flags { + IPSET_FLAG_BIT_EXIST = 0, + IPSET_FLAG_EXIST = (1 << IPSET_FLAG_BIT_EXIST), + IPSET_FLAG_BIT_LIST_SETNAME = 1, + IPSET_FLAG_LIST_SETNAME = (1 << IPSET_FLAG_BIT_LIST_SETNAME), + IPSET_FLAG_BIT_LIST_HEADER = 2, + IPSET_FLAG_LIST_HEADER = (1 << IPSET_FLAG_BIT_LIST_HEADER), + IPSET_FLAG_CMD_MAX = 15, /* Lower half */ +}; + +/* Flags at CADT attribute level */ +enum ipset_cadt_flags { + IPSET_FLAG_BIT_BEFORE = 0, + IPSET_FLAG_BEFORE = (1 << IPSET_FLAG_BIT_BEFORE), + IPSET_FLAG_BIT_PHYSDEV = 1, + IPSET_FLAG_PHYSDEV = (1 << IPSET_FLAG_BIT_PHYSDEV), + IPSET_FLAG_BIT_NOMATCH = 2, + IPSET_FLAG_NOMATCH = (1 << IPSET_FLAG_BIT_NOMATCH), + IPSET_FLAG_CADT_MAX = 15, /* Upper half */ +}; + +/* Commands with settype-specific attributes */ +enum ipset_adt { + IPSET_ADD, + IPSET_DEL, + IPSET_TEST, + IPSET_ADT_MAX, + IPSET_CREATE = IPSET_ADT_MAX, + IPSET_CADT_MAX, +}; + +/* Sets are identified by an index in kernel space. Tweak with ip_set_id_t + * and IPSET_INVALID_ID if you want to increase the max number of sets. + */ +typedef __u16 ip_set_id_t; + +#define IPSET_INVALID_ID 65535 + +enum ip_set_dim { + IPSET_DIM_ZERO = 0, + IPSET_DIM_ONE, + IPSET_DIM_TWO, + IPSET_DIM_THREE, + /* Max dimension in elements. + * If changed, new revision of iptables match/target is required. + */ + IPSET_DIM_MAX = 6, + IPSET_BIT_RETURN_NOMATCH = 7, +}; + +/* Option flags for kernel operations */ +enum ip_set_kopt { + IPSET_INV_MATCH = (1 << IPSET_DIM_ZERO), + IPSET_DIM_ONE_SRC = (1 << IPSET_DIM_ONE), + IPSET_DIM_TWO_SRC = (1 << IPSET_DIM_TWO), + IPSET_DIM_THREE_SRC = (1 << IPSET_DIM_THREE), + IPSET_RETURN_NOMATCH = (1 << IPSET_BIT_RETURN_NOMATCH), +}; + + +/* Interface to iptables/ip6tables */ + +#define SO_IP_SET 83 + +union ip_set_name_index { + char name[IPSET_MAXNAMELEN]; + ip_set_id_t index; +}; + +#define IP_SET_OP_GET_BYNAME 0x00000006 /* Get set index by name */ +struct ip_set_req_get_set { + unsigned int op; + unsigned int version; + union ip_set_name_index set; +}; + +#define IP_SET_OP_GET_BYINDEX 0x00000007 /* Get set name by index */ +/* Uses ip_set_req_get_set */ + +#define IP_SET_OP_VERSION 0x00000100 /* Ask kernel version */ +struct ip_set_req_version { + unsigned int op; + unsigned int version; +}; + +#endif /* _UAPI_IP_SET_H */ diff --git a/include/uapi/linux/netfilter/ipset/ip_set_bitmap.h b/include/uapi/linux/netfilter/ipset/ip_set_bitmap.h new file mode 100644 index 00000000000..6a2c038d188 --- /dev/null +++ b/include/uapi/linux/netfilter/ipset/ip_set_bitmap.h @@ -0,0 +1,13 @@ +#ifndef _UAPI__IP_SET_BITMAP_H +#define _UAPI__IP_SET_BITMAP_H + +/* Bitmap type specific error codes */ +enum { + /* The element is out of the range of the set */ + IPSET_ERR_BITMAP_RANGE = IPSET_ERR_TYPE_SPECIFIC, + /* The range exceeds the size limit of the set type */ + IPSET_ERR_BITMAP_RANGE_SIZE, +}; + + +#endif /* _UAPI__IP_SET_BITMAP_H */ diff --git a/include/uapi/linux/netfilter/ipset/ip_set_hash.h b/include/uapi/linux/netfilter/ipset/ip_set_hash.h new file mode 100644 index 00000000000..352eeccdc7f --- /dev/null +++ b/include/uapi/linux/netfilter/ipset/ip_set_hash.h @@ -0,0 +1,21 @@ +#ifndef _UAPI__IP_SET_HASH_H +#define _UAPI__IP_SET_HASH_H + +/* Hash type specific error codes */ +enum { + /* Hash is full */ + IPSET_ERR_HASH_FULL = IPSET_ERR_TYPE_SPECIFIC, + /* Null-valued element */ + IPSET_ERR_HASH_ELEM, + /* Invalid protocol */ + IPSET_ERR_INVALID_PROTO, + /* Protocol missing but must be specified */ + IPSET_ERR_MISSING_PROTO, + /* Range not supported */ + IPSET_ERR_HASH_RANGE_UNSUPPORTED, + /* Invalid range */ + IPSET_ERR_HASH_RANGE, +}; + + +#endif /* _UAPI__IP_SET_HASH_H */ diff --git a/include/uapi/linux/netfilter/ipset/ip_set_list.h b/include/uapi/linux/netfilter/ipset/ip_set_list.h new file mode 100644 index 00000000000..a44efaa9821 --- /dev/null +++ b/include/uapi/linux/netfilter/ipset/ip_set_list.h @@ -0,0 +1,21 @@ +#ifndef _UAPI__IP_SET_LIST_H +#define _UAPI__IP_SET_LIST_H + +/* List type specific error codes */ +enum { + /* Set name to be added/deleted/tested does not exist. */ + IPSET_ERR_NAME = IPSET_ERR_TYPE_SPECIFIC, + /* list:set type is not permitted to add */ + IPSET_ERR_LOOP, + /* Missing reference set */ + IPSET_ERR_BEFORE, + /* Reference set does not exist */ + IPSET_ERR_NAMEREF, + /* Set is full */ + IPSET_ERR_LIST_FULL, + /* Reference set is not added to the set */ + IPSET_ERR_REF_EXIST, +}; + + +#endif /* _UAPI__IP_SET_LIST_H */ From 8922082ae6cd2783789e83ae9c67ffcbe5a2f4e1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:48:56 +0100 Subject: [PATCH 48/61] UAPI: (Scripted) Disintegrate include/linux/netfilter_arp Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones --- include/linux/netfilter_arp/Kbuild | 2 - include/linux/netfilter_arp/arp_tables.h | 200 +---------------- include/uapi/linux/netfilter_arp/Kbuild | 2 + include/uapi/linux/netfilter_arp/arp_tables.h | 206 ++++++++++++++++++ .../linux/netfilter_arp/arpt_mangle.h | 0 5 files changed, 209 insertions(+), 201 deletions(-) create mode 100644 include/uapi/linux/netfilter_arp/arp_tables.h rename include/{ => uapi}/linux/netfilter_arp/arpt_mangle.h (100%) diff --git a/include/linux/netfilter_arp/Kbuild b/include/linux/netfilter_arp/Kbuild index b27439c7103..e69de29bb2d 100644 --- a/include/linux/netfilter_arp/Kbuild +++ b/include/linux/netfilter_arp/Kbuild @@ -1,2 +0,0 @@ -header-y += arp_tables.h -header-y += arpt_mangle.h diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index e08565d4517..cfb7191e6ef 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -5,211 +5,14 @@ * network byte order. * flags are stored in host byte order (of course). */ - #ifndef _ARPTABLES_H #define _ARPTABLES_H -#ifdef __KERNEL__ #include #include #include #include -#endif -#include -#include -#include - -#include - -#ifndef __KERNEL__ -#define ARPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN -#define ARPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN -#define arpt_entry_target xt_entry_target -#define arpt_standard_target xt_standard_target -#define arpt_error_target xt_error_target -#define ARPT_CONTINUE XT_CONTINUE -#define ARPT_RETURN XT_RETURN -#define arpt_counters_info xt_counters_info -#define arpt_counters xt_counters -#define ARPT_STANDARD_TARGET XT_STANDARD_TARGET -#define ARPT_ERROR_TARGET XT_ERROR_TARGET -#define ARPT_ENTRY_ITERATE(entries, size, fn, args...) \ - XT_ENTRY_ITERATE(struct arpt_entry, entries, size, fn, ## args) -#endif - -#define ARPT_DEV_ADDR_LEN_MAX 16 - -struct arpt_devaddr_info { - char addr[ARPT_DEV_ADDR_LEN_MAX]; - char mask[ARPT_DEV_ADDR_LEN_MAX]; -}; - -/* Yes, Virginia, you have to zero the padding. */ -struct arpt_arp { - /* Source and target IP addr */ - struct in_addr src, tgt; - /* Mask for src and target IP addr */ - struct in_addr smsk, tmsk; - - /* Device hw address length, src+target device addresses */ - __u8 arhln, arhln_mask; - struct arpt_devaddr_info src_devaddr; - struct arpt_devaddr_info tgt_devaddr; - - /* ARP operation code. */ - __be16 arpop, arpop_mask; - - /* ARP hardware address and protocol address format. */ - __be16 arhrd, arhrd_mask; - __be16 arpro, arpro_mask; - - /* The protocol address length is only accepted if it is 4 - * so there is no use in offering a way to do filtering on it. - */ - - char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; - unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; - - /* Flags word */ - __u8 flags; - /* Inverse flags */ - __u16 invflags; -}; - -/* Values for "flag" field in struct arpt_ip (general arp structure). - * No flags defined yet. - */ -#define ARPT_F_MASK 0x00 /* All possible flag bits mask. */ - -/* Values for "inv" field in struct arpt_arp. */ -#define ARPT_INV_VIA_IN 0x0001 /* Invert the sense of IN IFACE. */ -#define ARPT_INV_VIA_OUT 0x0002 /* Invert the sense of OUT IFACE */ -#define ARPT_INV_SRCIP 0x0004 /* Invert the sense of SRC IP. */ -#define ARPT_INV_TGTIP 0x0008 /* Invert the sense of TGT IP. */ -#define ARPT_INV_SRCDEVADDR 0x0010 /* Invert the sense of SRC DEV ADDR. */ -#define ARPT_INV_TGTDEVADDR 0x0020 /* Invert the sense of TGT DEV ADDR. */ -#define ARPT_INV_ARPOP 0x0040 /* Invert the sense of ARP OP. */ -#define ARPT_INV_ARPHRD 0x0080 /* Invert the sense of ARP HRD. */ -#define ARPT_INV_ARPPRO 0x0100 /* Invert the sense of ARP PRO. */ -#define ARPT_INV_ARPHLN 0x0200 /* Invert the sense of ARP HLN. */ -#define ARPT_INV_MASK 0x03FF /* All possible flag bits mask. */ - -/* This structure defines each of the firewall rules. Consists of 3 - parts which are 1) general ARP header stuff 2) match specific - stuff 3) the target to perform if the rule matches */ -struct arpt_entry -{ - struct arpt_arp arp; - - /* Size of arpt_entry + matches */ - __u16 target_offset; - /* Size of arpt_entry + matches + target */ - __u16 next_offset; - - /* Back pointer */ - unsigned int comefrom; - - /* Packet and byte counters. */ - struct xt_counters counters; - - /* The matches (if any), then the target. */ - unsigned char elems[0]; -}; - -/* - * New IP firewall options for [gs]etsockopt at the RAW IP level. - * Unlike BSD Linux inherits IP options so you don't have to use a raw - * socket for this. Instead we check rights in the calls. - * - * ATTENTION: check linux/in.h before adding new number here. - */ -#define ARPT_BASE_CTL 96 - -#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL) -#define ARPT_SO_SET_ADD_COUNTERS (ARPT_BASE_CTL + 1) -#define ARPT_SO_SET_MAX ARPT_SO_SET_ADD_COUNTERS - -#define ARPT_SO_GET_INFO (ARPT_BASE_CTL) -#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1) -/* #define ARPT_SO_GET_REVISION_MATCH (APRT_BASE_CTL + 2) */ -#define ARPT_SO_GET_REVISION_TARGET (ARPT_BASE_CTL + 3) -#define ARPT_SO_GET_MAX (ARPT_SO_GET_REVISION_TARGET) - -/* The argument to ARPT_SO_GET_INFO */ -struct arpt_getinfo { - /* Which table: caller fills this in. */ - char name[XT_TABLE_MAXNAMELEN]; - - /* Kernel fills these in. */ - /* Which hook entry points are valid: bitmask */ - unsigned int valid_hooks; - - /* Hook entry points: one per netfilter hook. */ - unsigned int hook_entry[NF_ARP_NUMHOOKS]; - - /* Underflow points. */ - unsigned int underflow[NF_ARP_NUMHOOKS]; - - /* Number of entries */ - unsigned int num_entries; - - /* Size of entries. */ - unsigned int size; -}; - -/* The argument to ARPT_SO_SET_REPLACE. */ -struct arpt_replace { - /* Which table. */ - char name[XT_TABLE_MAXNAMELEN]; - - /* Which hook entry points are valid: bitmask. You can't - change this. */ - unsigned int valid_hooks; - - /* Number of entries */ - unsigned int num_entries; - - /* Total size of new entries */ - unsigned int size; - - /* Hook entry points. */ - unsigned int hook_entry[NF_ARP_NUMHOOKS]; - - /* Underflow points. */ - unsigned int underflow[NF_ARP_NUMHOOKS]; - - /* Information about old entries: */ - /* Number of counters (must be equal to current number of entries). */ - unsigned int num_counters; - /* The old entries' counters. */ - struct xt_counters __user *counters; - - /* The entries (hang off end: not really an array). */ - struct arpt_entry entries[0]; -}; - -/* The argument to ARPT_SO_GET_ENTRIES. */ -struct arpt_get_entries { - /* Which table: user fills this in. */ - char name[XT_TABLE_MAXNAMELEN]; - - /* User fills this in: total entry size. */ - unsigned int size; - - /* The entries. */ - struct arpt_entry entrytable[0]; -}; - -/* Helper functions */ -static __inline__ struct xt_entry_target *arpt_get_target(struct arpt_entry *e) -{ - return (void *)e + e->target_offset; -} - -/* - * Main firewall chains definitions and global var's definitions. - */ -#ifdef __KERNEL__ +#include /* Standard entry. */ struct arpt_standard { @@ -274,5 +77,4 @@ compat_arpt_get_target(struct compat_arpt_entry *e) } #endif /* CONFIG_COMPAT */ -#endif /*__KERNEL__*/ #endif /* _ARPTABLES_H */ diff --git a/include/uapi/linux/netfilter_arp/Kbuild b/include/uapi/linux/netfilter_arp/Kbuild index aafaa5aa54d..62d5637cc0a 100644 --- a/include/uapi/linux/netfilter_arp/Kbuild +++ b/include/uapi/linux/netfilter_arp/Kbuild @@ -1 +1,3 @@ # UAPI Header export list +header-y += arp_tables.h +header-y += arpt_mangle.h diff --git a/include/uapi/linux/netfilter_arp/arp_tables.h b/include/uapi/linux/netfilter_arp/arp_tables.h new file mode 100644 index 00000000000..a5a86a4db6b --- /dev/null +++ b/include/uapi/linux/netfilter_arp/arp_tables.h @@ -0,0 +1,206 @@ +/* + * Format of an ARP firewall descriptor + * + * src, tgt, src_mask, tgt_mask, arpop, arpop_mask are always stored in + * network byte order. + * flags are stored in host byte order (of course). + */ + +#ifndef _UAPI_ARPTABLES_H +#define _UAPI_ARPTABLES_H + +#include +#include +#include + +#include + +#ifndef __KERNEL__ +#define ARPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN +#define ARPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN +#define arpt_entry_target xt_entry_target +#define arpt_standard_target xt_standard_target +#define arpt_error_target xt_error_target +#define ARPT_CONTINUE XT_CONTINUE +#define ARPT_RETURN XT_RETURN +#define arpt_counters_info xt_counters_info +#define arpt_counters xt_counters +#define ARPT_STANDARD_TARGET XT_STANDARD_TARGET +#define ARPT_ERROR_TARGET XT_ERROR_TARGET +#define ARPT_ENTRY_ITERATE(entries, size, fn, args...) \ + XT_ENTRY_ITERATE(struct arpt_entry, entries, size, fn, ## args) +#endif + +#define ARPT_DEV_ADDR_LEN_MAX 16 + +struct arpt_devaddr_info { + char addr[ARPT_DEV_ADDR_LEN_MAX]; + char mask[ARPT_DEV_ADDR_LEN_MAX]; +}; + +/* Yes, Virginia, you have to zero the padding. */ +struct arpt_arp { + /* Source and target IP addr */ + struct in_addr src, tgt; + /* Mask for src and target IP addr */ + struct in_addr smsk, tmsk; + + /* Device hw address length, src+target device addresses */ + __u8 arhln, arhln_mask; + struct arpt_devaddr_info src_devaddr; + struct arpt_devaddr_info tgt_devaddr; + + /* ARP operation code. */ + __be16 arpop, arpop_mask; + + /* ARP hardware address and protocol address format. */ + __be16 arhrd, arhrd_mask; + __be16 arpro, arpro_mask; + + /* The protocol address length is only accepted if it is 4 + * so there is no use in offering a way to do filtering on it. + */ + + char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; + unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; + + /* Flags word */ + __u8 flags; + /* Inverse flags */ + __u16 invflags; +}; + +/* Values for "flag" field in struct arpt_ip (general arp structure). + * No flags defined yet. + */ +#define ARPT_F_MASK 0x00 /* All possible flag bits mask. */ + +/* Values for "inv" field in struct arpt_arp. */ +#define ARPT_INV_VIA_IN 0x0001 /* Invert the sense of IN IFACE. */ +#define ARPT_INV_VIA_OUT 0x0002 /* Invert the sense of OUT IFACE */ +#define ARPT_INV_SRCIP 0x0004 /* Invert the sense of SRC IP. */ +#define ARPT_INV_TGTIP 0x0008 /* Invert the sense of TGT IP. */ +#define ARPT_INV_SRCDEVADDR 0x0010 /* Invert the sense of SRC DEV ADDR. */ +#define ARPT_INV_TGTDEVADDR 0x0020 /* Invert the sense of TGT DEV ADDR. */ +#define ARPT_INV_ARPOP 0x0040 /* Invert the sense of ARP OP. */ +#define ARPT_INV_ARPHRD 0x0080 /* Invert the sense of ARP HRD. */ +#define ARPT_INV_ARPPRO 0x0100 /* Invert the sense of ARP PRO. */ +#define ARPT_INV_ARPHLN 0x0200 /* Invert the sense of ARP HLN. */ +#define ARPT_INV_MASK 0x03FF /* All possible flag bits mask. */ + +/* This structure defines each of the firewall rules. Consists of 3 + parts which are 1) general ARP header stuff 2) match specific + stuff 3) the target to perform if the rule matches */ +struct arpt_entry +{ + struct arpt_arp arp; + + /* Size of arpt_entry + matches */ + __u16 target_offset; + /* Size of arpt_entry + matches + target */ + __u16 next_offset; + + /* Back pointer */ + unsigned int comefrom; + + /* Packet and byte counters. */ + struct xt_counters counters; + + /* The matches (if any), then the target. */ + unsigned char elems[0]; +}; + +/* + * New IP firewall options for [gs]etsockopt at the RAW IP level. + * Unlike BSD Linux inherits IP options so you don't have to use a raw + * socket for this. Instead we check rights in the calls. + * + * ATTENTION: check linux/in.h before adding new number here. + */ +#define ARPT_BASE_CTL 96 + +#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL) +#define ARPT_SO_SET_ADD_COUNTERS (ARPT_BASE_CTL + 1) +#define ARPT_SO_SET_MAX ARPT_SO_SET_ADD_COUNTERS + +#define ARPT_SO_GET_INFO (ARPT_BASE_CTL) +#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1) +/* #define ARPT_SO_GET_REVISION_MATCH (APRT_BASE_CTL + 2) */ +#define ARPT_SO_GET_REVISION_TARGET (ARPT_BASE_CTL + 3) +#define ARPT_SO_GET_MAX (ARPT_SO_GET_REVISION_TARGET) + +/* The argument to ARPT_SO_GET_INFO */ +struct arpt_getinfo { + /* Which table: caller fills this in. */ + char name[XT_TABLE_MAXNAMELEN]; + + /* Kernel fills these in. */ + /* Which hook entry points are valid: bitmask */ + unsigned int valid_hooks; + + /* Hook entry points: one per netfilter hook. */ + unsigned int hook_entry[NF_ARP_NUMHOOKS]; + + /* Underflow points. */ + unsigned int underflow[NF_ARP_NUMHOOKS]; + + /* Number of entries */ + unsigned int num_entries; + + /* Size of entries. */ + unsigned int size; +}; + +/* The argument to ARPT_SO_SET_REPLACE. */ +struct arpt_replace { + /* Which table. */ + char name[XT_TABLE_MAXNAMELEN]; + + /* Which hook entry points are valid: bitmask. You can't + change this. */ + unsigned int valid_hooks; + + /* Number of entries */ + unsigned int num_entries; + + /* Total size of new entries */ + unsigned int size; + + /* Hook entry points. */ + unsigned int hook_entry[NF_ARP_NUMHOOKS]; + + /* Underflow points. */ + unsigned int underflow[NF_ARP_NUMHOOKS]; + + /* Information about old entries: */ + /* Number of counters (must be equal to current number of entries). */ + unsigned int num_counters; + /* The old entries' counters. */ + struct xt_counters __user *counters; + + /* The entries (hang off end: not really an array). */ + struct arpt_entry entries[0]; +}; + +/* The argument to ARPT_SO_GET_ENTRIES. */ +struct arpt_get_entries { + /* Which table: user fills this in. */ + char name[XT_TABLE_MAXNAMELEN]; + + /* User fills this in: total entry size. */ + unsigned int size; + + /* The entries. */ + struct arpt_entry entrytable[0]; +}; + +/* Helper functions */ +static __inline__ struct xt_entry_target *arpt_get_target(struct arpt_entry *e) +{ + return (void *)e + e->target_offset; +} + +/* + * Main firewall chains definitions and global var's definitions. + */ +#endif /* _UAPI_ARPTABLES_H */ diff --git a/include/linux/netfilter_arp/arpt_mangle.h b/include/uapi/linux/netfilter_arp/arpt_mangle.h similarity index 100% rename from include/linux/netfilter_arp/arpt_mangle.h rename to include/uapi/linux/netfilter_arp/arpt_mangle.h From 55c5cd3cc179eb87faa9cc2d9741047dd1642aaf Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:48:58 +0100 Subject: [PATCH 49/61] UAPI: (Scripted) Disintegrate include/linux/netfilter_bridge Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones --- include/linux/netfilter_bridge/Kbuild | 18 -- include/linux/netfilter_bridge/ebt_802_3.h | 61 +--- include/linux/netfilter_bridge/ebtables.h | 255 +---------------- include/uapi/linux/netfilter_bridge/Kbuild | 18 ++ .../uapi/linux/netfilter_bridge/ebt_802_3.h | 62 ++++ .../linux/netfilter_bridge/ebt_among.h | 0 .../linux/netfilter_bridge/ebt_arp.h | 0 .../linux/netfilter_bridge/ebt_arpreply.h | 0 .../linux/netfilter_bridge/ebt_ip.h | 0 .../linux/netfilter_bridge/ebt_ip6.h | 0 .../linux/netfilter_bridge/ebt_limit.h | 0 .../linux/netfilter_bridge/ebt_log.h | 0 .../linux/netfilter_bridge/ebt_mark_m.h | 0 .../linux/netfilter_bridge/ebt_mark_t.h | 0 .../linux/netfilter_bridge/ebt_nat.h | 0 .../linux/netfilter_bridge/ebt_nflog.h | 0 .../linux/netfilter_bridge/ebt_pkttype.h | 0 .../linux/netfilter_bridge/ebt_redirect.h | 0 .../linux/netfilter_bridge/ebt_stp.h | 0 .../linux/netfilter_bridge/ebt_ulog.h | 0 .../linux/netfilter_bridge/ebt_vlan.h | 0 .../uapi/linux/netfilter_bridge/ebtables.h | 268 ++++++++++++++++++ 22 files changed, 350 insertions(+), 332 deletions(-) create mode 100644 include/uapi/linux/netfilter_bridge/ebt_802_3.h rename include/{ => uapi}/linux/netfilter_bridge/ebt_among.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_arp.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_arpreply.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_ip.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_ip6.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_limit.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_log.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_mark_m.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_mark_t.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_nat.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_nflog.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_pkttype.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_redirect.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_stp.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_ulog.h (100%) rename include/{ => uapi}/linux/netfilter_bridge/ebt_vlan.h (100%) create mode 100644 include/uapi/linux/netfilter_bridge/ebtables.h diff --git a/include/linux/netfilter_bridge/Kbuild b/include/linux/netfilter_bridge/Kbuild index e48f1a3f5a4..e69de29bb2d 100644 --- a/include/linux/netfilter_bridge/Kbuild +++ b/include/linux/netfilter_bridge/Kbuild @@ -1,18 +0,0 @@ -header-y += ebt_802_3.h -header-y += ebt_among.h -header-y += ebt_arp.h -header-y += ebt_arpreply.h -header-y += ebt_ip.h -header-y += ebt_ip6.h -header-y += ebt_limit.h -header-y += ebt_log.h -header-y += ebt_mark_m.h -header-y += ebt_mark_t.h -header-y += ebt_nat.h -header-y += ebt_nflog.h -header-y += ebt_pkttype.h -header-y += ebt_redirect.h -header-y += ebt_stp.h -header-y += ebt_ulog.h -header-y += ebt_vlan.h -header-y += ebtables.h diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h index be5be1577a5..e17e8bfb4e8 100644 --- a/include/linux/netfilter_bridge/ebt_802_3.h +++ b/include/linux/netfilter_bridge/ebt_802_3.h @@ -1,70 +1,11 @@ #ifndef __LINUX_BRIDGE_EBT_802_3_H #define __LINUX_BRIDGE_EBT_802_3_H -#include - -#define EBT_802_3_SAP 0x01 -#define EBT_802_3_TYPE 0x02 - -#define EBT_802_3_MATCH "802_3" - -/* - * If frame has DSAP/SSAP value 0xaa you must check the SNAP type - * to discover what kind of packet we're carrying. - */ -#define CHECK_TYPE 0xaa - -/* - * Control field may be one or two bytes. If the first byte has - * the value 0x03 then the entire length is one byte, otherwise it is two. - * One byte controls are used in Unnumbered Information frames. - * Two byte controls are used in Numbered Information frames. - */ -#define IS_UI 0x03 - -#define EBT_802_3_MASK (EBT_802_3_SAP | EBT_802_3_TYPE | EBT_802_3) - -/* ui has one byte ctrl, ni has two */ -struct hdr_ui { - __u8 dsap; - __u8 ssap; - __u8 ctrl; - __u8 orig[3]; - __be16 type; -}; - -struct hdr_ni { - __u8 dsap; - __u8 ssap; - __be16 ctrl; - __u8 orig[3]; - __be16 type; -}; - -struct ebt_802_3_hdr { - __u8 daddr[6]; - __u8 saddr[6]; - __be16 len; - union { - struct hdr_ui ui; - struct hdr_ni ni; - } llc; -}; - -#ifdef __KERNEL__ #include +#include static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb) { return (struct ebt_802_3_hdr *)skb_mac_header(skb); } #endif - -struct ebt_802_3_info { - __u8 sap; - __be16 type; - __u8 bitmask; - __u8 invflags; -}; - -#endif diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 4dd5bd6994a..34e7a2b7f86 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -9,191 +9,11 @@ * This code is stongly inspired on the iptables code which is * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling */ - #ifndef __LINUX_BRIDGE_EFF_H #define __LINUX_BRIDGE_EFF_H -#include -#include -#include -#define EBT_TABLE_MAXNAMELEN 32 -#define EBT_CHAIN_MAXNAMELEN EBT_TABLE_MAXNAMELEN -#define EBT_FUNCTION_MAXNAMELEN EBT_TABLE_MAXNAMELEN +#include -/* verdicts >0 are "branches" */ -#define EBT_ACCEPT -1 -#define EBT_DROP -2 -#define EBT_CONTINUE -3 -#define EBT_RETURN -4 -#define NUM_STANDARD_TARGETS 4 -/* ebtables target modules store the verdict inside an int. We can - * reclaim a part of this int for backwards compatible extensions. - * The 4 lsb are more than enough to store the verdict. */ -#define EBT_VERDICT_BITS 0x0000000F - -struct xt_match; -struct xt_target; - -struct ebt_counter { - uint64_t pcnt; - uint64_t bcnt; -}; - -struct ebt_replace { - char name[EBT_TABLE_MAXNAMELEN]; - unsigned int valid_hooks; - /* nr of rules in the table */ - unsigned int nentries; - /* total size of the entries */ - unsigned int entries_size; - /* start of the chains */ - struct ebt_entries __user *hook_entry[NF_BR_NUMHOOKS]; - /* nr of counters userspace expects back */ - unsigned int num_counters; - /* where the kernel will put the old counters */ - struct ebt_counter __user *counters; - char __user *entries; -}; - -struct ebt_replace_kernel { - char name[EBT_TABLE_MAXNAMELEN]; - unsigned int valid_hooks; - /* nr of rules in the table */ - unsigned int nentries; - /* total size of the entries */ - unsigned int entries_size; - /* start of the chains */ - struct ebt_entries *hook_entry[NF_BR_NUMHOOKS]; - /* nr of counters userspace expects back */ - unsigned int num_counters; - /* where the kernel will put the old counters */ - struct ebt_counter *counters; - char *entries; -}; - -struct ebt_entries { - /* this field is always set to zero - * See EBT_ENTRY_OR_ENTRIES. - * Must be same size as ebt_entry.bitmask */ - unsigned int distinguisher; - /* the chain name */ - char name[EBT_CHAIN_MAXNAMELEN]; - /* counter offset for this chain */ - unsigned int counter_offset; - /* one standard (accept, drop, return) per hook */ - int policy; - /* nr. of entries */ - unsigned int nentries; - /* entry list */ - char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); -}; - -/* used for the bitmask of struct ebt_entry */ - -/* This is a hack to make a difference between an ebt_entry struct and an - * ebt_entries struct when traversing the entries from start to end. - * Using this simplifies the code a lot, while still being able to use - * ebt_entries. - * Contrary, iptables doesn't use something like ebt_entries and therefore uses - * different techniques for naming the policy and such. So, iptables doesn't - * need a hack like this. - */ -#define EBT_ENTRY_OR_ENTRIES 0x01 -/* these are the normal masks */ -#define EBT_NOPROTO 0x02 -#define EBT_802_3 0x04 -#define EBT_SOURCEMAC 0x08 -#define EBT_DESTMAC 0x10 -#define EBT_F_MASK (EBT_NOPROTO | EBT_802_3 | EBT_SOURCEMAC | EBT_DESTMAC \ - | EBT_ENTRY_OR_ENTRIES) - -#define EBT_IPROTO 0x01 -#define EBT_IIN 0x02 -#define EBT_IOUT 0x04 -#define EBT_ISOURCE 0x8 -#define EBT_IDEST 0x10 -#define EBT_ILOGICALIN 0x20 -#define EBT_ILOGICALOUT 0x40 -#define EBT_INV_MASK (EBT_IPROTO | EBT_IIN | EBT_IOUT | EBT_ILOGICALIN \ - | EBT_ILOGICALOUT | EBT_ISOURCE | EBT_IDEST) - -struct ebt_entry_match { - union { - char name[EBT_FUNCTION_MAXNAMELEN]; - struct xt_match *match; - } u; - /* size of data */ - unsigned int match_size; - unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); -}; - -struct ebt_entry_watcher { - union { - char name[EBT_FUNCTION_MAXNAMELEN]; - struct xt_target *watcher; - } u; - /* size of data */ - unsigned int watcher_size; - unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); -}; - -struct ebt_entry_target { - union { - char name[EBT_FUNCTION_MAXNAMELEN]; - struct xt_target *target; - } u; - /* size of data */ - unsigned int target_size; - unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); -}; - -#define EBT_STANDARD_TARGET "standard" -struct ebt_standard_target { - struct ebt_entry_target target; - int verdict; -}; - -/* one entry */ -struct ebt_entry { - /* this needs to be the first field */ - unsigned int bitmask; - unsigned int invflags; - __be16 ethproto; - /* the physical in-dev */ - char in[IFNAMSIZ]; - /* the logical in-dev */ - char logical_in[IFNAMSIZ]; - /* the physical out-dev */ - char out[IFNAMSIZ]; - /* the logical out-dev */ - char logical_out[IFNAMSIZ]; - unsigned char sourcemac[ETH_ALEN]; - unsigned char sourcemsk[ETH_ALEN]; - unsigned char destmac[ETH_ALEN]; - unsigned char destmsk[ETH_ALEN]; - /* sizeof ebt_entry + matches */ - unsigned int watchers_offset; - /* sizeof ebt_entry + matches + watchers */ - unsigned int target_offset; - /* sizeof ebt_entry + matches + watchers + target */ - unsigned int next_offset; - unsigned char elems[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); -}; - -/* {g,s}etsockopt numbers */ -#define EBT_BASE_CTL 128 - -#define EBT_SO_SET_ENTRIES (EBT_BASE_CTL) -#define EBT_SO_SET_COUNTERS (EBT_SO_SET_ENTRIES+1) -#define EBT_SO_SET_MAX (EBT_SO_SET_COUNTERS+1) - -#define EBT_SO_GET_INFO (EBT_BASE_CTL) -#define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO+1) -#define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES+1) -#define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO+1) -#define EBT_SO_GET_MAX (EBT_SO_GET_INIT_ENTRIES+1) - -#ifdef __KERNEL__ /* return values for match() functions */ #define EBT_MATCH 0 @@ -304,77 +124,4 @@ extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, /* True if the target is not a standard target */ #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) -#endif /* __KERNEL__ */ - -/* blatently stolen from ip_tables.h - * fn returns 0 to continue iteration */ -#define EBT_MATCH_ITERATE(e, fn, args...) \ -({ \ - unsigned int __i; \ - int __ret = 0; \ - struct ebt_entry_match *__match; \ - \ - for (__i = sizeof(struct ebt_entry); \ - __i < (e)->watchers_offset; \ - __i += __match->match_size + \ - sizeof(struct ebt_entry_match)) { \ - __match = (void *)(e) + __i; \ - \ - __ret = fn(__match , ## args); \ - if (__ret != 0) \ - break; \ - } \ - if (__ret == 0) { \ - if (__i != (e)->watchers_offset) \ - __ret = -EINVAL; \ - } \ - __ret; \ -}) - -#define EBT_WATCHER_ITERATE(e, fn, args...) \ -({ \ - unsigned int __i; \ - int __ret = 0; \ - struct ebt_entry_watcher *__watcher; \ - \ - for (__i = e->watchers_offset; \ - __i < (e)->target_offset; \ - __i += __watcher->watcher_size + \ - sizeof(struct ebt_entry_watcher)) { \ - __watcher = (void *)(e) + __i; \ - \ - __ret = fn(__watcher , ## args); \ - if (__ret != 0) \ - break; \ - } \ - if (__ret == 0) { \ - if (__i != (e)->target_offset) \ - __ret = -EINVAL; \ - } \ - __ret; \ -}) - -#define EBT_ENTRY_ITERATE(entries, size, fn, args...) \ -({ \ - unsigned int __i; \ - int __ret = 0; \ - struct ebt_entry *__entry; \ - \ - for (__i = 0; __i < (size);) { \ - __entry = (void *)(entries) + __i; \ - __ret = fn(__entry , ## args); \ - if (__ret != 0) \ - break; \ - if (__entry->bitmask != 0) \ - __i += __entry->next_offset; \ - else \ - __i += sizeof(struct ebt_entries); \ - } \ - if (__ret == 0) { \ - if (__i != (size)) \ - __ret = -EINVAL; \ - } \ - __ret; \ -}) - #endif diff --git a/include/uapi/linux/netfilter_bridge/Kbuild b/include/uapi/linux/netfilter_bridge/Kbuild index aafaa5aa54d..348717c3a22 100644 --- a/include/uapi/linux/netfilter_bridge/Kbuild +++ b/include/uapi/linux/netfilter_bridge/Kbuild @@ -1 +1,19 @@ # UAPI Header export list +header-y += ebt_802_3.h +header-y += ebt_among.h +header-y += ebt_arp.h +header-y += ebt_arpreply.h +header-y += ebt_ip.h +header-y += ebt_ip6.h +header-y += ebt_limit.h +header-y += ebt_log.h +header-y += ebt_mark_m.h +header-y += ebt_mark_t.h +header-y += ebt_nat.h +header-y += ebt_nflog.h +header-y += ebt_pkttype.h +header-y += ebt_redirect.h +header-y += ebt_stp.h +header-y += ebt_ulog.h +header-y += ebt_vlan.h +header-y += ebtables.h diff --git a/include/uapi/linux/netfilter_bridge/ebt_802_3.h b/include/uapi/linux/netfilter_bridge/ebt_802_3.h new file mode 100644 index 00000000000..5bf84912a08 --- /dev/null +++ b/include/uapi/linux/netfilter_bridge/ebt_802_3.h @@ -0,0 +1,62 @@ +#ifndef _UAPI__LINUX_BRIDGE_EBT_802_3_H +#define _UAPI__LINUX_BRIDGE_EBT_802_3_H + +#include + +#define EBT_802_3_SAP 0x01 +#define EBT_802_3_TYPE 0x02 + +#define EBT_802_3_MATCH "802_3" + +/* + * If frame has DSAP/SSAP value 0xaa you must check the SNAP type + * to discover what kind of packet we're carrying. + */ +#define CHECK_TYPE 0xaa + +/* + * Control field may be one or two bytes. If the first byte has + * the value 0x03 then the entire length is one byte, otherwise it is two. + * One byte controls are used in Unnumbered Information frames. + * Two byte controls are used in Numbered Information frames. + */ +#define IS_UI 0x03 + +#define EBT_802_3_MASK (EBT_802_3_SAP | EBT_802_3_TYPE | EBT_802_3) + +/* ui has one byte ctrl, ni has two */ +struct hdr_ui { + __u8 dsap; + __u8 ssap; + __u8 ctrl; + __u8 orig[3]; + __be16 type; +}; + +struct hdr_ni { + __u8 dsap; + __u8 ssap; + __be16 ctrl; + __u8 orig[3]; + __be16 type; +}; + +struct ebt_802_3_hdr { + __u8 daddr[6]; + __u8 saddr[6]; + __be16 len; + union { + struct hdr_ui ui; + struct hdr_ni ni; + } llc; +}; + + +struct ebt_802_3_info { + __u8 sap; + __be16 type; + __u8 bitmask; + __u8 invflags; +}; + +#endif /* _UAPI__LINUX_BRIDGE_EBT_802_3_H */ diff --git a/include/linux/netfilter_bridge/ebt_among.h b/include/uapi/linux/netfilter_bridge/ebt_among.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_among.h rename to include/uapi/linux/netfilter_bridge/ebt_among.h diff --git a/include/linux/netfilter_bridge/ebt_arp.h b/include/uapi/linux/netfilter_bridge/ebt_arp.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_arp.h rename to include/uapi/linux/netfilter_bridge/ebt_arp.h diff --git a/include/linux/netfilter_bridge/ebt_arpreply.h b/include/uapi/linux/netfilter_bridge/ebt_arpreply.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_arpreply.h rename to include/uapi/linux/netfilter_bridge/ebt_arpreply.h diff --git a/include/linux/netfilter_bridge/ebt_ip.h b/include/uapi/linux/netfilter_bridge/ebt_ip.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_ip.h rename to include/uapi/linux/netfilter_bridge/ebt_ip.h diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/uapi/linux/netfilter_bridge/ebt_ip6.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_ip6.h rename to include/uapi/linux/netfilter_bridge/ebt_ip6.h diff --git a/include/linux/netfilter_bridge/ebt_limit.h b/include/uapi/linux/netfilter_bridge/ebt_limit.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_limit.h rename to include/uapi/linux/netfilter_bridge/ebt_limit.h diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/uapi/linux/netfilter_bridge/ebt_log.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_log.h rename to include/uapi/linux/netfilter_bridge/ebt_log.h diff --git a/include/linux/netfilter_bridge/ebt_mark_m.h b/include/uapi/linux/netfilter_bridge/ebt_mark_m.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_mark_m.h rename to include/uapi/linux/netfilter_bridge/ebt_mark_m.h diff --git a/include/linux/netfilter_bridge/ebt_mark_t.h b/include/uapi/linux/netfilter_bridge/ebt_mark_t.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_mark_t.h rename to include/uapi/linux/netfilter_bridge/ebt_mark_t.h diff --git a/include/linux/netfilter_bridge/ebt_nat.h b/include/uapi/linux/netfilter_bridge/ebt_nat.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_nat.h rename to include/uapi/linux/netfilter_bridge/ebt_nat.h diff --git a/include/linux/netfilter_bridge/ebt_nflog.h b/include/uapi/linux/netfilter_bridge/ebt_nflog.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_nflog.h rename to include/uapi/linux/netfilter_bridge/ebt_nflog.h diff --git a/include/linux/netfilter_bridge/ebt_pkttype.h b/include/uapi/linux/netfilter_bridge/ebt_pkttype.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_pkttype.h rename to include/uapi/linux/netfilter_bridge/ebt_pkttype.h diff --git a/include/linux/netfilter_bridge/ebt_redirect.h b/include/uapi/linux/netfilter_bridge/ebt_redirect.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_redirect.h rename to include/uapi/linux/netfilter_bridge/ebt_redirect.h diff --git a/include/linux/netfilter_bridge/ebt_stp.h b/include/uapi/linux/netfilter_bridge/ebt_stp.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_stp.h rename to include/uapi/linux/netfilter_bridge/ebt_stp.h diff --git a/include/linux/netfilter_bridge/ebt_ulog.h b/include/uapi/linux/netfilter_bridge/ebt_ulog.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_ulog.h rename to include/uapi/linux/netfilter_bridge/ebt_ulog.h diff --git a/include/linux/netfilter_bridge/ebt_vlan.h b/include/uapi/linux/netfilter_bridge/ebt_vlan.h similarity index 100% rename from include/linux/netfilter_bridge/ebt_vlan.h rename to include/uapi/linux/netfilter_bridge/ebt_vlan.h diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h new file mode 100644 index 00000000000..ba993360dbe --- /dev/null +++ b/include/uapi/linux/netfilter_bridge/ebtables.h @@ -0,0 +1,268 @@ +/* + * ebtables + * + * Authors: + * Bart De Schuymer + * + * ebtables.c,v 2.0, April, 2002 + * + * This code is stongly inspired on the iptables code which is + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + */ + +#ifndef _UAPI__LINUX_BRIDGE_EFF_H +#define _UAPI__LINUX_BRIDGE_EFF_H +#include +#include +#include + +#define EBT_TABLE_MAXNAMELEN 32 +#define EBT_CHAIN_MAXNAMELEN EBT_TABLE_MAXNAMELEN +#define EBT_FUNCTION_MAXNAMELEN EBT_TABLE_MAXNAMELEN + +/* verdicts >0 are "branches" */ +#define EBT_ACCEPT -1 +#define EBT_DROP -2 +#define EBT_CONTINUE -3 +#define EBT_RETURN -4 +#define NUM_STANDARD_TARGETS 4 +/* ebtables target modules store the verdict inside an int. We can + * reclaim a part of this int for backwards compatible extensions. + * The 4 lsb are more than enough to store the verdict. */ +#define EBT_VERDICT_BITS 0x0000000F + +struct xt_match; +struct xt_target; + +struct ebt_counter { + uint64_t pcnt; + uint64_t bcnt; +}; + +struct ebt_replace { + char name[EBT_TABLE_MAXNAMELEN]; + unsigned int valid_hooks; + /* nr of rules in the table */ + unsigned int nentries; + /* total size of the entries */ + unsigned int entries_size; + /* start of the chains */ + struct ebt_entries __user *hook_entry[NF_BR_NUMHOOKS]; + /* nr of counters userspace expects back */ + unsigned int num_counters; + /* where the kernel will put the old counters */ + struct ebt_counter __user *counters; + char __user *entries; +}; + +struct ebt_replace_kernel { + char name[EBT_TABLE_MAXNAMELEN]; + unsigned int valid_hooks; + /* nr of rules in the table */ + unsigned int nentries; + /* total size of the entries */ + unsigned int entries_size; + /* start of the chains */ + struct ebt_entries *hook_entry[NF_BR_NUMHOOKS]; + /* nr of counters userspace expects back */ + unsigned int num_counters; + /* where the kernel will put the old counters */ + struct ebt_counter *counters; + char *entries; +}; + +struct ebt_entries { + /* this field is always set to zero + * See EBT_ENTRY_OR_ENTRIES. + * Must be same size as ebt_entry.bitmask */ + unsigned int distinguisher; + /* the chain name */ + char name[EBT_CHAIN_MAXNAMELEN]; + /* counter offset for this chain */ + unsigned int counter_offset; + /* one standard (accept, drop, return) per hook */ + int policy; + /* nr. of entries */ + unsigned int nentries; + /* entry list */ + char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); +}; + +/* used for the bitmask of struct ebt_entry */ + +/* This is a hack to make a difference between an ebt_entry struct and an + * ebt_entries struct when traversing the entries from start to end. + * Using this simplifies the code a lot, while still being able to use + * ebt_entries. + * Contrary, iptables doesn't use something like ebt_entries and therefore uses + * different techniques for naming the policy and such. So, iptables doesn't + * need a hack like this. + */ +#define EBT_ENTRY_OR_ENTRIES 0x01 +/* these are the normal masks */ +#define EBT_NOPROTO 0x02 +#define EBT_802_3 0x04 +#define EBT_SOURCEMAC 0x08 +#define EBT_DESTMAC 0x10 +#define EBT_F_MASK (EBT_NOPROTO | EBT_802_3 | EBT_SOURCEMAC | EBT_DESTMAC \ + | EBT_ENTRY_OR_ENTRIES) + +#define EBT_IPROTO 0x01 +#define EBT_IIN 0x02 +#define EBT_IOUT 0x04 +#define EBT_ISOURCE 0x8 +#define EBT_IDEST 0x10 +#define EBT_ILOGICALIN 0x20 +#define EBT_ILOGICALOUT 0x40 +#define EBT_INV_MASK (EBT_IPROTO | EBT_IIN | EBT_IOUT | EBT_ILOGICALIN \ + | EBT_ILOGICALOUT | EBT_ISOURCE | EBT_IDEST) + +struct ebt_entry_match { + union { + char name[EBT_FUNCTION_MAXNAMELEN]; + struct xt_match *match; + } u; + /* size of data */ + unsigned int match_size; + unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); +}; + +struct ebt_entry_watcher { + union { + char name[EBT_FUNCTION_MAXNAMELEN]; + struct xt_target *watcher; + } u; + /* size of data */ + unsigned int watcher_size; + unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); +}; + +struct ebt_entry_target { + union { + char name[EBT_FUNCTION_MAXNAMELEN]; + struct xt_target *target; + } u; + /* size of data */ + unsigned int target_size; + unsigned char data[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); +}; + +#define EBT_STANDARD_TARGET "standard" +struct ebt_standard_target { + struct ebt_entry_target target; + int verdict; +}; + +/* one entry */ +struct ebt_entry { + /* this needs to be the first field */ + unsigned int bitmask; + unsigned int invflags; + __be16 ethproto; + /* the physical in-dev */ + char in[IFNAMSIZ]; + /* the logical in-dev */ + char logical_in[IFNAMSIZ]; + /* the physical out-dev */ + char out[IFNAMSIZ]; + /* the logical out-dev */ + char logical_out[IFNAMSIZ]; + unsigned char sourcemac[ETH_ALEN]; + unsigned char sourcemsk[ETH_ALEN]; + unsigned char destmac[ETH_ALEN]; + unsigned char destmsk[ETH_ALEN]; + /* sizeof ebt_entry + matches */ + unsigned int watchers_offset; + /* sizeof ebt_entry + matches + watchers */ + unsigned int target_offset; + /* sizeof ebt_entry + matches + watchers + target */ + unsigned int next_offset; + unsigned char elems[0] __attribute__ ((aligned (__alignof__(struct ebt_replace)))); +}; + +/* {g,s}etsockopt numbers */ +#define EBT_BASE_CTL 128 + +#define EBT_SO_SET_ENTRIES (EBT_BASE_CTL) +#define EBT_SO_SET_COUNTERS (EBT_SO_SET_ENTRIES+1) +#define EBT_SO_SET_MAX (EBT_SO_SET_COUNTERS+1) + +#define EBT_SO_GET_INFO (EBT_BASE_CTL) +#define EBT_SO_GET_ENTRIES (EBT_SO_GET_INFO+1) +#define EBT_SO_GET_INIT_INFO (EBT_SO_GET_ENTRIES+1) +#define EBT_SO_GET_INIT_ENTRIES (EBT_SO_GET_INIT_INFO+1) +#define EBT_SO_GET_MAX (EBT_SO_GET_INIT_ENTRIES+1) + + +/* blatently stolen from ip_tables.h + * fn returns 0 to continue iteration */ +#define EBT_MATCH_ITERATE(e, fn, args...) \ +({ \ + unsigned int __i; \ + int __ret = 0; \ + struct ebt_entry_match *__match; \ + \ + for (__i = sizeof(struct ebt_entry); \ + __i < (e)->watchers_offset; \ + __i += __match->match_size + \ + sizeof(struct ebt_entry_match)) { \ + __match = (void *)(e) + __i; \ + \ + __ret = fn(__match , ## args); \ + if (__ret != 0) \ + break; \ + } \ + if (__ret == 0) { \ + if (__i != (e)->watchers_offset) \ + __ret = -EINVAL; \ + } \ + __ret; \ +}) + +#define EBT_WATCHER_ITERATE(e, fn, args...) \ +({ \ + unsigned int __i; \ + int __ret = 0; \ + struct ebt_entry_watcher *__watcher; \ + \ + for (__i = e->watchers_offset; \ + __i < (e)->target_offset; \ + __i += __watcher->watcher_size + \ + sizeof(struct ebt_entry_watcher)) { \ + __watcher = (void *)(e) + __i; \ + \ + __ret = fn(__watcher , ## args); \ + if (__ret != 0) \ + break; \ + } \ + if (__ret == 0) { \ + if (__i != (e)->target_offset) \ + __ret = -EINVAL; \ + } \ + __ret; \ +}) + +#define EBT_ENTRY_ITERATE(entries, size, fn, args...) \ +({ \ + unsigned int __i; \ + int __ret = 0; \ + struct ebt_entry *__entry; \ + \ + for (__i = 0; __i < (size);) { \ + __entry = (void *)(entries) + __i; \ + __ret = fn(__entry , ## args); \ + if (__ret != 0) \ + break; \ + if (__entry->bitmask != 0) \ + __i += __entry->next_offset; \ + else \ + __i += sizeof(struct ebt_entries); \ + } \ + if (__ret == 0) { \ + if (__i != (size)) \ + __ret = -EINVAL; \ + } \ + __ret; \ +}) + +#endif /* _UAPI__LINUX_BRIDGE_EFF_H */ From 17c075923da59c217155d0758ee0715641ebc152 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:48:59 +0100 Subject: [PATCH 50/61] UAPI: (Scripted) Disintegrate include/linux/netfilter_ipv4 Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones --- include/linux/netfilter_ipv4/Kbuild | 10 - include/linux/netfilter_ipv4/ip_tables.h | 218 +---------------- include/uapi/linux/netfilter_ipv4/Kbuild | 10 + include/uapi/linux/netfilter_ipv4/ip_tables.h | 229 ++++++++++++++++++ .../linux/netfilter_ipv4/ipt_CLUSTERIP.h | 0 .../{ => uapi}/linux/netfilter_ipv4/ipt_ECN.h | 0 .../{ => uapi}/linux/netfilter_ipv4/ipt_LOG.h | 0 .../linux/netfilter_ipv4/ipt_REJECT.h | 0 .../{ => uapi}/linux/netfilter_ipv4/ipt_TTL.h | 0 .../linux/netfilter_ipv4/ipt_ULOG.h | 0 .../{ => uapi}/linux/netfilter_ipv4/ipt_ah.h | 0 .../{ => uapi}/linux/netfilter_ipv4/ipt_ecn.h | 0 .../{ => uapi}/linux/netfilter_ipv4/ipt_ttl.h | 0 13 files changed, 241 insertions(+), 226 deletions(-) create mode 100644 include/uapi/linux/netfilter_ipv4/ip_tables.h rename include/{ => uapi}/linux/netfilter_ipv4/ipt_CLUSTERIP.h (100%) rename include/{ => uapi}/linux/netfilter_ipv4/ipt_ECN.h (100%) rename include/{ => uapi}/linux/netfilter_ipv4/ipt_LOG.h (100%) rename include/{ => uapi}/linux/netfilter_ipv4/ipt_REJECT.h (100%) rename include/{ => uapi}/linux/netfilter_ipv4/ipt_TTL.h (100%) rename include/{ => uapi}/linux/netfilter_ipv4/ipt_ULOG.h (100%) rename include/{ => uapi}/linux/netfilter_ipv4/ipt_ah.h (100%) rename include/{ => uapi}/linux/netfilter_ipv4/ipt_ecn.h (100%) rename include/{ => uapi}/linux/netfilter_ipv4/ipt_ttl.h (100%) diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 8ba0c5b72ea..e69de29bb2d 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -1,10 +0,0 @@ -header-y += ip_tables.h -header-y += ipt_CLUSTERIP.h -header-y += ipt_ECN.h -header-y += ipt_LOG.h -header-y += ipt_REJECT.h -header-y += ipt_TTL.h -header-y += ipt_ULOG.h -header-y += ipt_ah.h -header-y += ipt_ecn.h -header-y += ipt_ttl.h diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index db79231914c..901e84db847 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -11,230 +11,17 @@ * flags are stored in host byte order (of course). * Port numbers are stored in HOST byte order. */ - #ifndef _IPTABLES_H #define _IPTABLES_H -#ifdef __KERNEL__ #include #include #include #include -#endif -#include -#include -#include - -#include - -#ifndef __KERNEL__ -#define IPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN -#define IPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN -#define ipt_match xt_match -#define ipt_target xt_target -#define ipt_table xt_table -#define ipt_get_revision xt_get_revision -#define ipt_entry_match xt_entry_match -#define ipt_entry_target xt_entry_target -#define ipt_standard_target xt_standard_target -#define ipt_error_target xt_error_target -#define ipt_counters xt_counters -#define IPT_CONTINUE XT_CONTINUE -#define IPT_RETURN XT_RETURN - -/* This group is older than old (iptables < v1.4.0-rc1~89) */ -#include -#define ipt_udp xt_udp -#define ipt_tcp xt_tcp -#define IPT_TCP_INV_SRCPT XT_TCP_INV_SRCPT -#define IPT_TCP_INV_DSTPT XT_TCP_INV_DSTPT -#define IPT_TCP_INV_FLAGS XT_TCP_INV_FLAGS -#define IPT_TCP_INV_OPTION XT_TCP_INV_OPTION -#define IPT_TCP_INV_MASK XT_TCP_INV_MASK -#define IPT_UDP_INV_SRCPT XT_UDP_INV_SRCPT -#define IPT_UDP_INV_DSTPT XT_UDP_INV_DSTPT -#define IPT_UDP_INV_MASK XT_UDP_INV_MASK - -/* The argument to IPT_SO_ADD_COUNTERS. */ -#define ipt_counters_info xt_counters_info -/* Standard return verdict, or do jump. */ -#define IPT_STANDARD_TARGET XT_STANDARD_TARGET -/* Error verdict. */ -#define IPT_ERROR_TARGET XT_ERROR_TARGET - -/* fn returns 0 to continue iteration */ -#define IPT_MATCH_ITERATE(e, fn, args...) \ - XT_MATCH_ITERATE(struct ipt_entry, e, fn, ## args) - -/* fn returns 0 to continue iteration */ -#define IPT_ENTRY_ITERATE(entries, size, fn, args...) \ - XT_ENTRY_ITERATE(struct ipt_entry, entries, size, fn, ## args) -#endif - -/* Yes, Virginia, you have to zero the padding. */ -struct ipt_ip { - /* Source and destination IP addr */ - struct in_addr src, dst; - /* Mask for src and dest IP addr */ - struct in_addr smsk, dmsk; - char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; - unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; - - /* Protocol, 0 = ANY */ - __u16 proto; - - /* Flags word */ - __u8 flags; - /* Inverse flags */ - __u8 invflags; -}; - -/* Values for "flag" field in struct ipt_ip (general ip structure). */ -#define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ -#define IPT_F_GOTO 0x02 /* Set if jump is a goto */ -#define IPT_F_MASK 0x03 /* All possible flag bits mask. */ - -/* Values for "inv" field in struct ipt_ip. */ -#define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ -#define IPT_INV_VIA_OUT 0x02 /* Invert the sense of OUT IFACE */ -#define IPT_INV_TOS 0x04 /* Invert the sense of TOS. */ -#define IPT_INV_SRCIP 0x08 /* Invert the sense of SRC IP. */ -#define IPT_INV_DSTIP 0x10 /* Invert the sense of DST OP. */ -#define IPT_INV_FRAG 0x20 /* Invert the sense of FRAG. */ -#define IPT_INV_PROTO XT_INV_PROTO -#define IPT_INV_MASK 0x7F /* All possible flag bits mask. */ - -/* This structure defines each of the firewall rules. Consists of 3 - parts which are 1) general IP header stuff 2) match specific - stuff 3) the target to perform if the rule matches */ -struct ipt_entry { - struct ipt_ip ip; - - /* Mark with fields that we care about. */ - unsigned int nfcache; - - /* Size of ipt_entry + matches */ - __u16 target_offset; - /* Size of ipt_entry + matches + target */ - __u16 next_offset; - - /* Back pointer */ - unsigned int comefrom; - - /* Packet and byte counters. */ - struct xt_counters counters; - - /* The matches (if any), then the target. */ - unsigned char elems[0]; -}; - -/* - * New IP firewall options for [gs]etsockopt at the RAW IP level. - * Unlike BSD Linux inherits IP options so you don't have to use a raw - * socket for this. Instead we check rights in the calls. - * - * ATTENTION: check linux/in.h before adding new number here. - */ -#define IPT_BASE_CTL 64 - -#define IPT_SO_SET_REPLACE (IPT_BASE_CTL) -#define IPT_SO_SET_ADD_COUNTERS (IPT_BASE_CTL + 1) -#define IPT_SO_SET_MAX IPT_SO_SET_ADD_COUNTERS - -#define IPT_SO_GET_INFO (IPT_BASE_CTL) -#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1) -#define IPT_SO_GET_REVISION_MATCH (IPT_BASE_CTL + 2) -#define IPT_SO_GET_REVISION_TARGET (IPT_BASE_CTL + 3) -#define IPT_SO_GET_MAX IPT_SO_GET_REVISION_TARGET - -/* ICMP matching stuff */ -struct ipt_icmp { - __u8 type; /* type to match */ - __u8 code[2]; /* range of code */ - __u8 invflags; /* Inverse flags */ -}; - -/* Values for "inv" field for struct ipt_icmp. */ -#define IPT_ICMP_INV 0x01 /* Invert the sense of type/code test */ - -/* The argument to IPT_SO_GET_INFO */ -struct ipt_getinfo { - /* Which table: caller fills this in. */ - char name[XT_TABLE_MAXNAMELEN]; - - /* Kernel fills these in. */ - /* Which hook entry points are valid: bitmask */ - unsigned int valid_hooks; - - /* Hook entry points: one per netfilter hook. */ - unsigned int hook_entry[NF_INET_NUMHOOKS]; - - /* Underflow points. */ - unsigned int underflow[NF_INET_NUMHOOKS]; - - /* Number of entries */ - unsigned int num_entries; - - /* Size of entries. */ - unsigned int size; -}; - -/* The argument to IPT_SO_SET_REPLACE. */ -struct ipt_replace { - /* Which table. */ - char name[XT_TABLE_MAXNAMELEN]; - - /* Which hook entry points are valid: bitmask. You can't - change this. */ - unsigned int valid_hooks; - - /* Number of entries */ - unsigned int num_entries; - - /* Total size of new entries */ - unsigned int size; - - /* Hook entry points. */ - unsigned int hook_entry[NF_INET_NUMHOOKS]; - - /* Underflow points. */ - unsigned int underflow[NF_INET_NUMHOOKS]; - - /* Information about old entries: */ - /* Number of counters (must be equal to current number of entries). */ - unsigned int num_counters; - /* The old entries' counters. */ - struct xt_counters __user *counters; - - /* The entries (hang off end: not really an array). */ - struct ipt_entry entries[0]; -}; - -/* The argument to IPT_SO_GET_ENTRIES. */ -struct ipt_get_entries { - /* Which table: user fills this in. */ - char name[XT_TABLE_MAXNAMELEN]; - - /* User fills this in: total entry size. */ - unsigned int size; - - /* The entries. */ - struct ipt_entry entrytable[0]; -}; - -/* Helper functions */ -static __inline__ struct xt_entry_target * -ipt_get_target(struct ipt_entry *e) -{ - return (void *)e + e->target_offset; -} - -/* - * Main firewall chains definitions and global var's definitions. - */ -#ifdef __KERNEL__ #include +#include + extern void ipt_init(void) __init; extern struct xt_table *ipt_register_table(struct net *net, @@ -303,5 +90,4 @@ compat_ipt_get_target(struct compat_ipt_entry *e) } #endif /* CONFIG_COMPAT */ -#endif /*__KERNEL__*/ #endif /* _IPTABLES_H */ diff --git a/include/uapi/linux/netfilter_ipv4/Kbuild b/include/uapi/linux/netfilter_ipv4/Kbuild index aafaa5aa54d..fb008437dde 100644 --- a/include/uapi/linux/netfilter_ipv4/Kbuild +++ b/include/uapi/linux/netfilter_ipv4/Kbuild @@ -1 +1,11 @@ # UAPI Header export list +header-y += ip_tables.h +header-y += ipt_CLUSTERIP.h +header-y += ipt_ECN.h +header-y += ipt_LOG.h +header-y += ipt_REJECT.h +header-y += ipt_TTL.h +header-y += ipt_ULOG.h +header-y += ipt_ah.h +header-y += ipt_ecn.h +header-y += ipt_ttl.h diff --git a/include/uapi/linux/netfilter_ipv4/ip_tables.h b/include/uapi/linux/netfilter_ipv4/ip_tables.h new file mode 100644 index 00000000000..f1e6ef25603 --- /dev/null +++ b/include/uapi/linux/netfilter_ipv4/ip_tables.h @@ -0,0 +1,229 @@ +/* + * 25-Jul-1998 Major changes to allow for ip chain table + * + * 3-Jan-2000 Named tables to allow packet selection for different uses. + */ + +/* + * Format of an IP firewall descriptor + * + * src, dst, src_mask, dst_mask are always stored in network byte order. + * flags are stored in host byte order (of course). + * Port numbers are stored in HOST byte order. + */ + +#ifndef _UAPI_IPTABLES_H +#define _UAPI_IPTABLES_H + +#include +#include +#include + +#include + +#ifndef __KERNEL__ +#define IPT_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN +#define IPT_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN +#define ipt_match xt_match +#define ipt_target xt_target +#define ipt_table xt_table +#define ipt_get_revision xt_get_revision +#define ipt_entry_match xt_entry_match +#define ipt_entry_target xt_entry_target +#define ipt_standard_target xt_standard_target +#define ipt_error_target xt_error_target +#define ipt_counters xt_counters +#define IPT_CONTINUE XT_CONTINUE +#define IPT_RETURN XT_RETURN + +/* This group is older than old (iptables < v1.4.0-rc1~89) */ +#include +#define ipt_udp xt_udp +#define ipt_tcp xt_tcp +#define IPT_TCP_INV_SRCPT XT_TCP_INV_SRCPT +#define IPT_TCP_INV_DSTPT XT_TCP_INV_DSTPT +#define IPT_TCP_INV_FLAGS XT_TCP_INV_FLAGS +#define IPT_TCP_INV_OPTION XT_TCP_INV_OPTION +#define IPT_TCP_INV_MASK XT_TCP_INV_MASK +#define IPT_UDP_INV_SRCPT XT_UDP_INV_SRCPT +#define IPT_UDP_INV_DSTPT XT_UDP_INV_DSTPT +#define IPT_UDP_INV_MASK XT_UDP_INV_MASK + +/* The argument to IPT_SO_ADD_COUNTERS. */ +#define ipt_counters_info xt_counters_info +/* Standard return verdict, or do jump. */ +#define IPT_STANDARD_TARGET XT_STANDARD_TARGET +/* Error verdict. */ +#define IPT_ERROR_TARGET XT_ERROR_TARGET + +/* fn returns 0 to continue iteration */ +#define IPT_MATCH_ITERATE(e, fn, args...) \ + XT_MATCH_ITERATE(struct ipt_entry, e, fn, ## args) + +/* fn returns 0 to continue iteration */ +#define IPT_ENTRY_ITERATE(entries, size, fn, args...) \ + XT_ENTRY_ITERATE(struct ipt_entry, entries, size, fn, ## args) +#endif + +/* Yes, Virginia, you have to zero the padding. */ +struct ipt_ip { + /* Source and destination IP addr */ + struct in_addr src, dst; + /* Mask for src and dest IP addr */ + struct in_addr smsk, dmsk; + char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; + unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; + + /* Protocol, 0 = ANY */ + __u16 proto; + + /* Flags word */ + __u8 flags; + /* Inverse flags */ + __u8 invflags; +}; + +/* Values for "flag" field in struct ipt_ip (general ip structure). */ +#define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ +#define IPT_F_GOTO 0x02 /* Set if jump is a goto */ +#define IPT_F_MASK 0x03 /* All possible flag bits mask. */ + +/* Values for "inv" field in struct ipt_ip. */ +#define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ +#define IPT_INV_VIA_OUT 0x02 /* Invert the sense of OUT IFACE */ +#define IPT_INV_TOS 0x04 /* Invert the sense of TOS. */ +#define IPT_INV_SRCIP 0x08 /* Invert the sense of SRC IP. */ +#define IPT_INV_DSTIP 0x10 /* Invert the sense of DST OP. */ +#define IPT_INV_FRAG 0x20 /* Invert the sense of FRAG. */ +#define IPT_INV_PROTO XT_INV_PROTO +#define IPT_INV_MASK 0x7F /* All possible flag bits mask. */ + +/* This structure defines each of the firewall rules. Consists of 3 + parts which are 1) general IP header stuff 2) match specific + stuff 3) the target to perform if the rule matches */ +struct ipt_entry { + struct ipt_ip ip; + + /* Mark with fields that we care about. */ + unsigned int nfcache; + + /* Size of ipt_entry + matches */ + __u16 target_offset; + /* Size of ipt_entry + matches + target */ + __u16 next_offset; + + /* Back pointer */ + unsigned int comefrom; + + /* Packet and byte counters. */ + struct xt_counters counters; + + /* The matches (if any), then the target. */ + unsigned char elems[0]; +}; + +/* + * New IP firewall options for [gs]etsockopt at the RAW IP level. + * Unlike BSD Linux inherits IP options so you don't have to use a raw + * socket for this. Instead we check rights in the calls. + * + * ATTENTION: check linux/in.h before adding new number here. + */ +#define IPT_BASE_CTL 64 + +#define IPT_SO_SET_REPLACE (IPT_BASE_CTL) +#define IPT_SO_SET_ADD_COUNTERS (IPT_BASE_CTL + 1) +#define IPT_SO_SET_MAX IPT_SO_SET_ADD_COUNTERS + +#define IPT_SO_GET_INFO (IPT_BASE_CTL) +#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1) +#define IPT_SO_GET_REVISION_MATCH (IPT_BASE_CTL + 2) +#define IPT_SO_GET_REVISION_TARGET (IPT_BASE_CTL + 3) +#define IPT_SO_GET_MAX IPT_SO_GET_REVISION_TARGET + +/* ICMP matching stuff */ +struct ipt_icmp { + __u8 type; /* type to match */ + __u8 code[2]; /* range of code */ + __u8 invflags; /* Inverse flags */ +}; + +/* Values for "inv" field for struct ipt_icmp. */ +#define IPT_ICMP_INV 0x01 /* Invert the sense of type/code test */ + +/* The argument to IPT_SO_GET_INFO */ +struct ipt_getinfo { + /* Which table: caller fills this in. */ + char name[XT_TABLE_MAXNAMELEN]; + + /* Kernel fills these in. */ + /* Which hook entry points are valid: bitmask */ + unsigned int valid_hooks; + + /* Hook entry points: one per netfilter hook. */ + unsigned int hook_entry[NF_INET_NUMHOOKS]; + + /* Underflow points. */ + unsigned int underflow[NF_INET_NUMHOOKS]; + + /* Number of entries */ + unsigned int num_entries; + + /* Size of entries. */ + unsigned int size; +}; + +/* The argument to IPT_SO_SET_REPLACE. */ +struct ipt_replace { + /* Which table. */ + char name[XT_TABLE_MAXNAMELEN]; + + /* Which hook entry points are valid: bitmask. You can't + change this. */ + unsigned int valid_hooks; + + /* Number of entries */ + unsigned int num_entries; + + /* Total size of new entries */ + unsigned int size; + + /* Hook entry points. */ + unsigned int hook_entry[NF_INET_NUMHOOKS]; + + /* Underflow points. */ + unsigned int underflow[NF_INET_NUMHOOKS]; + + /* Information about old entries: */ + /* Number of counters (must be equal to current number of entries). */ + unsigned int num_counters; + /* The old entries' counters. */ + struct xt_counters __user *counters; + + /* The entries (hang off end: not really an array). */ + struct ipt_entry entries[0]; +}; + +/* The argument to IPT_SO_GET_ENTRIES. */ +struct ipt_get_entries { + /* Which table: user fills this in. */ + char name[XT_TABLE_MAXNAMELEN]; + + /* User fills this in: total entry size. */ + unsigned int size; + + /* The entries. */ + struct ipt_entry entrytable[0]; +}; + +/* Helper functions */ +static __inline__ struct xt_entry_target * +ipt_get_target(struct ipt_entry *e) +{ + return (void *)e + e->target_offset; +} + +/* + * Main firewall chains definitions and global var's definitions. + */ +#endif /* _UAPI_IPTABLES_H */ diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/uapi/linux/netfilter_ipv4/ipt_CLUSTERIP.h similarity index 100% rename from include/linux/netfilter_ipv4/ipt_CLUSTERIP.h rename to include/uapi/linux/netfilter_ipv4/ipt_CLUSTERIP.h diff --git a/include/linux/netfilter_ipv4/ipt_ECN.h b/include/uapi/linux/netfilter_ipv4/ipt_ECN.h similarity index 100% rename from include/linux/netfilter_ipv4/ipt_ECN.h rename to include/uapi/linux/netfilter_ipv4/ipt_ECN.h diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/uapi/linux/netfilter_ipv4/ipt_LOG.h similarity index 100% rename from include/linux/netfilter_ipv4/ipt_LOG.h rename to include/uapi/linux/netfilter_ipv4/ipt_LOG.h diff --git a/include/linux/netfilter_ipv4/ipt_REJECT.h b/include/uapi/linux/netfilter_ipv4/ipt_REJECT.h similarity index 100% rename from include/linux/netfilter_ipv4/ipt_REJECT.h rename to include/uapi/linux/netfilter_ipv4/ipt_REJECT.h diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/uapi/linux/netfilter_ipv4/ipt_TTL.h similarity index 100% rename from include/linux/netfilter_ipv4/ipt_TTL.h rename to include/uapi/linux/netfilter_ipv4/ipt_TTL.h diff --git a/include/linux/netfilter_ipv4/ipt_ULOG.h b/include/uapi/linux/netfilter_ipv4/ipt_ULOG.h similarity index 100% rename from include/linux/netfilter_ipv4/ipt_ULOG.h rename to include/uapi/linux/netfilter_ipv4/ipt_ULOG.h diff --git a/include/linux/netfilter_ipv4/ipt_ah.h b/include/uapi/linux/netfilter_ipv4/ipt_ah.h similarity index 100% rename from include/linux/netfilter_ipv4/ipt_ah.h rename to include/uapi/linux/netfilter_ipv4/ipt_ah.h diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/uapi/linux/netfilter_ipv4/ipt_ecn.h similarity index 100% rename from include/linux/netfilter_ipv4/ipt_ecn.h rename to include/uapi/linux/netfilter_ipv4/ipt_ecn.h diff --git a/include/linux/netfilter_ipv4/ipt_ttl.h b/include/uapi/linux/netfilter_ipv4/ipt_ttl.h similarity index 100% rename from include/linux/netfilter_ipv4/ipt_ttl.h rename to include/uapi/linux/netfilter_ipv4/ipt_ttl.h From ff1e1756c974fc1cb68b6e74cd123ad59cf07a2d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:49:01 +0100 Subject: [PATCH 51/61] UAPI: (Scripted) Disintegrate include/linux/netfilter_ipv6 Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones --- include/linux/netfilter_ipv6/Kbuild | 12 - include/linux/netfilter_ipv6/ip6_tables.h | 256 +---------------- include/uapi/linux/netfilter_ipv6/Kbuild | 12 + .../uapi/linux/netfilter_ipv6/ip6_tables.h | 267 ++++++++++++++++++ .../{ => uapi}/linux/netfilter_ipv6/ip6t_HL.h | 0 .../linux/netfilter_ipv6/ip6t_LOG.h | 0 .../linux/netfilter_ipv6/ip6t_NPT.h | 0 .../linux/netfilter_ipv6/ip6t_REJECT.h | 0 .../{ => uapi}/linux/netfilter_ipv6/ip6t_ah.h | 0 .../linux/netfilter_ipv6/ip6t_frag.h | 0 .../{ => uapi}/linux/netfilter_ipv6/ip6t_hl.h | 0 .../linux/netfilter_ipv6/ip6t_ipv6header.h | 0 .../{ => uapi}/linux/netfilter_ipv6/ip6t_mh.h | 0 .../linux/netfilter_ipv6/ip6t_opts.h | 0 .../{ => uapi}/linux/netfilter_ipv6/ip6t_rt.h | 0 15 files changed, 281 insertions(+), 266 deletions(-) create mode 100644 include/uapi/linux/netfilter_ipv6/ip6_tables.h rename include/{ => uapi}/linux/netfilter_ipv6/ip6t_HL.h (100%) rename include/{ => uapi}/linux/netfilter_ipv6/ip6t_LOG.h (100%) rename include/{ => uapi}/linux/netfilter_ipv6/ip6t_NPT.h (100%) rename include/{ => uapi}/linux/netfilter_ipv6/ip6t_REJECT.h (100%) rename include/{ => uapi}/linux/netfilter_ipv6/ip6t_ah.h (100%) rename include/{ => uapi}/linux/netfilter_ipv6/ip6t_frag.h (100%) rename include/{ => uapi}/linux/netfilter_ipv6/ip6t_hl.h (100%) rename include/{ => uapi}/linux/netfilter_ipv6/ip6t_ipv6header.h (100%) rename include/{ => uapi}/linux/netfilter_ipv6/ip6t_mh.h (100%) rename include/{ => uapi}/linux/netfilter_ipv6/ip6t_opts.h (100%) rename include/{ => uapi}/linux/netfilter_ipv6/ip6t_rt.h (100%) diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild index b88c0058bf7..e69de29bb2d 100644 --- a/include/linux/netfilter_ipv6/Kbuild +++ b/include/linux/netfilter_ipv6/Kbuild @@ -1,12 +0,0 @@ -header-y += ip6_tables.h -header-y += ip6t_HL.h -header-y += ip6t_LOG.h -header-y += ip6t_NPT.h -header-y += ip6t_REJECT.h -header-y += ip6t_ah.h -header-y += ip6t_frag.h -header-y += ip6t_hl.h -header-y += ip6t_ipv6header.h -header-y += ip6t_mh.h -header-y += ip6t_opts.h -header-y += ip6t_rt.h diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 08c2cbbaa32..5f84c6229dc 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -11,268 +11,17 @@ * flags are stored in host byte order (of course). * Port numbers are stored in HOST byte order. */ - #ifndef _IP6_TABLES_H #define _IP6_TABLES_H -#ifdef __KERNEL__ #include #include #include #include -#endif -#include -#include -#include - -#include - -#ifndef __KERNEL__ -#define IP6T_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN -#define IP6T_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN -#define ip6t_match xt_match -#define ip6t_target xt_target -#define ip6t_table xt_table -#define ip6t_get_revision xt_get_revision -#define ip6t_entry_match xt_entry_match -#define ip6t_entry_target xt_entry_target -#define ip6t_standard_target xt_standard_target -#define ip6t_error_target xt_error_target -#define ip6t_counters xt_counters -#define IP6T_CONTINUE XT_CONTINUE -#define IP6T_RETURN XT_RETURN - -/* Pre-iptables-1.4.0 */ -#include -#define ip6t_tcp xt_tcp -#define ip6t_udp xt_udp -#define IP6T_TCP_INV_SRCPT XT_TCP_INV_SRCPT -#define IP6T_TCP_INV_DSTPT XT_TCP_INV_DSTPT -#define IP6T_TCP_INV_FLAGS XT_TCP_INV_FLAGS -#define IP6T_TCP_INV_OPTION XT_TCP_INV_OPTION -#define IP6T_TCP_INV_MASK XT_TCP_INV_MASK -#define IP6T_UDP_INV_SRCPT XT_UDP_INV_SRCPT -#define IP6T_UDP_INV_DSTPT XT_UDP_INV_DSTPT -#define IP6T_UDP_INV_MASK XT_UDP_INV_MASK - -#define ip6t_counters_info xt_counters_info -#define IP6T_STANDARD_TARGET XT_STANDARD_TARGET -#define IP6T_ERROR_TARGET XT_ERROR_TARGET -#define IP6T_MATCH_ITERATE(e, fn, args...) \ - XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args) -#define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \ - XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args) -#endif - -/* Yes, Virginia, you have to zero the padding. */ -struct ip6t_ip6 { - /* Source and destination IP6 addr */ - struct in6_addr src, dst; - /* Mask for src and dest IP6 addr */ - struct in6_addr smsk, dmsk; - char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; - unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; - - /* Upper protocol number - * - The allowed value is 0 (any) or protocol number of last parsable - * header, which is 50 (ESP), 59 (No Next Header), 135 (MH), or - * the non IPv6 extension headers. - * - The protocol numbers of IPv6 extension headers except of ESP and - * MH do not match any packets. - * - You also need to set IP6T_FLAGS_PROTO to "flags" to check protocol. - */ - __u16 proto; - /* TOS to match iff flags & IP6T_F_TOS */ - __u8 tos; - - /* Flags word */ - __u8 flags; - /* Inverse flags */ - __u8 invflags; -}; - -/* Values for "flag" field in struct ip6t_ip6 (general ip6 structure). */ -#define IP6T_F_PROTO 0x01 /* Set if rule cares about upper - protocols */ -#define IP6T_F_TOS 0x02 /* Match the TOS. */ -#define IP6T_F_GOTO 0x04 /* Set if jump is a goto */ -#define IP6T_F_MASK 0x07 /* All possible flag bits mask. */ - -/* Values for "inv" field in struct ip6t_ip6. */ -#define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ -#define IP6T_INV_VIA_OUT 0x02 /* Invert the sense of OUT IFACE */ -#define IP6T_INV_TOS 0x04 /* Invert the sense of TOS. */ -#define IP6T_INV_SRCIP 0x08 /* Invert the sense of SRC IP. */ -#define IP6T_INV_DSTIP 0x10 /* Invert the sense of DST OP. */ -#define IP6T_INV_FRAG 0x20 /* Invert the sense of FRAG. */ -#define IP6T_INV_PROTO XT_INV_PROTO -#define IP6T_INV_MASK 0x7F /* All possible flag bits mask. */ - -/* This structure defines each of the firewall rules. Consists of 3 - parts which are 1) general IP header stuff 2) match specific - stuff 3) the target to perform if the rule matches */ -struct ip6t_entry { - struct ip6t_ip6 ipv6; - - /* Mark with fields that we care about. */ - unsigned int nfcache; - - /* Size of ipt_entry + matches */ - __u16 target_offset; - /* Size of ipt_entry + matches + target */ - __u16 next_offset; - - /* Back pointer */ - unsigned int comefrom; - - /* Packet and byte counters. */ - struct xt_counters counters; - - /* The matches (if any), then the target. */ - unsigned char elems[0]; -}; - -/* Standard entry */ -struct ip6t_standard { - struct ip6t_entry entry; - struct xt_standard_target target; -}; - -struct ip6t_error { - struct ip6t_entry entry; - struct xt_error_target target; -}; - -#define IP6T_ENTRY_INIT(__size) \ -{ \ - .target_offset = sizeof(struct ip6t_entry), \ - .next_offset = (__size), \ -} - -#define IP6T_STANDARD_INIT(__verdict) \ -{ \ - .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_standard)), \ - .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \ - sizeof(struct xt_standard_target)), \ - .target.verdict = -(__verdict) - 1, \ -} - -#define IP6T_ERROR_INIT \ -{ \ - .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_error)), \ - .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ - sizeof(struct xt_error_target)), \ - .target.errorname = "ERROR", \ -} - -/* - * New IP firewall options for [gs]etsockopt at the RAW IP level. - * Unlike BSD Linux inherits IP options so you don't have to use - * a raw socket for this. Instead we check rights in the calls. - * - * ATTENTION: check linux/in6.h before adding new number here. - */ -#define IP6T_BASE_CTL 64 - -#define IP6T_SO_SET_REPLACE (IP6T_BASE_CTL) -#define IP6T_SO_SET_ADD_COUNTERS (IP6T_BASE_CTL + 1) -#define IP6T_SO_SET_MAX IP6T_SO_SET_ADD_COUNTERS - -#define IP6T_SO_GET_INFO (IP6T_BASE_CTL) -#define IP6T_SO_GET_ENTRIES (IP6T_BASE_CTL + 1) -#define IP6T_SO_GET_REVISION_MATCH (IP6T_BASE_CTL + 4) -#define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5) -#define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET - -/* ICMP matching stuff */ -struct ip6t_icmp { - __u8 type; /* type to match */ - __u8 code[2]; /* range of code */ - __u8 invflags; /* Inverse flags */ -}; - -/* Values for "inv" field for struct ipt_icmp. */ -#define IP6T_ICMP_INV 0x01 /* Invert the sense of type/code test */ - -/* The argument to IP6T_SO_GET_INFO */ -struct ip6t_getinfo { - /* Which table: caller fills this in. */ - char name[XT_TABLE_MAXNAMELEN]; - - /* Kernel fills these in. */ - /* Which hook entry points are valid: bitmask */ - unsigned int valid_hooks; - - /* Hook entry points: one per netfilter hook. */ - unsigned int hook_entry[NF_INET_NUMHOOKS]; - - /* Underflow points. */ - unsigned int underflow[NF_INET_NUMHOOKS]; - - /* Number of entries */ - unsigned int num_entries; - - /* Size of entries. */ - unsigned int size; -}; - -/* The argument to IP6T_SO_SET_REPLACE. */ -struct ip6t_replace { - /* Which table. */ - char name[XT_TABLE_MAXNAMELEN]; - - /* Which hook entry points are valid: bitmask. You can't - change this. */ - unsigned int valid_hooks; - - /* Number of entries */ - unsigned int num_entries; - - /* Total size of new entries */ - unsigned int size; - - /* Hook entry points. */ - unsigned int hook_entry[NF_INET_NUMHOOKS]; - - /* Underflow points. */ - unsigned int underflow[NF_INET_NUMHOOKS]; - - /* Information about old entries: */ - /* Number of counters (must be equal to current number of entries). */ - unsigned int num_counters; - /* The old entries' counters. */ - struct xt_counters __user *counters; - - /* The entries (hang off end: not really an array). */ - struct ip6t_entry entries[0]; -}; - -/* The argument to IP6T_SO_GET_ENTRIES. */ -struct ip6t_get_entries { - /* Which table: user fills this in. */ - char name[XT_TABLE_MAXNAMELEN]; - - /* User fills this in: total entry size. */ - unsigned int size; - - /* The entries. */ - struct ip6t_entry entrytable[0]; -}; - -/* Helper functions */ -static __inline__ struct xt_entry_target * -ip6t_get_target(struct ip6t_entry *e) -{ - return (void *)e + e->target_offset; -} - -/* - * Main firewall chains definitions and global var's definitions. - */ - -#ifdef __KERNEL__ #include +#include + extern void ip6t_init(void) __init; extern void *ip6t_alloc_initial_table(const struct xt_table *); @@ -327,5 +76,4 @@ compat_ip6t_get_target(struct compat_ip6t_entry *e) } #endif /* CONFIG_COMPAT */ -#endif /*__KERNEL__*/ #endif /* _IP6_TABLES_H */ diff --git a/include/uapi/linux/netfilter_ipv6/Kbuild b/include/uapi/linux/netfilter_ipv6/Kbuild index aafaa5aa54d..75a668ca235 100644 --- a/include/uapi/linux/netfilter_ipv6/Kbuild +++ b/include/uapi/linux/netfilter_ipv6/Kbuild @@ -1 +1,13 @@ # UAPI Header export list +header-y += ip6_tables.h +header-y += ip6t_HL.h +header-y += ip6t_LOG.h +header-y += ip6t_NPT.h +header-y += ip6t_REJECT.h +header-y += ip6t_ah.h +header-y += ip6t_frag.h +header-y += ip6t_hl.h +header-y += ip6t_ipv6header.h +header-y += ip6t_mh.h +header-y += ip6t_opts.h +header-y += ip6t_rt.h diff --git a/include/uapi/linux/netfilter_ipv6/ip6_tables.h b/include/uapi/linux/netfilter_ipv6/ip6_tables.h new file mode 100644 index 00000000000..bf1ef65cc58 --- /dev/null +++ b/include/uapi/linux/netfilter_ipv6/ip6_tables.h @@ -0,0 +1,267 @@ +/* + * 25-Jul-1998 Major changes to allow for ip chain table + * + * 3-Jan-2000 Named tables to allow packet selection for different uses. + */ + +/* + * Format of an IP6 firewall descriptor + * + * src, dst, src_mask, dst_mask are always stored in network byte order. + * flags are stored in host byte order (of course). + * Port numbers are stored in HOST byte order. + */ + +#ifndef _UAPI_IP6_TABLES_H +#define _UAPI_IP6_TABLES_H + +#include +#include +#include + +#include + +#ifndef __KERNEL__ +#define IP6T_FUNCTION_MAXNAMELEN XT_FUNCTION_MAXNAMELEN +#define IP6T_TABLE_MAXNAMELEN XT_TABLE_MAXNAMELEN +#define ip6t_match xt_match +#define ip6t_target xt_target +#define ip6t_table xt_table +#define ip6t_get_revision xt_get_revision +#define ip6t_entry_match xt_entry_match +#define ip6t_entry_target xt_entry_target +#define ip6t_standard_target xt_standard_target +#define ip6t_error_target xt_error_target +#define ip6t_counters xt_counters +#define IP6T_CONTINUE XT_CONTINUE +#define IP6T_RETURN XT_RETURN + +/* Pre-iptables-1.4.0 */ +#include +#define ip6t_tcp xt_tcp +#define ip6t_udp xt_udp +#define IP6T_TCP_INV_SRCPT XT_TCP_INV_SRCPT +#define IP6T_TCP_INV_DSTPT XT_TCP_INV_DSTPT +#define IP6T_TCP_INV_FLAGS XT_TCP_INV_FLAGS +#define IP6T_TCP_INV_OPTION XT_TCP_INV_OPTION +#define IP6T_TCP_INV_MASK XT_TCP_INV_MASK +#define IP6T_UDP_INV_SRCPT XT_UDP_INV_SRCPT +#define IP6T_UDP_INV_DSTPT XT_UDP_INV_DSTPT +#define IP6T_UDP_INV_MASK XT_UDP_INV_MASK + +#define ip6t_counters_info xt_counters_info +#define IP6T_STANDARD_TARGET XT_STANDARD_TARGET +#define IP6T_ERROR_TARGET XT_ERROR_TARGET +#define IP6T_MATCH_ITERATE(e, fn, args...) \ + XT_MATCH_ITERATE(struct ip6t_entry, e, fn, ## args) +#define IP6T_ENTRY_ITERATE(entries, size, fn, args...) \ + XT_ENTRY_ITERATE(struct ip6t_entry, entries, size, fn, ## args) +#endif + +/* Yes, Virginia, you have to zero the padding. */ +struct ip6t_ip6 { + /* Source and destination IP6 addr */ + struct in6_addr src, dst; + /* Mask for src and dest IP6 addr */ + struct in6_addr smsk, dmsk; + char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; + unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; + + /* Upper protocol number + * - The allowed value is 0 (any) or protocol number of last parsable + * header, which is 50 (ESP), 59 (No Next Header), 135 (MH), or + * the non IPv6 extension headers. + * - The protocol numbers of IPv6 extension headers except of ESP and + * MH do not match any packets. + * - You also need to set IP6T_FLAGS_PROTO to "flags" to check protocol. + */ + __u16 proto; + /* TOS to match iff flags & IP6T_F_TOS */ + __u8 tos; + + /* Flags word */ + __u8 flags; + /* Inverse flags */ + __u8 invflags; +}; + +/* Values for "flag" field in struct ip6t_ip6 (general ip6 structure). */ +#define IP6T_F_PROTO 0x01 /* Set if rule cares about upper + protocols */ +#define IP6T_F_TOS 0x02 /* Match the TOS. */ +#define IP6T_F_GOTO 0x04 /* Set if jump is a goto */ +#define IP6T_F_MASK 0x07 /* All possible flag bits mask. */ + +/* Values for "inv" field in struct ip6t_ip6. */ +#define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ +#define IP6T_INV_VIA_OUT 0x02 /* Invert the sense of OUT IFACE */ +#define IP6T_INV_TOS 0x04 /* Invert the sense of TOS. */ +#define IP6T_INV_SRCIP 0x08 /* Invert the sense of SRC IP. */ +#define IP6T_INV_DSTIP 0x10 /* Invert the sense of DST OP. */ +#define IP6T_INV_FRAG 0x20 /* Invert the sense of FRAG. */ +#define IP6T_INV_PROTO XT_INV_PROTO +#define IP6T_INV_MASK 0x7F /* All possible flag bits mask. */ + +/* This structure defines each of the firewall rules. Consists of 3 + parts which are 1) general IP header stuff 2) match specific + stuff 3) the target to perform if the rule matches */ +struct ip6t_entry { + struct ip6t_ip6 ipv6; + + /* Mark with fields that we care about. */ + unsigned int nfcache; + + /* Size of ipt_entry + matches */ + __u16 target_offset; + /* Size of ipt_entry + matches + target */ + __u16 next_offset; + + /* Back pointer */ + unsigned int comefrom; + + /* Packet and byte counters. */ + struct xt_counters counters; + + /* The matches (if any), then the target. */ + unsigned char elems[0]; +}; + +/* Standard entry */ +struct ip6t_standard { + struct ip6t_entry entry; + struct xt_standard_target target; +}; + +struct ip6t_error { + struct ip6t_entry entry; + struct xt_error_target target; +}; + +#define IP6T_ENTRY_INIT(__size) \ +{ \ + .target_offset = sizeof(struct ip6t_entry), \ + .next_offset = (__size), \ +} + +#define IP6T_STANDARD_INIT(__verdict) \ +{ \ + .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_standard)), \ + .target = XT_TARGET_INIT(XT_STANDARD_TARGET, \ + sizeof(struct xt_standard_target)), \ + .target.verdict = -(__verdict) - 1, \ +} + +#define IP6T_ERROR_INIT \ +{ \ + .entry = IP6T_ENTRY_INIT(sizeof(struct ip6t_error)), \ + .target = XT_TARGET_INIT(XT_ERROR_TARGET, \ + sizeof(struct xt_error_target)), \ + .target.errorname = "ERROR", \ +} + +/* + * New IP firewall options for [gs]etsockopt at the RAW IP level. + * Unlike BSD Linux inherits IP options so you don't have to use + * a raw socket for this. Instead we check rights in the calls. + * + * ATTENTION: check linux/in6.h before adding new number here. + */ +#define IP6T_BASE_CTL 64 + +#define IP6T_SO_SET_REPLACE (IP6T_BASE_CTL) +#define IP6T_SO_SET_ADD_COUNTERS (IP6T_BASE_CTL + 1) +#define IP6T_SO_SET_MAX IP6T_SO_SET_ADD_COUNTERS + +#define IP6T_SO_GET_INFO (IP6T_BASE_CTL) +#define IP6T_SO_GET_ENTRIES (IP6T_BASE_CTL + 1) +#define IP6T_SO_GET_REVISION_MATCH (IP6T_BASE_CTL + 4) +#define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5) +#define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET + +/* ICMP matching stuff */ +struct ip6t_icmp { + __u8 type; /* type to match */ + __u8 code[2]; /* range of code */ + __u8 invflags; /* Inverse flags */ +}; + +/* Values for "inv" field for struct ipt_icmp. */ +#define IP6T_ICMP_INV 0x01 /* Invert the sense of type/code test */ + +/* The argument to IP6T_SO_GET_INFO */ +struct ip6t_getinfo { + /* Which table: caller fills this in. */ + char name[XT_TABLE_MAXNAMELEN]; + + /* Kernel fills these in. */ + /* Which hook entry points are valid: bitmask */ + unsigned int valid_hooks; + + /* Hook entry points: one per netfilter hook. */ + unsigned int hook_entry[NF_INET_NUMHOOKS]; + + /* Underflow points. */ + unsigned int underflow[NF_INET_NUMHOOKS]; + + /* Number of entries */ + unsigned int num_entries; + + /* Size of entries. */ + unsigned int size; +}; + +/* The argument to IP6T_SO_SET_REPLACE. */ +struct ip6t_replace { + /* Which table. */ + char name[XT_TABLE_MAXNAMELEN]; + + /* Which hook entry points are valid: bitmask. You can't + change this. */ + unsigned int valid_hooks; + + /* Number of entries */ + unsigned int num_entries; + + /* Total size of new entries */ + unsigned int size; + + /* Hook entry points. */ + unsigned int hook_entry[NF_INET_NUMHOOKS]; + + /* Underflow points. */ + unsigned int underflow[NF_INET_NUMHOOKS]; + + /* Information about old entries: */ + /* Number of counters (must be equal to current number of entries). */ + unsigned int num_counters; + /* The old entries' counters. */ + struct xt_counters __user *counters; + + /* The entries (hang off end: not really an array). */ + struct ip6t_entry entries[0]; +}; + +/* The argument to IP6T_SO_GET_ENTRIES. */ +struct ip6t_get_entries { + /* Which table: user fills this in. */ + char name[XT_TABLE_MAXNAMELEN]; + + /* User fills this in: total entry size. */ + unsigned int size; + + /* The entries. */ + struct ip6t_entry entrytable[0]; +}; + +/* Helper functions */ +static __inline__ struct xt_entry_target * +ip6t_get_target(struct ip6t_entry *e) +{ + return (void *)e + e->target_offset; +} + +/* + * Main firewall chains definitions and global var's definitions. + */ + +#endif /* _UAPI_IP6_TABLES_H */ diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/uapi/linux/netfilter_ipv6/ip6t_HL.h similarity index 100% rename from include/linux/netfilter_ipv6/ip6t_HL.h rename to include/uapi/linux/netfilter_ipv6/ip6t_HL.h diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/uapi/linux/netfilter_ipv6/ip6t_LOG.h similarity index 100% rename from include/linux/netfilter_ipv6/ip6t_LOG.h rename to include/uapi/linux/netfilter_ipv6/ip6t_LOG.h diff --git a/include/linux/netfilter_ipv6/ip6t_NPT.h b/include/uapi/linux/netfilter_ipv6/ip6t_NPT.h similarity index 100% rename from include/linux/netfilter_ipv6/ip6t_NPT.h rename to include/uapi/linux/netfilter_ipv6/ip6t_NPT.h diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/uapi/linux/netfilter_ipv6/ip6t_REJECT.h similarity index 100% rename from include/linux/netfilter_ipv6/ip6t_REJECT.h rename to include/uapi/linux/netfilter_ipv6/ip6t_REJECT.h diff --git a/include/linux/netfilter_ipv6/ip6t_ah.h b/include/uapi/linux/netfilter_ipv6/ip6t_ah.h similarity index 100% rename from include/linux/netfilter_ipv6/ip6t_ah.h rename to include/uapi/linux/netfilter_ipv6/ip6t_ah.h diff --git a/include/linux/netfilter_ipv6/ip6t_frag.h b/include/uapi/linux/netfilter_ipv6/ip6t_frag.h similarity index 100% rename from include/linux/netfilter_ipv6/ip6t_frag.h rename to include/uapi/linux/netfilter_ipv6/ip6t_frag.h diff --git a/include/linux/netfilter_ipv6/ip6t_hl.h b/include/uapi/linux/netfilter_ipv6/ip6t_hl.h similarity index 100% rename from include/linux/netfilter_ipv6/ip6t_hl.h rename to include/uapi/linux/netfilter_ipv6/ip6t_hl.h diff --git a/include/linux/netfilter_ipv6/ip6t_ipv6header.h b/include/uapi/linux/netfilter_ipv6/ip6t_ipv6header.h similarity index 100% rename from include/linux/netfilter_ipv6/ip6t_ipv6header.h rename to include/uapi/linux/netfilter_ipv6/ip6t_ipv6header.h diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/uapi/linux/netfilter_ipv6/ip6t_mh.h similarity index 100% rename from include/linux/netfilter_ipv6/ip6t_mh.h rename to include/uapi/linux/netfilter_ipv6/ip6t_mh.h diff --git a/include/linux/netfilter_ipv6/ip6t_opts.h b/include/uapi/linux/netfilter_ipv6/ip6t_opts.h similarity index 100% rename from include/linux/netfilter_ipv6/ip6t_opts.h rename to include/uapi/linux/netfilter_ipv6/ip6t_opts.h diff --git a/include/linux/netfilter_ipv6/ip6t_rt.h b/include/uapi/linux/netfilter_ipv6/ip6t_rt.h similarity index 100% rename from include/linux/netfilter_ipv6/ip6t_rt.h rename to include/uapi/linux/netfilter_ipv6/ip6t_rt.h From 611128eb3927941abcfdca5b7515cf02aab601ec Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:49:05 +0100 Subject: [PATCH 52/61] UAPI: (Scripted) Disintegrate include/linux/tc_act Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones --- include/linux/tc_act/Kbuild | 7 ------- include/uapi/linux/tc_act/Kbuild | 7 +++++++ include/{ => uapi}/linux/tc_act/tc_csum.h | 0 include/{ => uapi}/linux/tc_act/tc_gact.h | 0 include/{ => uapi}/linux/tc_act/tc_ipt.h | 0 include/{ => uapi}/linux/tc_act/tc_mirred.h | 0 include/{ => uapi}/linux/tc_act/tc_nat.h | 0 include/{ => uapi}/linux/tc_act/tc_pedit.h | 0 include/{ => uapi}/linux/tc_act/tc_skbedit.h | 0 9 files changed, 7 insertions(+), 7 deletions(-) rename include/{ => uapi}/linux/tc_act/tc_csum.h (100%) rename include/{ => uapi}/linux/tc_act/tc_gact.h (100%) rename include/{ => uapi}/linux/tc_act/tc_ipt.h (100%) rename include/{ => uapi}/linux/tc_act/tc_mirred.h (100%) rename include/{ => uapi}/linux/tc_act/tc_nat.h (100%) rename include/{ => uapi}/linux/tc_act/tc_pedit.h (100%) rename include/{ => uapi}/linux/tc_act/tc_skbedit.h (100%) diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 67b501c302b..e69de29bb2d 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -1,7 +0,0 @@ -header-y += tc_gact.h -header-y += tc_ipt.h -header-y += tc_mirred.h -header-y += tc_pedit.h -header-y += tc_nat.h -header-y += tc_skbedit.h -header-y += tc_csum.h diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index aafaa5aa54d..0623ec4e728 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -1 +1,8 @@ # UAPI Header export list +header-y += tc_csum.h +header-y += tc_gact.h +header-y += tc_ipt.h +header-y += tc_mirred.h +header-y += tc_nat.h +header-y += tc_pedit.h +header-y += tc_skbedit.h diff --git a/include/linux/tc_act/tc_csum.h b/include/uapi/linux/tc_act/tc_csum.h similarity index 100% rename from include/linux/tc_act/tc_csum.h rename to include/uapi/linux/tc_act/tc_csum.h diff --git a/include/linux/tc_act/tc_gact.h b/include/uapi/linux/tc_act/tc_gact.h similarity index 100% rename from include/linux/tc_act/tc_gact.h rename to include/uapi/linux/tc_act/tc_gact.h diff --git a/include/linux/tc_act/tc_ipt.h b/include/uapi/linux/tc_act/tc_ipt.h similarity index 100% rename from include/linux/tc_act/tc_ipt.h rename to include/uapi/linux/tc_act/tc_ipt.h diff --git a/include/linux/tc_act/tc_mirred.h b/include/uapi/linux/tc_act/tc_mirred.h similarity index 100% rename from include/linux/tc_act/tc_mirred.h rename to include/uapi/linux/tc_act/tc_mirred.h diff --git a/include/linux/tc_act/tc_nat.h b/include/uapi/linux/tc_act/tc_nat.h similarity index 100% rename from include/linux/tc_act/tc_nat.h rename to include/uapi/linux/tc_act/tc_nat.h diff --git a/include/linux/tc_act/tc_pedit.h b/include/uapi/linux/tc_act/tc_pedit.h similarity index 100% rename from include/linux/tc_act/tc_pedit.h rename to include/uapi/linux/tc_act/tc_pedit.h diff --git a/include/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h similarity index 100% rename from include/linux/tc_act/tc_skbedit.h rename to include/uapi/linux/tc_act/tc_skbedit.h From b6bf56c6be2be7ccebebfd91b1f11889296cbd9e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:49:06 +0100 Subject: [PATCH 53/61] UAPI: (Scripted) Disintegrate include/linux/tc_ematch Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones --- include/linux/tc_ematch/Kbuild | 4 ---- include/uapi/linux/tc_ematch/Kbuild | 4 ++++ include/{ => uapi}/linux/tc_ematch/tc_em_cmp.h | 0 include/{ => uapi}/linux/tc_ematch/tc_em_meta.h | 0 include/{ => uapi}/linux/tc_ematch/tc_em_nbyte.h | 0 include/{ => uapi}/linux/tc_ematch/tc_em_text.h | 0 6 files changed, 4 insertions(+), 4 deletions(-) rename include/{ => uapi}/linux/tc_ematch/tc_em_cmp.h (100%) rename include/{ => uapi}/linux/tc_ematch/tc_em_meta.h (100%) rename include/{ => uapi}/linux/tc_ematch/tc_em_nbyte.h (100%) rename include/{ => uapi}/linux/tc_ematch/tc_em_text.h (100%) diff --git a/include/linux/tc_ematch/Kbuild b/include/linux/tc_ematch/Kbuild index 4a58a1c32a0..e69de29bb2d 100644 --- a/include/linux/tc_ematch/Kbuild +++ b/include/linux/tc_ematch/Kbuild @@ -1,4 +0,0 @@ -header-y += tc_em_cmp.h -header-y += tc_em_meta.h -header-y += tc_em_nbyte.h -header-y += tc_em_text.h diff --git a/include/uapi/linux/tc_ematch/Kbuild b/include/uapi/linux/tc_ematch/Kbuild index aafaa5aa54d..53fca392553 100644 --- a/include/uapi/linux/tc_ematch/Kbuild +++ b/include/uapi/linux/tc_ematch/Kbuild @@ -1 +1,5 @@ # UAPI Header export list +header-y += tc_em_cmp.h +header-y += tc_em_meta.h +header-y += tc_em_nbyte.h +header-y += tc_em_text.h diff --git a/include/linux/tc_ematch/tc_em_cmp.h b/include/uapi/linux/tc_ematch/tc_em_cmp.h similarity index 100% rename from include/linux/tc_ematch/tc_em_cmp.h rename to include/uapi/linux/tc_ematch/tc_em_cmp.h diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/uapi/linux/tc_ematch/tc_em_meta.h similarity index 100% rename from include/linux/tc_ematch/tc_em_meta.h rename to include/uapi/linux/tc_ematch/tc_em_meta.h diff --git a/include/linux/tc_ematch/tc_em_nbyte.h b/include/uapi/linux/tc_ematch/tc_em_nbyte.h similarity index 100% rename from include/linux/tc_ematch/tc_em_nbyte.h rename to include/uapi/linux/tc_ematch/tc_em_nbyte.h diff --git a/include/linux/tc_ematch/tc_em_text.h b/include/uapi/linux/tc_ematch/tc_em_text.h similarity index 100% rename from include/linux/tc_ematch/tc_em_text.h rename to include/uapi/linux/tc_ematch/tc_em_text.h From 91e2b89b04d3f568dc4ec99f242a4a742061e399 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Wed, 3 Oct 2012 00:57:23 +0000 Subject: [PATCH 54/61] ixgbevf: Set the netdev number of Tx queues The driver was not setting the number of real Tx queues in the net_device structure. This caused some serious issues such as Tx hangs and extremely poor performance with some usages of the driver. The issue is best observed by running: iperf -c -P Where n is greater than one. The greater the value of n the more likely the problem is to show up. Signed-off-by: Greg Rose Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 0ee9bd4819f..de1ad506665 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1747,6 +1747,7 @@ err_tx_ring_allocation: **/ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) { + struct net_device *netdev = adapter->netdev; int err = 0; int vector, v_budget; @@ -1775,6 +1776,12 @@ static int ixgbevf_set_interrupt_capability(struct ixgbevf_adapter *adapter) ixgbevf_acquire_msix_vectors(adapter, v_budget); + err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues); + if (err) + goto out; + + err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues); + out: return err; } From c88887e09099b7f77659cb6aa1b9d670a7632f4b Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 22 Aug 2012 02:04:37 +0000 Subject: [PATCH 55/61] ixgbe/ixgbevf: Limit maximum jumbo frame size to 9.5K to avoid Tx hangs This change limits the PF/VF driver to 9.5K max jumbo frame size in order prevent a possible Tx hang in the adapter when sending frames between pools. All of the parts in ixgbe support a maximum frame of 15.5K for standard traffic, however with SR-IOV or DCB enabled they should be limiting the MTU size to 9.5K. Instead of adding extra checks which would have to change the MTU when we go into or out of these modes it is preferred to just use a standard 9.5K MTU limit for all modes so that this extra overhead can be avoided. Signed-off-by: Alexander Duyck Tested-by: Phil Schmitt Tested-by: Sibai Li Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 5bd26763554..30efc9f0f47 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -410,7 +410,7 @@ static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring) #define IXGBE_TX_CTXTDESC(R, i) \ (&(((struct ixgbe_adv_tx_context_desc *)((R)->desc))[i])) -#define IXGBE_MAX_JUMBO_FRAME_SIZE 16128 +#define IXGBE_MAX_JUMBO_FRAME_SIZE 9728 /* Maximum Supported Size 9.5KB */ #ifdef IXGBE_FCOE /* Use 3K as the baby jumbo frame size for FCoE */ #define IXGBE_FCOE_JUMBO_FRAME_SIZE 3072 diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 383b4e1cd17..4a9c9c28568 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -175,7 +175,7 @@ struct ixgbevf_q_vector { #define IXGBEVF_TX_CTXTDESC(R, i) \ (&(((struct ixgbe_adv_tx_context_desc *)((R)->desc))[i])) -#define IXGBE_MAX_JUMBO_FRAME_SIZE 16128 +#define IXGBE_MAX_JUMBO_FRAME_SIZE 9728 /* Maximum Supported Size 9.5KB */ #define OTHER_VECTOR 1 #define NON_Q_VECTORS (OTHER_VECTOR) From 16e310ae6ed352c4963b1f2413fcd88fa693eeda Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Tue, 9 Oct 2012 01:11:26 +0000 Subject: [PATCH 56/61] e1000e: add device IDs for i218 i218 is the next-generation LOM that will be available on systems with the Lynx Point LP Platform Controller Hub (PCH) chipset from Intel. This patch provides the initial support of those devices. Signed-off-by: Bruce Allan Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/hw.h | 2 ++ drivers/net/ethernet/intel/e1000e/netdev.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/net/ethernet/intel/e1000e/hw.h b/drivers/net/ethernet/intel/e1000e/hw.h index ed5b40985ed..d37bfd96c98 100644 --- a/drivers/net/ethernet/intel/e1000e/hw.h +++ b/drivers/net/ethernet/intel/e1000e/hw.h @@ -412,6 +412,8 @@ enum e1e_registers { #define E1000_DEV_ID_PCH2_LV_V 0x1503 #define E1000_DEV_ID_PCH_LPT_I217_LM 0x153A #define E1000_DEV_ID_PCH_LPT_I217_V 0x153B +#define E1000_DEV_ID_PCH_LPTLP_I218_LM 0x155A +#define E1000_DEV_ID_PCH_LPTLP_I218_V 0x1559 #define E1000_REVISION_4 4 diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index fb659dd8db0..de57a2ba6bd 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6558,6 +6558,8 @@ static DEFINE_PCI_DEVICE_TABLE(e1000_pci_tbl) = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPT_I217_LM), board_pch_lpt }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPT_I217_V), board_pch_lpt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPTLP_I218_LM), board_pch_lpt }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LPTLP_I218_V), board_pch_lpt }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; From 0f796579848f284705cef8c9348f8178b37e1abe Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Mon, 8 Oct 2012 23:43:12 +0000 Subject: [PATCH 57/61] pch_gbe: Fix build error by selecting all the possible dependencies. Fengguang reported a kernel build failure as following: drivers/built-in.o: In function `pch_gbe_ioctl': pch_gbe_main.c:(.text+0x510370): undefined reference to `pch_ch_control_write' pch_gbe_main.c:(.text+0x510393): undefined reference to `pch_ch_control_write' pch_gbe_main.c:(.text+0x5103b3): undefined reference to `pch_ch_control_write' ... It's a regression by commit da1586461. The root cause is that the CONFIG_PPS is not set there, consequently CONFIG_PTP_1588_CLOCK can not be set anyway, which finally causes ptp_pch and pch_gbe_main build failures. As David prefers to use *select* to fix such module co-dependency issues, this patch explicitly selects all the possible dependencies of PCH_PTP. Reported-by: Fengguang Wu Reviewed-by: David S. Miller Signed-off-by: Haicheng Li Signed-off-by: David S. Miller --- drivers/net/ethernet/oki-semi/pch_gbe/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig index 97302419a37..5296cc8d3cb 100644 --- a/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig +++ b/drivers/net/ethernet/oki-semi/pch_gbe/Kconfig @@ -26,6 +26,9 @@ if PCH_GBE config PCH_PTP bool "PCH PTP clock support" default n + depends on EXPERIMENTAL + select PPS + select PTP_1588_CLOCK select PTP_1588_CLOCK_PCH ---help--- Say Y here if you want to use Precision Time Protocol (PTP) in the From 1451ae6ef8bff77007338b2870bb3de2658afff8 Mon Sep 17 00:00:00 2001 From: Ajit Khaparde Date: Mon, 8 Oct 2012 18:18:21 +0000 Subject: [PATCH 58/61] be2net: Remove code that stops further access to BE NIC based on UE bits On certain platforms, BE hardware could falsely indicate UE. For BE family of NICs, do not set hw_error based on the UE bits. If there was a real fatal error, the corresponding h/w block will automatically go offline and stop traffic. Signed-off-by: Ajit Khaparde Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index eb3f2cb3b93..d1b6cc58763 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -2129,8 +2129,11 @@ void be_detect_error(struct be_adapter *adapter) ue_hi = (ue_hi & ~ue_hi_mask); } - if (ue_lo || ue_hi || - sliport_status & SLIPORT_STATUS_ERR_MASK) { + /* On certain platforms BE hardware can indicate spurious UEs. + * Allow the h/w to stop working completely in case of a real UE. + * Hence not setting the hw_error for UE detection. + */ + if (sliport_status & SLIPORT_STATUS_ERR_MASK) { adapter->hw_error = true; dev_err(&adapter->pdev->dev, "Error detected in the card\n"); From 50fb47ae4d812d5eabc57539701dc0fd6404495e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Oct 2012 21:20:48 +0000 Subject: [PATCH 59/61] farsync: fix support for over 30 cards We're trying to fill a 64 bit bitmap but only the lower 30 shifts work because the shift wraps around. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/wan/farsync.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c index 1a623183cbe..b6271325f80 100644 --- a/drivers/net/wan/farsync.c +++ b/drivers/net/wan/farsync.c @@ -597,7 +597,7 @@ fst_q_work_item(u64 * queue, int card_index) * bottom half for the card. Note the limitation of 64 cards. * That ought to be enough */ - mask = 1 << card_index; + mask = (u64)1 << card_index; *queue |= mask; spin_unlock_irqrestore(&fst_work_q_lock, flags); } From 4085a7f0a09c73ef3e9de113c9f04d78c79be645 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 8 Oct 2012 22:55:43 +0000 Subject: [PATCH 60/61] netdev/phy: Prototype of_mdio_find_bus() Ensure that of_mdio_find_bus() matches the prototype in the header (and stop sparse complaining) by including the header with the prototype. Signed-off-by: Mark Brown Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 170eb411ab5..c1ef3000ea6 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include From 5175a5e76bbdf20a614fb47ce7a38f0f39e70226 Mon Sep 17 00:00:00 2001 From: "jeff.liu" Date: Mon, 8 Oct 2012 18:57:27 +0000 Subject: [PATCH 61/61] RDS: fix rds-ping spinlock recursion This is the revised patch for fixing rds-ping spinlock recursion according to Venkat's suggestions. RDS ping/pong over TCP feature has been broken for years(2.6.39 to 3.6.0) since we have to set TCP cork and call kernel_sendmsg() between ping/pong which both need to lock "struct sock *sk". However, this lock has already been hold before rds_tcp_data_ready() callback is triggerred. As a result, we always facing spinlock resursion which would resulting in system panic. Given that RDS ping is only used to test the connectivity and not for serious performance measurements, we can queue the pong transmit to rds_wq as a delayed response. Reported-by: Dan Carpenter CC: Venkat Venkatsubra CC: David S. Miller CC: James Morris Signed-off-by: Jie Liu Signed-off-by: David S. Miller --- net/rds/send.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/send.c b/net/rds/send.c index 96531d4033a..88eace57dd6 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1122,7 +1122,7 @@ rds_send_pong(struct rds_connection *conn, __be16 dport) rds_stats_inc(s_send_pong); if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) - rds_send_xmit(conn); + queue_delayed_work(rds_wq, &conn->c_send_w, 0); rds_message_put(rm); return 0;