From 5984be90046fa978d94a5ec08bbf8f760cff2b30 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 6 Mar 2012 15:50:49 +0200 Subject: [PATCH] mlx4_core: Report thermal error events Print an error message when a thermal error async event is reported by the HW. Signed-off-by: Jack Morgenstein Signed-off-by: Dotan Barak Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/eq.c | 32 ++++++++++++++++++++++- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 +++ include/linux/mlx4/device.h | 5 ++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 8fa41f3082c..780b5adf8e7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -79,7 +79,8 @@ enum { (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT) | \ (1ull << MLX4_EVENT_TYPE_CMD) | \ (1ull << MLX4_EVENT_TYPE_COMM_CHANNEL) | \ - (1ull << MLX4_EVENT_TYPE_FLR_EVENT)) + (1ull << MLX4_EVENT_TYPE_FLR_EVENT) | \ + (1ull << MLX4_EVENT_TYPE_FATAL_WARNING)) static void eq_set_ci(struct mlx4_eq *eq, int req_not) { @@ -443,6 +444,35 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.slave_flr_event_work); break; + + case MLX4_EVENT_TYPE_FATAL_WARNING: + if (eqe->subtype == MLX4_FATAL_WARNING_SUBTYPE_WARMING) { + if (mlx4_is_master(dev)) + for (i = 0; i < dev->num_slaves; i++) { + mlx4_dbg(dev, "%s: Sending " + "MLX4_FATAL_WARNING_SUBTYPE_WARMING" + " to slave: %d\n", __func__, i); + if (i == dev->caps.function) + continue; + mlx4_slave_event(dev, i, eqe); + } + mlx4_err(dev, "Temperature Threshold was reached! " + "Threshold: %d celsius degrees; " + "Current Temperature: %d\n", + be16_to_cpu(eqe->event.warming.warning_threshold), + be16_to_cpu(eqe->event.warming.current_temperature)); + } else + mlx4_warn(dev, "Unhandled event FATAL WARNING (%02x), " + "subtype %02x on EQ %d at index %u. owner=%x, " + "nent=0x%x, slave=%x, ownership=%s\n", + eqe->type, eqe->subtype, eq->eqn, + eq->cons_index, eqe->owner, eq->nent, + eqe->slave_id, + !!(eqe->owner & 0x80) ^ + !!(eq->cons_index & eq->nent) ? "HW" : "SW"); + + break; + case MLX4_EVENT_TYPE_EEC_CATAS_ERROR: case MLX4_EVENT_TYPE_ECC_DETECT: default: diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c92269f8c05..ac2d6061268 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -363,6 +363,10 @@ struct mlx4_eqe { struct { __be32 slave_id; } __packed flr_event; + struct { + __be16 current_temperature; + __be16 warning_threshold; + } __packed warming; } event; u8 slave_id; u8 reserved3[2]; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 263d2ae21ac..4b3fbf12253 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -133,6 +133,7 @@ enum mlx4_event { MLX4_EVENT_TYPE_CMD = 0x0a, MLX4_EVENT_TYPE_VEP_UPDATE = 0x19, MLX4_EVENT_TYPE_COMM_CHANNEL = 0x18, + MLX4_EVENT_TYPE_FATAL_WARNING = 0x1b, MLX4_EVENT_TYPE_FLR_EVENT = 0x1c, MLX4_EVENT_TYPE_NONE = 0xff, }; @@ -142,6 +143,10 @@ enum { MLX4_PORT_CHANGE_SUBTYPE_ACTIVE = 4 }; +enum { + MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0, +}; + enum { MLX4_PERM_LOCAL_READ = 1 << 10, MLX4_PERM_LOCAL_WRITE = 1 << 11,