DRBD: Add support for dissecting DRBD on InfiniBand

InfiniBand and RDMA over Converged Ethernet (RoCE) are handled by the
infiniband dissector. Register DRBD as a dissector for infiniband
payloads.

Not yet implemented:
* Dissection of data payloads. For P_DATA and similar packets, the data
  payload is transmitted over multiple packets. Only the first packet
  containing the DRBD header is dissected.
* Dissection of the InfiniBand Immediate Data. DRBD uses this for
  sequence numbers and for indicating which stream the packet belongs
  to.
* Any attempt to associate packets into conversations. This could be
  achieved when the connection packets have been captured, but would be
  very difficult otherwise.
* An association with infiniband.cm.req.serviceid.dport. When using
  RoCE, this is the port that the user specifies for the connection, so
  it would be useful to associate it with the DRBD conversation and show
  it. Otherwise it is tricky to identify DRBD connections.
This commit is contained in:
Joel Colledge 2021-12-07 14:54:45 +01:00 committed by A Wireshark GitLab Utility
parent aac695d62e
commit ddbafcd9ba
1 changed files with 173 additions and 25 deletions

View File

@ -384,6 +384,15 @@ static const value_string disk_state_names[] = {
#define DP_WSAME 512
#define DP_ZEROES 1024
#define DRBD_STREAM_DATA 0
#define DRBD_STREAM_CONTROL 1
static const value_string stream_names[] = {
{ DRBD_STREAM_DATA, "Data" },
{ DRBD_STREAM_CONTROL, "Control" },
{ 0, NULL }
};
static void dissect_drbd_message(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree);
static void read_state_twopc_prepare(tvbuff_t *tvb, packet_info *pinfo, drbd_conv *conv_data);
@ -580,6 +589,9 @@ static int hf_drbd_max_possible_size = -1;
static int hf_drbd_offset = -1;
static int hf_drbd_dagtag = -1;
static int hf_drbd_dagtag_node_id = -1;
static int hf_drbd_new_rx_descs_data = -1;
static int hf_drbd_new_rx_descs_control = -1;
static int hf_drbd_rx_desc_stolen_from = -1;
static int hf_drbd_state_role = -1;
static int hf_drbd_state_peer = -1;
@ -682,10 +694,12 @@ static gboolean is_bit_set_64(guint64 value, int bit) {
#define DRBD_FRAME_HEADER_80_LEN 8
#define DRBD_FRAME_HEADER_95_LEN 8
#define DRBD_FRAME_HEADER_100_LEN 16
#define DRBD_TRANSPORT_RDMA_PACKET_LEN 16
#define DRBD_MAGIC 0x83740267
#define DRBD_MAGIC_BIG 0x835a
#define DRBD_MAGIC_100 0x8620ec20
#define DRBD_TRANSPORT_RDMA_MAGIC 0x5257494E
static guint get_drbd_pdu_len(packet_info *pinfo _U_, tvbuff_t *tvb, int offset, void *data _U_)
{
@ -722,8 +736,7 @@ static int dissect_drbd(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, voi
return tvb_reported_length(tvb);
}
static gboolean test_drbd_protocol(tvbuff_t *tvb, packet_info *pinfo,
proto_tree *tree, void *data _U_)
static gboolean test_drbd_header(tvbuff_t *tvb)
{
guint reported_length = tvb_reported_length(tvb);
if (reported_length < DRBD_FRAME_HEADER_80_LEN || tvb_captured_length(tvb) < 4) {
@ -743,13 +756,31 @@ static gboolean test_drbd_protocol(tvbuff_t *tvb, packet_info *pinfo,
match = TRUE;
}
if (match) {
conversation_t *conversation = find_or_create_conversation(pinfo);
conversation_set_dissector(conversation, drbd_handle);
dissect_drbd(tvb, pinfo, tree, data);
return match;
}
static gboolean test_drbd_rdma_control_header(tvbuff_t *tvb)
{
guint reported_length = tvb_reported_length(tvb);
if (reported_length < DRBD_TRANSPORT_RDMA_PACKET_LEN || tvb_captured_length(tvb) < 4) {
return FALSE;
}
return match;
guint32 magic32 = tvb_get_ntohl(tvb, 0);
return magic32 == DRBD_TRANSPORT_RDMA_MAGIC;
}
static gboolean test_drbd_protocol(tvbuff_t *tvb, packet_info *pinfo,
proto_tree *tree, void *data _U_)
{
if (!test_drbd_header(tvb))
return FALSE;
conversation_t *conversation = find_or_create_conversation(pinfo);
conversation_set_dissector(conversation, drbd_handle);
dissect_drbd(tvb, pinfo, tree, data);
return TRUE;
}
/*
@ -838,13 +869,23 @@ static tvbuff_t *decode_header(tvbuff_t *tvb, proto_tree *pt, guint16 *command)
return NULL;
}
static const value_payload_decoder *find_payload_decoder(guint16 command)
{
for (unsigned int i = 0; i < array_length(payload_decoders); i++) {
if (payload_decoders[i].value == command) {
return &payload_decoders[i];
}
}
return NULL;
}
static void dissect_drbd_message(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
{
proto_tree *drbd_tree;
proto_item *ti;
guint16 command = -1;
col_set_str(pinfo->cinfo, COL_PROTOCOL, "DRBD");
col_clear(pinfo->cinfo, COL_INFO);
ti = proto_tree_add_item(tree, proto_drbd, tvb, 0, -1, ENC_NA);
@ -855,27 +896,17 @@ static void dissect_drbd_message(tvbuff_t *tvb, packet_info *pinfo, proto_tree *
if (!payload_tvb)
return;
/*
* Indicate what kind of message this is.
*/
/* Indicate what kind of message this is. */
const gchar *packet_name = val_to_str(command, packet_names, "Unknown (0x%02x)");
const gchar *info_text = col_get_text(pinfo->cinfo, COL_INFO);
if (!info_text || !info_text[0]) {
col_append_ports(pinfo->cinfo, COL_INFO, PT_TCP, pinfo->srcport, pinfo->destport);
col_append_fstr(pinfo->cinfo, COL_INFO, " [%s]", packet_name);
} else {
col_append_sep_fstr(pinfo->cinfo, COL_INFO, " ", "[%s]", packet_name);
col_append_fstr(pinfo->cinfo, COL_INFO, " [%s]", packet_name);
}
col_set_fence(pinfo->cinfo, COL_INFO);
const value_payload_decoder *payload_decoder = NULL;
for (unsigned int i = 0; i < array_length(payload_decoders); i++) {
if (payload_decoders[i].value == command) {
payload_decoder = &payload_decoders[i];
break;
}
}
conversation_t *conv = find_drbd_conversation(pinfo);
drbd_conv *conv_data = (drbd_conv *)conversation_get_proto_data(conv, proto_drbd);
if (!conv_data) {
@ -884,6 +915,8 @@ static void dissect_drbd_message(tvbuff_t *tvb, packet_info *pinfo, proto_tree *
conversation_add_proto_data(conv, proto_drbd, conv_data);
}
const value_payload_decoder *payload_decoder = find_payload_decoder(command);
if (!PINFO_FD_VISITED(pinfo) && payload_decoder && payload_decoder->state_reader_fn)
(*payload_decoder->state_reader_fn) (payload_tvb, pinfo, conv_data);
@ -896,6 +929,104 @@ static void dissect_drbd_message(tvbuff_t *tvb, packet_info *pinfo, proto_tree *
(*payload_decoder->tree_fn) (payload_tvb, drbd_tree, conv_data);
}
static void drbd_ib_append_col_info(packet_info *pinfo, const gchar *packet_name)
{
const gchar *info_text;
col_clear(pinfo->cinfo, COL_INFO);
info_text = col_get_text(pinfo->cinfo, COL_INFO);
if (!info_text || !info_text[0])
col_append_fstr(pinfo->cinfo, COL_INFO, "QP=0x%06x [%s]", pinfo->destport, packet_name);
else
col_append_fstr(pinfo->cinfo, COL_INFO, " [%s]", packet_name);
col_set_fence(pinfo->cinfo, COL_INFO);
}
static void dissect_drbd_ib_message(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
{
proto_tree *drbd_tree;
proto_item *ti;
guint16 command = -1;
ti = proto_tree_add_item(tree, proto_drbd, tvb, 0, -1, ENC_NA);
drbd_tree = proto_item_add_subtree(ti, ett_drbd);
tvbuff_t *payload_tvb = decode_header(tvb, drbd_tree, &command);
if (!payload_tvb)
return;
/* Indicate what kind of message this is. */
const gchar *packet_name = val_to_str(command, packet_names, "Unknown (0x%02x)");
drbd_ib_append_col_info(pinfo, packet_name);
if (tree == NULL)
return;
proto_item_set_text(ti, "DRBD [%s]", packet_name);
const value_payload_decoder *payload_decoder = find_payload_decoder(command);
if (payload_decoder && payload_decoder->tree_fn)
(*payload_decoder->tree_fn) (payload_tvb, drbd_tree, NULL);
}
static void dissect_drbd_ib_control_message(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree)
{
proto_tree *drbd_tree;
proto_item *ti;
drbd_ib_append_col_info(pinfo, "RDMA Flow Control");
if (tree == NULL)
return;
ti = proto_tree_add_item(tree, proto_drbd, tvb, 0, -1, ENC_NA);
proto_item_set_text(ti, "DRBD [RDMA Flow Control]");
drbd_tree = proto_item_add_subtree(ti, ett_drbd);
proto_tree_add_item(drbd_tree, hf_drbd_new_rx_descs_data, tvb, 4, 4, ENC_BIG_ENDIAN);
proto_tree_add_item(drbd_tree, hf_drbd_new_rx_descs_control, tvb, 8, 4, ENC_BIG_ENDIAN);
proto_tree_add_item(drbd_tree, hf_drbd_rx_desc_stolen_from, tvb, 12, 4, ENC_BIG_ENDIAN);
}
static gboolean dissect_drbd_ib(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *data)
{
if (!test_drbd_header(tvb) && !test_drbd_rdma_control_header(tvb))
return FALSE;
col_set_str(pinfo->cinfo, COL_PROTOCOL, "DRBD RDMA");
while (1) {
guint length;
gboolean is_control_packet = test_drbd_rdma_control_header(tvb);
if (is_control_packet)
length = DRBD_TRANSPORT_RDMA_PACKET_LEN;
else
length = get_drbd_pdu_len(pinfo, tvb, 0, data);
/* Was a header recognized? */
if (length == 0)
break;
tvbuff_t *packet_tvb = tvb_new_subset_length(tvb, 0, length);
if (is_control_packet)
dissect_drbd_ib_control_message(packet_tvb, pinfo, tree);
else
dissect_drbd_ib_message(packet_tvb, pinfo, tree);
/* Is there enough data for another DRBD packet? */
if (tvb_reported_length(tvb) < length + DRBD_FRAME_HEADER_80_LEN)
break;
/* Move to the next DRBD packet. */
tvb = tvb_new_subset_remaining(tvb, length);
}
return TRUE;
}
static void insert_twopc(tvbuff_t *tvb, packet_info *pinfo, drbd_conv *conv_data, enum drbd_packet command)
{
drbd_twopc_key *key = wmem_new0(wmem_file_scope(), drbd_twopc_key);
@ -951,9 +1082,13 @@ static void decode_payload_data(tvbuff_t *tvb, proto_tree *tree, drbd_conv *conv
decode_data_common(tvb, tree);
guint nbytes = tvb_reported_length_remaining(tvb, 24);
proto_tree_add_uint(tree, hf_drbd_size, tvb, 24, nbytes, nbytes);
proto_tree_add_bytes_format(tree, hf_drbd_data, tvb, 24,
-1, NULL, "Data (%u byte%s)", nbytes, plurality(nbytes, "", "s"));
proto_tree_add_uint(tree, hf_drbd_size, tvb, 0, 0, nbytes);
/* For infiniband the data is not in this tvb, so we do not show the data field. */
if (tvb_captured_length(tvb) >= 24 + nbytes) {
proto_tree_add_bytes_format(tree, hf_drbd_data, tvb, 24,
nbytes, NULL, "Data (%u byte%s)", nbytes, plurality(nbytes, "", "s"));
}
}
static void decode_payload_barrier(tvbuff_t *tvb, proto_tree *tree, drbd_conv *conv_data _U_)
@ -1127,6 +1262,9 @@ static void decode_payload_twopc_commit(tvbuff_t *tvb, proto_tree *tree, drbd_co
drbd_twopc_key key;
decode_twopc_request_common(tvb, tree, &key);
if (!conv_data)
return;
drbd_twopc_val *val = wmem_map_lookup(conv_data->twopc, &key);
if (!val)
return;
@ -1202,8 +1340,11 @@ static void decode_payload_data_wsame(tvbuff_t *tvb, proto_tree *tree, drbd_conv
proto_tree_add_item(tree, hf_drbd_size, tvb, 24, 4, ENC_BIG_ENDIAN);
guint nbytes = tvb_reported_length_remaining(tvb, 28);
proto_tree_add_bytes_format(tree, hf_drbd_data, tvb, 28,
-1, NULL, "Data (%u byte%s)", nbytes, plurality(nbytes, "", "s"));
/* For infiniband the data is not in this tvb, so we do not show the data field. */
if (tvb_captured_length(tvb) >= 28 + nbytes) {
proto_tree_add_bytes_format(tree, hf_drbd_data, tvb, 28,
nbytes, NULL, "Data (%u byte%s)", nbytes, plurality(nbytes, "", "s"));
}
}
static void decode_payload_rs_deallocated(tvbuff_t *tvb, proto_tree *tree, drbd_conv *conv_data _U_)
@ -1261,6 +1402,9 @@ static void decode_payload_twopc_reply(tvbuff_t *tvb, proto_tree *tree, drbd_con
&key.initiator_node_id);
proto_tree_add_item(tree, hf_drbd_reachable_nodes, tvb, 8, 8, ENC_BIG_ENDIAN);
if (!conv_data)
return;
drbd_twopc_val *val = wmem_map_lookup(conv_data->twopc, &key);
if (!val)
return;
@ -1397,6 +1541,9 @@ void proto_register_drbd(void)
{ &hf_drbd_offset, { "offset", "drbd.offset", FT_UINT32, BASE_DEC, NULL, 0x0, NULL, HFILL }},
{ &hf_drbd_dagtag, { "dagtag", "drbd.dagtag", FT_UINT64, BASE_DEC_HEX, NULL, 0x0, NULL, HFILL }},
{ &hf_drbd_dagtag_node_id, { "dagtag_node_id", "drbd.dagtag_node_id", FT_INT32, BASE_DEC, NULL, 0x0, NULL, HFILL }},
{ &hf_drbd_new_rx_descs_data, { "New descriptors received (data)", "drbd.new_rx_descs_data", FT_INT32, BASE_DEC, NULL, 0x0, NULL, HFILL }},
{ &hf_drbd_new_rx_descs_control, { "New descriptors received (control)", "drbd.new_rx_descs_control", FT_INT32, BASE_DEC, NULL, 0x0, NULL, HFILL }},
{ &hf_drbd_rx_desc_stolen_from, { "Descriptor stolen from", "drbd.rx_desc_stolen_from", FT_INT32, BASE_DEC, VALS(stream_names), 0x0, NULL, HFILL }},
{ &hf_drbd_state_role, { "role", "drbd.state.role", FT_UINT32, BASE_DEC, VALS(role_names), STATE_ROLE, NULL, HFILL }},
{ &hf_drbd_state_peer, { "peer", "drbd.state.peer", FT_UINT32, BASE_DEC, VALS(role_names), STATE_PEER, NULL, HFILL }},
@ -1453,6 +1600,7 @@ void proto_reg_handoff_drbd(void)
{
drbd_handle = create_dissector_handle(dissect_drbd, proto_drbd);
heur_dissector_add("tcp", test_drbd_protocol, "DRBD over TCP", "drbd_tcp", proto_drbd, HEURISTIC_DISABLE);
heur_dissector_add("infiniband.payload", dissect_drbd_ib, "DRBD over RDMA", "drbd_rdma", proto_drbd, HEURISTIC_DISABLE);
}
/*