If multipathing is enabled, each NVMe subsystem creates a head
namespace (e.g., nvme0n1) and multiple private namespaces
(e.g., nvme0c0n1 and nvme0c1n1) in sysfs. Creating the links for a
private namespace uses the links of the head namespace, so the
head namespace must be created before its private namespaces
(e.g., nvme0n1 -> nvme0c1n1). If this order is not followed, the
sysfs links will be incomplete or a kernel panic will occur.
The kernel panic was:
kernel BUG at fs/sysfs/symlink.c:27!
Call Trace:
nvme_mpath_add_disk_links+0x5d/0x80 [nvme_core]
nvme_validate_ns+0x5c2/0x850 [nvme_core]
nvme_scan_work+0x1af/0x2d0 [nvme_core]
Correct order
  Context A     Context B
  nvme0n1
  nvme0c0n1     nvme0c1n1

Incorrect order
  Context A     Context B
                nvme0c1n1
  nvme0n1
  nvme0c0n1
nvme_mpath_add_disk (for creating the head namespace) is called
just before nvme_mpath_add_disk_links (for creating the private
namespaces). In nvme_mpath_add_disk, the first context to acquire
the subsystem lock creates the head namespace; the other contexts
wait for the lock, then find GENHD_FL_UP already set on the head
namespace and do nothing. We verified the code with and without
multipathing using dual-port NVMe SSDs from three vendors.
Signed-off-by: Baegjae Sung <[email protected]>
Reviewed-by: Christoph Hellwig <[email protected]>
Signed-off-by: Keith Busch <[email protected]>
}
static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
- struct nvme_id_ns *id, bool *new)
+ struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
bool is_shared = id->nmic & (1 << 0);
ret = PTR_ERR(head);
goto out_unlock;
}
-
- *new = true;
} else {
struct nvme_ns_ids ids;
ret = -EINVAL;
goto out_unlock;
}
-
- *new = false;
}
list_add_tail(&ns->siblings, &head->list);
struct nvme_id_ns *id;
char disk_name[DISK_NAME_LEN];
int node = dev_to_node(ctrl->dev), flags = GENHD_FL_EXT_DEVT;
- bool new = true;
ns = kzalloc_node(sizeof(*ns), GFP_KERNEL, node);
if (!ns)
if (id->ncap == 0)
goto out_free_id;
- if (nvme_init_ns_head(ns, nsid, id, &new))
+ if (nvme_init_ns_head(ns, nsid, id))
goto out_free_id;
nvme_setup_streams_ns(ctrl, ns);
pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
ns->disk->disk_name);
- if (new)
- nvme_mpath_add_disk(ns->head);
+ nvme_mpath_add_disk(ns->head);
nvme_mpath_add_disk_links(ns);
return;
out_unlink_ns:
{
if (!head->disk)
return;
- device_add_disk(&head->subsys->dev, head->disk);
- if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
- &nvme_ns_id_attr_group))
- pr_warn("%s: failed to create sysfs group for identification\n",
- head->disk->disk_name);
+
+ mutex_lock(&head->subsys->lock);
+ if (!(head->disk->flags & GENHD_FL_UP)) {
+ device_add_disk(&head->subsys->dev, head->disk);
+ if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
+ &nvme_ns_id_attr_group))
+ pr_warn("%s: failed to create sysfs group for identification\n",
+ head->disk->disk_name);
+ }
+ mutex_unlock(&head->subsys->lock);
}
void nvme_mpath_add_disk_links(struct nvme_ns *ns)