内核块设备 – 使用自旋锁时陷入死锁

我刚刚实现了一个虚拟块设备,我想用它来探索
linux内核如何处理块设备.

我的设备只是一个内存区域,分为两个512字节扇区.

我正在使用全局结构来存储设备信息:

/*
 * Driver-wide state of the virtual block device.
 * One global instance, "self", is used by all functions of the module.
 */
typedef struct {
    uint32_t hard_sector_size;      /* bytes per device sector */
    uint32_t sector_number;         /* how many sectors the device has */
    uint32_t size;                  /* whole virtual device size, in bytes */
    uint8_t *data;                  /* memory buffer backing the device */
    spinlock_t device_lock;         /* spinlock guarding access to this structure */
    struct request_queue *queue;    /* request queue of the device */
    struct gendisk *gendisk;        /* "disk" representation of the device */
    int major;                      /* major number handed out by the kernel */
    int minor;                      /* minor number, fixed at initialization */
    uint32_t r_users;               /* count of read accesses */
    uint32_t w_users;               /* count of write accesses */
} blk_mod_t;

blk_mod_t self;

[...]

现在我想保护这个结构免受并发访问.为此,我正在使用device_lock字段.
如果锁定,结构正在更新,所以我应该等到完成.
如果没有,我可以访问结构字段.

目前我只在以下三个函数中使用这个自旋锁:

/*
 * Called when the device is opened.
 *
 * bdev: block device being opened (not used here).
 * mode: open mode; FMODE_READ and FMODE_WRITE decide which counters change.
 *
 * Logs the access mode, then increments self.r_users and/or self.w_users
 * while holding self.device_lock.
 *
 * Returns 0 in all cases.
 */
static int block_mod_open(struct block_device *bdev, fmode_t mode)
{
    /*
     * The counters are driven directly by the FMODE_* flags, so there is no
     * intermediate access-mode variable that could be left unset (e.g. when
     * neither flag is present, in which case no counter is touched).
     */
    const int reading = (mode & FMODE_READ) != 0;
    const int writing = (mode & FMODE_WRITE) != 0;

    DEBUG("Entering open function\n");

    if (reading && writing) {
        NOTICE("Oppened in read/write mode\n");
    } else if (reading) {
        NOTICE("Oppened in read only mode\n");
    } else if (writing) {
        NOTICE("Oppened in write only mode\n");
    }

    DEBUG("<--\n");
    spin_lock(&self.device_lock);

    if (reading) {
        self.r_users++;
    }
    if (writing) {
        self.w_users++;
    }
    /* r_users / w_users are uint32_t, hence %u. */
    NOTICE("Read access: %u\tWrite access: %u\n", self.r_users, self.w_users);

    DEBUG("-->\n");
    spin_unlock(&self.device_lock);

    DEBUG("Exiting open function\n");
    return 0;
}

/*
 * Called when the device is closed.
 *
 * disk: gendisk being released (not used here).
 * mode: mode the device was opened with; FMODE_READ and FMODE_WRITE decide
 *       which counters change.
 *
 * Logs the access mode, then decrements self.r_users and/or self.w_users
 * while holding self.device_lock.
 */
static void block_mod_release(struct gendisk *disk, fmode_t mode)
{
    /*
     * The counters are driven directly by the FMODE_* flags, so there is no
     * intermediate access-mode variable that could be left unset (e.g. when
     * neither flag is present, in which case no counter is touched).
     */
    const int reading = (mode & FMODE_READ) != 0;
    const int writing = (mode & FMODE_WRITE) != 0;

    DEBUG("Entering release function\n");

    if (reading && writing) {
        NOTICE("Closed read/write mode\n");
    } else if (reading) {
        NOTICE("Closed read only mode\n");
    } else if (writing) {
        NOTICE("Closed write only mode\n");
    }

    DEBUG("<--\n");
    spin_lock(&self.device_lock);

    if (reading) {
        self.r_users--;
    }
    if (writing) {
        self.w_users--;
    }
    /* r_users / w_users are uint32_t, hence %u. */
    NOTICE("Read access: %u\tWrite access: %u\n", self.r_users, self.w_users);

    DEBUG("-->\n");
    spin_unlock(&self.device_lock);

    DEBUG("Exiting release function\n");
}

/*
 * Copy data between the device memory buffer and a request buffer.
 *
 * sector: first sector of the transfer, in KERNEL_SECTOR_SIZE units.
 * nsect:  number of sectors to transfer.
 * buffer: source (write) or destination (read) of the data.
 * write:  non-zero to copy buffer -> device, zero to copy device -> buffer.
 *
 * A transfer that would run past self.size is logged and dropped.
 *
 * NOTE: this function takes self.device_lock, and it is called from
 * block_mod_request(). The request queue is created with
 * blk_init_queue(block_mod_request, &self.device_lock), so that same lock is
 * already held when the request function runs. Taking it again here is the
 * deadlock discussed in the surrounding text; the remedy is to give the
 * request queue a spinlock of its own.
 */
static void block_mod_transfer(unsigned long sector, unsigned long nsect, char *buffer, int write)
{
    const unsigned long offset = sector * KERNEL_SECTOR_SIZE;
    const unsigned long nbytes = nsect * KERNEL_SECTOR_SIZE;

    DEBUG("Entering transfer function\n");
    DEBUG("<--\n");
    spin_lock(&self.device_lock);

    /* Written so that the check itself cannot wrap around. */
    if (offset > self.size || nbytes > self.size - offset) {
        WARNING("Beyond-end %s (%lu %lu)\n", write ? "write" : "read", offset, nbytes);
        spin_unlock(&self.device_lock);
        return;
    }

    if (write) {
        NOTICE("Writing to device\n");
        memcpy(self.data + offset, buffer, nbytes);
    } else {
        NOTICE("Reading from device\n");
        memcpy(buffer, self.data + offset, nbytes);
    }

    DEBUG("-->\n");
    spin_unlock(&self.device_lock);
    DEBUG("Exiting transfer function\n");
}

我用以下函数处理请求

/*
 * Request function of the device queue (registered through blk_init_queue()).
 *
 * queue: request queue to drain.
 *
 * Fetches requests one at a time. Non-filesystem requests are ended with
 * -EIO; filesystem requests have their current chunk handed to
 * block_mod_transfer() and are then ended with status 0.
 *
 * The lock passed to blk_init_queue() is held while this function runs.
 */
static void block_mod_request(struct request_queue *queue)
{
    struct request *request;

    DEBUG("Entering request function\n");

    request = blk_fetch_request(queue);
    while (request != NULL) {
        int status = 0;

        if (request->cmd_type != REQ_TYPE_FS) {
            /* Not a filesystem request (i.e. does not move blocks of data). */
            WARNING("Skip non-fs request\n");
            status = -EIO;
        } else {
            block_mod_transfer(blk_rq_pos(request), blk_rq_cur_sectors(request),
                               bio_data(request->bio), rq_data_dir(request));
        }

        /*
         * __blk_end_request_cur() completes only the current chunk and
         * returns true while the request still has data pending. Only move
         * on to the next request once this one is completely finished,
         * otherwise the rest of an already fetched request would be lost.
         */
        if (!__blk_end_request_cur(request, status)) {
            request = blk_fetch_request(queue);
        }
    }

    DEBUG("Exiting request function\n");
}

加载模块时没有什么特别的事情发生.但是,如果我尝试从设备读取,就会陷入死锁:系统不再响应,我只能重新启动.

这是输出:

root@PC325:~# echo 8 > /proc/sys/kernel/printk
root@PC325:~# insmod block_mod.ko 
[   64.546791] block_mod: loading out-of-tree module taints kernel.
[   64.548197] block_mod: module license '(c) Test license' taints kernel.
[   64.549951] Disabling lock debugging due to kernel taint
[   64.552816] Inserting module 'blk_mod_test'
[   64.554085] Got major number : '254'
[   64.554940] Data allocated (size = 1024)
[   64.557378] Request queue initialized
[   64.558178] Sent hard sector size to request queue
[   64.559188] Gendisk allocated
[   64.559817] Gendisk filled
[   64.560416] Gendisk capacity set
[   64.563285] Gendisk added
root@PC325:~# [   64.565280] Entering open function
[   64.566035] Oppened in read only mode
[   64.566773] <--
[   64.567138] Read access: 1   Write access: 0
[   64.567977] -->
[   64.568342] Exiting open function
[   64.571080] Entering release function
[   64.571855] Closed read only mode
[   64.572531] <--
[   64.572924] Read access: 0   Write access: 0
[   64.573749] -->
[   64.574116] Exiting release function
root@PC325:~# cat /dev/blkmodtest 
[   78.488228] Entering open function
[   78.488988] Oppened in read only mode
[   78.489733] <--
[   78.490100] Read access: 1   Write access: 0
[   78.490925] -->
[   78.491290] Exiting open function
[   78.492026] Entering request function
[   78.492743] Entering transfer function
[   78.493469] <--
-------------- DEADLOCK HERE --------------

更新:添加init和exit函数

/*
 * Module entry point: set up the virtual block device.
 *
 * Steps: fill in the global "self" structure, obtain a major number, allocate
 * and pre-fill the backing memory, create the request queue, then allocate,
 * fill in and register the gendisk.
 *
 * Returns 0 on success or a negative errno value on failure; everything
 * acquired before the failing step is released again.
 */
static int __init block_mod_init(void)
{
    const char *message = "abcdefghijklmnopqrstuvwxyz";
    const size_t message_len = strlen(message);
    uint32_t i;
    int ret;

    INFO("Inserting module '%s'\n", MODULE_NAME);

    // Initialize driver data structure
    memset(&self, 0, sizeof(self));
    self.hard_sector_size = DEVICE_HARD_SECTOR_SIZE;
    self.sector_number = DEVICE_SECTOR_NUMBER;
    self.size = self.sector_number * self.hard_sector_size;
    self.minor = 1;

    // Get a major number from kernel (self.major is 0 here after the memset)
    ret = register_blkdev(self.major, MODULE_NAME);
    if (ret < 0) {
        // Nothing was registered, so there is nothing to unregister
        ERROR("Unable to get major number for '%s'\n", MODULE_NAME);
        return ret;
    }
    self.major = ret;
    DEBUG("Got major number : '%d'\n", self.major);

    // Allocate data space and fill it with a repeating alphabet pattern
    self.data = vmalloc(self.size);
    if (self.data == NULL) {
        ERROR("Unable to allocate memory for '%s'\n", MODULE_NAME);
        ret = -ENOMEM;
        goto err_unregister;
    }
    for (i = 0; i < self.size; i++) {
        self.data[i] = message[i % message_len];
    }
    spin_lock_init(&self.device_lock);
    DEBUG("Data allocated (size = %u)\n", self.size);

    // Allocate the request queue.
    // NOTE: the device's own lock is handed over as the queue lock, so
    // block_mod_request() runs with self.device_lock already held.
    self.queue = blk_init_queue(block_mod_request, &self.device_lock);
    if (self.queue == NULL) {
        ERROR("Unable to initialize request queue for '%s'\n", MODULE_NAME);
        ret = -ENOMEM;
        goto err_free_data;
    }
    DEBUG("Request queue initialized\n");

    // Send device hard sector size to request queue
    blk_queue_logical_block_size(self.queue, self.hard_sector_size);
    self.queue->queuedata = &self;
    DEBUG("Sent hard sector size to request queue\n");

    // Allocate the gendisk structure
    // NOTE(review): self.minor is used both as the alloc_disk() argument and
    // as first_minor below -- confirm this is intended.
    self.gendisk = alloc_disk(self.minor);
    if (self.gendisk == NULL) {
        ERROR("Unable to initialize gendisk for '%s'\n", MODULE_NAME);
        ret = -ENOMEM;
        goto err_cleanup_queue;
    }
    DEBUG("Gendisk allocated\n");

    // Fill gendisk structure
    self.gendisk->major = self.major;
    self.gendisk->first_minor = self.minor;
    self.gendisk->fops = &self_ops;
    self.gendisk->queue = self.queue;
    self.gendisk->private_data = &self;
    snprintf(self.gendisk->disk_name, sizeof(self.gendisk->disk_name), "blkmodtest");
    DEBUG("Gendisk filled\n");
    set_capacity(self.gendisk, self.sector_number * (self.hard_sector_size / KERNEL_SECTOR_SIZE));
    DEBUG("Gendisk capacity set\n");
    add_disk(self.gendisk);
    DEBUG("Gendisk added\n");

    return 0;

    // Error unwinding: release resources in reverse order of acquisition
err_cleanup_queue:
    blk_cleanup_queue(self.queue);
err_free_data:
    vfree(self.data);
err_unregister:
    unregister_blkdev(self.major, MODULE_NAME);
    return ret;
}

/*
 * Module exit point: undo what block_mod_init() set up.
 * The gendisk is removed and dropped first, then the request queue, the
 * backing memory and finally the major number are released.
 */
static void __exit block_mod_cleanup(void)
{
    struct gendisk *disk = self.gendisk;

    del_gendisk(disk);
    put_disk(disk);

    blk_cleanup_queue(self.queue);
    vfree(self.data);
    unregister_blkdev(self.major, MODULE_NAME);

    INFO("Removing module '%s'\n", MODULE_NAME);
}

更新:添加宏和枚举定义

/* Name used when registering the block device and in log messages. */
#define MODULE_NAME "blk_mod_test"
/* Sector size used to convert request sector numbers and counts to bytes. */
#define KERNEL_SECTOR_SIZE 512
/* Size in bytes of one sector of the virtual device. */
#define DEVICE_HARD_SECTOR_SIZE 512
/* Number of sectors of the virtual device. */
#define DEVICE_SECTOR_NUMBER 2

/* Access-mode classification: read only, write only, or read/write. */
typedef enum {
    ACCESS_RONLY = 0,   /* read only */
    ACCESS_WONLY,       /* write only (1) */
    ACCESS_RW           /* read and write (2) */
} access_mode_t;

我不明白的是:在我尝试于block_mod_transfer中加锁之前,自旋锁已经被释放了(在block_mod_open结束时).

因此,我不明白为什么当自旋锁似乎可用时,内核陷入死锁.

为什么在这种情况下会发生死锁?我究竟做错了什么?

最佳答案 感谢 @CraigEstey 的评论,我终于发现问题在于:请求队列初始化时使用了与我的设备结构相同的自旋锁.

// Allocate the request queue.
// The second argument is the spinlock used as the queue lock; here it is the
// very same self.device_lock that the driver itself takes in
// block_mod_transfer().
if(NULL == (self.queue = blk_init_queue(block_mod_request, &self.device_lock)))
{
    ERROR("Unable to initialize request queue for '%s'\n", MODULE_NAME);
    vfree(self.data);
    unregister_blkdev(self.major, MODULE_NAME);
    return -3;
}

因此,当块层调用请求队列的回调函数(即block_mod_request)时,它已经持有这个自旋锁;随后block_mod_transfer又对同一个锁调用spin_lock,而自旋锁不可重入,于是陷入死锁.

点赞