同步管理

实验:互斥锁

linux 内核中的 mutex 互斥锁是一种同步机制,和我们之前了解的应用层中的同步概念完全一样。相比于自旋锁,互斥锁对 CPU 的占用更少:当锁已被占用时,试图获取锁的进程会进入睡眠状态,直到锁被释放后才被唤醒。

我们来了解一下互斥锁这套 API:初始化使用 mutex_init 函数;加锁使用 mutex_lock 函数;尝试加锁使用 mutex_trylock 函数;解锁使用 mutex_unlock 函数。

在之前的虚拟 FIFO 设备驱动中,我们没有考虑到多进程访问设备的情况,现在使用互斥锁对需要的资源进行保护。

代码清单 1 互斥锁
  1. #include <linux/module.h>
  2. #include <linux/fs.h>
  3. #include <linux/uaccess.h>
  4. #include <linux/init.h>
  5. #include <linux/miscdevice.h>
  6. #include <linux/device.h>
  7. #include <linux/slab.h>
  8. #include <linux/kfifo.h>
  9. #include <linux/wait.h>
  10. #include <linux/sched.h>
  11. #include <linux/cdev.h>
  12. #include <linux/poll.h>
  13.  
  14. #define DEMO_NAME "mydemo_dev"
  15. #define MYDEMO_FIFO_SIZE 64
  16.  
  17. static dev_t dev;
  18. static struct cdev* demo_cdev;
  19. static struct class* mydemo_class;
  20.  
  21. struct mydemo_device
  22. {
  23.     char name[64];
  24.     struct device* dev;
  25.     wait_queue_head_t read_queue;
  26.     wait_queue_head_t write_queue;
  27.     struct kfifo mydemo_fifo;
  28.     struct fasync_struct* fasync;
  29.     struct mutex lock;
  30. };
  31.  
  /*
   * Per-open state: allocated in demodrv_open(), stored in file->private_data
   * and freed in demodrv_release().
   */
  struct mydemo_private_data {
      struct mydemo_device *device;  /* device this open file refers to */
      char name[64];                 /* "private_data_<minor>", used in log output */
  };
  37.  
  /* Number of minors requested from alloc_chrdev_region(). */
  #define MYDEMO_MAX_DEVICES 8
  /* Device state table; demodrv_open() indexes it with the inode's minor. */
  static struct mydemo_device *mydemo_device[MYDEMO_MAX_DEVICES];
  40.  
  41. static int demodrv_open(struct inode* inode, struct file* file)
  42. {
  43.     unsigned int minor = iminor(inode);
  44.     struct mydemo_private_data* data;
  45.     struct mydemo_device* device = mydemo_device[minor];
  46.  
  47.     dev_info(device->dev, "%s: major=%d, minor=%d, device=%s\n", __func__,
  48.         MAJOR(inode->i_rdev), MINOR(inode->i_rdev), device->name);
  49.  
  50.     data = kmalloc(sizeof(struct mydemo_private_data), GFP_KERNEL);
  51.     if (!data)
  52.         return -ENOMEM;
  53.  
  54.     sprintf(data->name, "private_data_%d", minor);
  55.  
  56.     data->device = device;
  57.     file->private_data = data;
  58. }
  59.  
  60. static int demodrv_release(struct inode* inode, struct file* file)
  61. {
  62.     struct mydemo_private_data* data = file->private_data;
  63.    
  64.     kfree(data);
  65.  
  66.     return 0;
  67. }
  68.  
  69. static ssize_t demodrv_read(struct file* file, char __user* buf, size_t count, loff_t* ppos)
  70. {
  71.     struct mydemo_private_data* data = file->private_data;
  72.     struct mydemo_device* device = data->device;
  73.     int actual_readed;
  74.     int ret;
  75.  
  76.     if (kfifo_is_empty(&device->mydemo_fifo))
  77.     {
  78.         if (file->f_flags & O_NONBLOCK)
  79.             return -EAGAIN;
  80.  
  81.         dev_info(device->dev, "%s:%s pid=%d, going to sleep, %s\n", __func__,
  82.             device->name, current->pid, data->name);
  83.         ret = wait_event_interruptible(device->read_queue, !kfifo_is_empty(&device->mydemo_fifo));
  84.         if (ret)
  85.             return ret;
  86.     }
  87.  
  88.     mutex_lock(&device->lock);
  89.     ret = kfifo_to_user(&device->mydemo_fifo, buf, count, &actual_readed);
  90.     if (ret)
  91.         return -EIO;
  92.     mutex_unlock(&device->lock);
  93.  
  94.     if (!kfifo_is_full(&device->mydemo_fifo))
  95.     {
  96.         wake_up_interruptible(&device->write_queue);
  97.         kill_fasync(&device->fasync, SIGIO, POLL_OUT);
  98.     }
  99.  
  100.     dev_info(device->dev, "%s:%s, pid=%d, actual_readed=%d, pos=%lld\n", __func__,
  101.         device->name, current->pid, actual_readed, *ppos);
  102.     return actual_readed;
  103. }
  104.  
  105. static ssize_t demodrv_write(struct file* file, const char __user* buf, size_t count, loff_t* ppos)
  106. {
  107.     struct mydemo_private_data* data = file->private_data;
  108.     struct mydemo_device* device = data->device;
  109.  
  110.     unsigned int actual_write;
  111.     int ret;
  112.  
  113.     if (kfifo_is_full(&device->mydemo_fifo))
  114.     {
  115.         if (file->f_flags & O_NONBLOCK)
  116.             return -EAGAIN;
  117.  
  118.         dev_info(device->dev, "%s:%s pid=%d, going to sleep\n", __func__,
  119.             device->name, current->pid);
  120.         ret = wait_event_interruptible(device->write_queue, !kfifo_is_full(&device->mydemo_fifo));
  121.         if (ret)
  122.             return ret;
  123.     }
  124.  
  125.     mutex_lock(&device->lock);
  126.  
  127.     ret = kfifo_from_user(&device->mydemo_fifo, buf, count, &actual_write);
  128.     if (ret)
  129.         return -EIO;
  130.     mutex_unlock(&device->lock);
  131.  
  132.     if (!kfifo_is_empty(&device->mydemo_fifo))
  133.     {
  134.         wake_up_interruptible(&device->read_queue);
  135.         kill_fasync(&device->fasync, SIGIO, POLL_IN);
  136.         printk("%s kill fasync\n", __func__);
  137.     }
  138.  
  139.     dev_info(device->dev, "%s:%s pid=%d, actual_write=%d, ppos=%lld, ret=%d\n", __func__,
  140.         device->name, current->pid, actual_write, *ppos, ret);
  141.  
  142.     return actual_write;
  143. }
  144.  
  145. static unsigned int demodrv_poll(struct file* file, poll_table* wait)
  146. {
  147.     int mask = 0;
  148.     struct mydemo_private_data* data = file->private_data;
  149.     struct mydemo_device* device = data->device;
  150.  
  151.     mutex_lock(&device->lock);
  152.  
  153.     poll_wait(file, &device->read_queue, wait);
  154.     poll_wait(file, &device->write_queue, wait);
  155.  
  156.     if (!kfifo_is_empty(&device->mydemo_fifo))
  157.         mask |= POLLIN | POLLRDNORM;
  158.     if (!kfifo_is_full(&device->mydemo_fifo))
  159.         maks |= POLLOUT | POLLWRNORM;
  160.  
  161.     mutex_unlock(&device->lock);
  162.  
  163.     return mask;
  164. }
  165.  
  166. static int demodrv_fasync(int fd, struct file* file, int on)
  167. {
  168.     struct mydemo_private_data* data = file->private_data;
  169.     struct mydemo_device* device = data->device;
  170.     int ret;
  171.  
  172.     mutex_lock(&device->lock);
  173.  
  174.     dev_info(device->dev, "%s send SIGIO\n", __func__);
  175.     ret = fasync_helper(fd, file, on, &device->fasync);
  176.     mutex_unlock(&device->lock);
  177.  
  178.     return ret;
  179. }
  180.  
  181. static const struct file_operations demodrv_fops =
  182. {
  183.     .owner = THIS_MODULE,
  184.     .open = demodrv_open,
  185.     .release = demodrv_release,
  186.     .read = demodrv_read,
  187.     .write = demodrv_write,
  188.     .poll = demodrv_poll,
  189.     .fasync = demodrv_fasync,
  190. };
  191.  
  192. static int __init simple_char_init()
  193. {
  194.     int ret;
  195.     int i;
  196.     struct mydemo_device* device;
  197.  
  198.     ret = alloc_chrdev_region(&dev, 0, MYDEMO_MAX_DEVICES, DEMO_NAME);
  199.     if (ret)
  200.     {
  201.         printk("failed to allocate char device region\n");
  202.         return ret;
  203.     }
  204.  
  205.     demo_cdev = cdev_alloc();
  206.     if (!demo_cdev)
  207.     {
  208.         printk("cdev_alloc failed\n");
  209.         goto unregister_chrdev;
  210.     }
  211.  
  212.     cdev_init(demo_cdev, &demodrv_fops);
  213.  
  214.     ret = cdev_add(demo_cdev, dev, MYDEMO_MAX_DEVICES);
  215.     if (ret)
  216.     {
  217.         printk("cdev_add failed\n");
  218.         goto cdev_fail;
  219.     }
  220.  
  221.     mydemo_class = class_create(THIS_MODULE, "my_class");
  222.  
  223.     for (i = 0; i < MYDEMO_MAX_DEVICES; i++)
  224.     {
  225.         device = kzalloc(sizeof(struct mydemo_device), GFP_KERNEL);
  226.         if (!device)
  227.         {
  228.             ret = -ENOMEM;
  229.             goto free_device;
  230.         }
  231.  
  232.         sprintf(device->name, "%s%d", DEMO_NAME, i);
  233.         mutex_init(&device->lock);
  234.  
  235.         device->dev = device_create(mydemo_class, NULL, MKDEV(dev, i), NULL, "mydemo:%d:%d", MAJOR(dev), i);
  236.         dev_info(device->dev, "create device: %d:%d\n", MAJOR(dev), MINOR(i));
  237.         mydemo_device[i] = device;
  238.         init_waitqueue_head(&device->read_queue);
  239.         init_waitqueue_head(&device->write_queue);
  240.  
  241.         ret = kfifo_alloc(&device->mydemo_fifo, MYDEMO_FIFO_SIZE, GFP_KERNEL);
  242.         if (ret)
  243.         {
  244.             ret = -ENOMEM;
  245.             goto free_kfifo;
  246.         }
  247.  
  248.         printk("mydemo_fifo=%p\n", &device->mydemo_fifo);
  249.     }
  250.  
  251.     printk("succeeded register char device: %s\n", DEMO_NAME);
  252.  
  253.     return 0;
  254.  
  255. free_kfifo:
  256.     for (i = 0; i < MYDEMO_MAX_DEVICES; i++)
  257.         if (&device->mydemo_fifo)
  258.             kfifo_free(&device->mydemo_fifo);
  259.  
  260. free_device:
  261.     for (i = 0; i < MYDEMO_MAX_DEVICES; i++)
  262.         if (mydemo_device[i])
  263.             kfree(mydemo_device[i]);
  264.  
  265. cdev_fail:
  266.     cdev_del(demo_cdev);
  267.  
  268. unregister_chrdev:
  269.     unregister_chrdev_region(dev, MYDEMO_MAX_DEVICES);
  270.  
  271.     return ret;
  272. }
  273.  
  274. static void __exit simple_char_exit()
  275. {
  276.     int i;
  277.     printk("removing device\n");
  278.  
  279.     if (demo_cdev)
  280.         cdev_del(demo_cdev);
  281.  
  282.     unregister_chrdev_region(dev, MYDEMO_MAX_DEVICES);
  283.  
  284.     for (i = 0; i < MYDEMO_MAX_DEVICES; i++)
  285.     {
  286.         if (mydemo_device[i])
  287.         {
  288.             device_destroy(mydemo_class, MKDEV(dev, i));
  289.             kfree(mydemo_device[i]);
  290.         }
  291.     }
  292.     class_destroy(mydemo_class);
  293. }
  294.  
  295. module_init(simple_char_init);
  296. module_exit(simple_char_exit);
  297.  
  298. MODULE_AUTHOR("rlk");
  299. MODULE_LICENSE("GPL v2");
  300. MODULE_DESCRIPTION("simpe character device");

代码清单 1 有点长,我们主要关注使用互斥锁进行同步的地方。demodrv_read 和 demodrv_write 中,我们对 FIFO 的读写操作进行同步,确保同一时刻只有一个进程操作 FIFO;demodrv_poll 中,我们对读写等待队列和 FIFO 状态读取进行同步;demodrv_fasync 中,我们对异步通知列表进行同步。

fasync 机制回过头来看有点陌生,重新温习一下。

.fasync 接口维护异步通知列表;kill_fasync 进行通知。

实验:RCU 锁

参照上一个互斥锁的实验,假设现在是读取密集型的场景:读写之间必须同步,但多个读线程之间也要受互斥锁的串行化控制,效率很低。

针对读取密集场景,linux 内核使用 RCU 锁提高性能。我们看一下 RCU 的基本原理:

Read: 多个读取者可以同时访问数据,允许并发无锁的读取数据。

Copy: 需要修改数据时,先创建一个数据的副本,在副本上先做修改。

Update: 当确定没有线程读取旧数据时,将副本作为新数据进行更新。

以上也就是 RCU 的缩写。Read 和 Copy 很好理解,主要是不明白 Update 中是如何确定没有线程读取旧数据的。此处我们先不做深究,只知道使用相关的标准 API 能达到这个目的就可以了。

MARK: 如何确定没有线程读取旧数据?

我们来了解 RCU 锁的相关 API:

rcu_read_lock:进入 RCU 读取临界区。

rcu_read_unlock:离开 RCU 读取临界区。

rcu_dereference:安全的读取 RCU 受保护的指针。

rcu_assign_pointer:安全的发布新的 RCU 受保护的指针。

synchronize_rcu:阻塞直到所有正在进行的 RCU 读取临界区结束。

call_rcu:synchronize_rcu 的异步版本,不会阻塞调用者;它注册一个回调函数,等到所有此前已开始的 RCU 读取临界区都结束之后,该回调函数才会被执行(通常用来释放旧数据)。

最后我们看 RCU 锁的示例代码。代码清单 2 演示了如何使用 RCU 机制来同步对 struct foo 结构体的访问,其中创建了两个读者线程和一个写者线程。

读者线程通过 rcu_read_lock、rcu_read_unlock 和 rcu_dereference 安全地读取共享数据。

写者线程使用 rcu_assign_pointer 发布新数据,同时使用 call_rcu 在适当时候释放旧数据。

代码清单 2 RCU 锁
  1. #include <linux/kernel.h>
  2. #include <linux/module.h>
  3. #include <linux/init.h>
  4. #include <linux/slab.h>
  5. #include <linux/spinlock.h>
  6. #include <linux/rcupdate.h>
  7. #include <linux/kthread.h>
  8. #include <linux/delay.h>
  9.  
  10. struct foo
  11. {
  12.     int a;
  13.     struct rcu_head rcu;
  14. };
  15.  
  16. static struct foo* g_ptr;
  17.  
  18. // 读者线程 1
  19. static int myrcu_reader_thread1(void* data)
  20. {
  21.     struct foo* p1 = NULL;
  22.  
  23.     while (1)
  24.     {
  25.         if (kthread_should_stop())
  26.             break;
  27.         msleep(20);
  28.         rcu_read_lock();
  29.         mdelay(200);
  30.         p1 = rcu_dereference(g_ptr);
  31.         if (p1)
  32.             printk("%s: read a=%d\n", __func__, p1->a);
  33.         rcu_read_unlock();
  34.     }
  35.  
  36.     return 0;
  37. }
  38.  
  39. // 读者线程 2
  40. static int myrcu_reader_thread2(void* data)
  41. {
  42.     struct foo* p2 = NULL;
  43.  
  44.     while (1)
  45.     {
  46.         if (kthread_should_stop())
  47.             break;
  48.         msleep(30);
  49.         rcu_read_lock();
  50.         mdelay(100);
  51.         p2 = rcu_dereference(g_ptr);
  52.         if (p2)
  53.             printk("%s: read a=%d\n", __func__, p2->a);
  54.         rcu_read_unlock();
  55.     }
  56.  
  57.     return 0;
  58. }
  59.  
  60. static void myrcu_del(struct rcu_head* rh)
  61. {
  62.     struct foo* p = container_of(rh, struct foo, rcu);
  63.     printk("%s: a=%d\n", __func__, p->a);
  64.     kfree(p);
  65. }
  66.  
  67. // 写者线程
  68. static int myrcu_writer_thread(void* p)
  69. {
  70.     struct foo* old;
  71.     struct foo* new_ptr;
  72.     int value = (unsigned long)p;
  73.  
  74.     while (1)
  75.     {
  76.         if (kthread_should_stop())
  77.             break;
  78.         msleep(250);
  79.         new_ptr = kmalloc(sizeof(struct foo), GFP_KERNEL);
  80.         old = g_ptr;
  81.         *new_ptr = *old;
  82.         new_ptr->a = value;
  83.         rcu_assign_pointer(g_ptr, new_ptr);
  84.         call_rcu(&old->rcu, myrcu_del);
  85.         printk("%s: write to new %d\n", __func__, value);
  86.         value++;
  87.     }
  88.  
  89.     return 0;
  90. }
  91.  
  92. static struct task_struct* reader_thread1;
  93. static struct task_struct* reader_thread2;
  94. static struct task_struct* writer_thread;
  95.  
  96. static int __init my_test_init(void)
  97. {
  98.     int value = 5;
  99.  
  100.     printk("figo: my module init\n");
  101.     g_ptr = kzalloc(sizeof(struct foo), GFP_KERNEL);
  102.  
  103.     reader_thread1 = kthread_run(myrcu_reader_thread1, NULL, "rcu_reader1");
  104.     reader_thread2 = kthread_run(myrcu_reader_thread2, NULL, "rcu_reader2");
  105.     writer_thread = kthread_run(myrcu_writer_thread, (void*)(unsigned long)value, "rcu_write");
  106.  
  107.     return 0;
  108. }
  109.  
  110. static void __exit my_test_exit(void)
  111. {
  112.     printk("goodbye\n");
  113.     kthread_stop(reader_thread1);
  114.     kthread_stop(reader_thread2);
  115.     kthread_stop(writer_thread);
  116.     if (g_ptr)
  117.         kfree(g_ptr);
  118. }
  119.  
  120. MODULE_LICENSE("GPL");
  121. module_init(my_test_init);
  122. module_exit(my_test_exit);