了解netfilter钩子中的自旋锁

我正在编写一个小内核模块,用于测量网络数据包退出节点所用的时间.

该模块是netfilter库中的一个钩子.

对于它接收的每个数据包,它会计算一个哈希值,从skbuff获取tstamp和实际时间戳,并将所有这些数据保存在链表中.
为了将这些数据传递给用户空间,我创建了一个proc设备,当用户从设备中读取时,我发送了一个链表列表.

要对列表进行更改(读取和写入),我有一个自旋锁.问题是有时当我处理数据包时从proc设备读取系统崩溃.

我认为问题出在函数“dump_data_to_proc”中,更具体地说是在尝试获取自旋锁时.我做了一些测试,它只在cplink路由器中运行时崩溃(softlockup).当我在“普通”PC(单核)中运行模块时,它不会崩溃,

#include <linux/module.h>    /* Needed by all modules */ 
#include <linux/kernel.h>   /* Needed for KERN_INFO */ 
#include <linux/init.h>   /* Needed for the macros */ 
#include <linux/skbuff.h> 
#include <linux/netfilter.h> 
#include <linux/netfilter_ipv4.h> 
#include <linux/ip.h> 
#include <linux/spinlock.h> 

#include <net/ipv6.h>

#include <linux/proc_fs.h>  /* Necessary because of proc fs */
#include <asm/uaccess.h>    /* for copy_from_user */

#include "kmodule_measure_process_time.h" 
#include "hash.c" 

//DEBUG >=5 is very slow in the tplink
#define DEBUG 2 
#define PROCFS_MAX_SIZE     64
#define PROCFS_NAME         "measures"
#define MAXIMUM_SAMPLES     10000


static struct nf_hook_ops nfho;
unsigned int total_packets_processed= 0;
unsigned int total_packets_discarded=0;
int temp_counter=0;

struct values_list *HEAD;

spinlock_t list_lock  ;


static int hello_proc(struct seq_file *m, void *v) {
  seq_printf(m, " stats Mod initialized.\n");
  return 0;
}

static int proc_open(struct inode *inode, struct  file *file) {
  return single_open(file, hello_proc, NULL);
}



ssize_t dump_data_to_proc(struct file *filp, char  *buffer, size_t length, loff_t *offset){

  int bytesRead = 0;
  struct values_list *temp=NULL;
  int bytesError=0;
  char buff[PROCFS_MAX_SIZE];

  spin_lock(&list_lock);
  temp=HEAD;
  if(temp!=NULL){
    HEAD = temp->next;
}
    spin_unlock(&list_lock);


if(temp!=NULL){
    bytesRead = snprintf(buff, PROCFS_MAX_SIZE ,"%u|%llu|%llu\n", temp->hash,temp->arrival_timestap, temp->exit_timestap);
    length = length - bytesRead+1;
    kfree(temp);
    temp_counter--;
}

bytesError= copy_to_user(buffer, buff, bytesRead);

if(bytesError!=0){
#if DEBUG >0
  printk(KERN_INFO "Error: failed to copy to user");
#endif
}
return bytesRead;
}


static const struct file_operations proc_fops = {
  .owner = THIS_MODULE,
  .open = proc_open,
  .read = dump_data_to_proc,
  .llseek = seq_lseek,
  .release = single_release,
};


static unsigned int hook_func(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
{   

    uint32_t hash=0;
    ktime_t now_timeval;
    struct timespec now;
    u64 timestamp_arrival_time=0;
    u64 timestamp_now=0;
    struct ipv6hdr * ipheader;
    struct values_list *node;
    int number_of_samples=0;

    spin_lock(&list_lock);   
    number_of_samples=temp_counter;        
    spin_unlock(&list_lock);           

    if(number_of_samples > MAXIMUM_SAMPLES){
        #if DEBUG > 5
        printk(KERN_INFO "Discarded one sample because the list is full.\n");
        #endif
        total_packets_discarded++; // probably this should be inside a spinlock
        return NF_ACCEPT;
    }

    //calculate arrival time and actual time in ns
    timestamp_arrival_time =  ktime_to_ns(skb->tstamp);
    getnstimeofday(&now);
    now_timeval = timespec_to_ktime(now);
    timestamp_now =  ktime_to_ns(now_timeval);

    //get Ipv6 addresses
    ipheader = (struct ipv6hdr *)skb_network_header(skb);

    hash=simple_hash((char *)&ipheader->saddr,sizeof(struct in6_addr)*2,hash);
    total_packets_processed++;


    node = (struct values_list *) kmalloc(sizeof(struct values_list),GFP_ATOMIC);
    if(!node){
        #if DEBUG >0
        printk(KERN_INFO "Error cannot malloc\n");
        #endif
        return NF_ACCEPT;
    }

    node->hash=hash;
    node->arrival_timestap=timestamp_arrival_time;
    node->exit_timestap=timestamp_now;

    spin_lock(&list_lock);           
    node->next=HEAD;
    HEAD=node;
    temp_counter++;
    spin_unlock(&list_lock); 

    return NF_ACCEPT;

}

static int __init init_main(void)
{
    nfho.hook = hook_func;
    nfho.hooknum = NF_INET_POST_ROUTING;
    nfho.pf = PF_INET6;
    nfho.priority = NF_IP_PRI_FIRST;
    nf_register_hook(&nfho);
#if DEBUG >0
    printk(KERN_INFO " kernel module: Successfully inserted protocol module into kernel.\n");
#endif 

    proc_create(PROCFS_NAME, 0, NULL, &proc_fops);

    spin_lock_init(&list_lock);

    //Some distros/devices disable timestamping of packets
    net_enable_timestamp(); 

    return 0;

}


static void __exit cleanup_main(void)
{

   struct values_list *temp;

    nf_unregister_hook(&nfho);
#if DEBUG >0
    printk(KERN_INFO " kernel module: Successfully unloaded protocol module.\n");
    printk(KERN_INFO "Number of packets processed:%d\n",total_packets_processed);
    printk(KERN_INFO "Number of packets discarded:%d\n",total_packets_discarded);
#endif

    remove_proc_entry(PROCFS_NAME, NULL);

    while(HEAD!=NULL){
        temp=HEAD;
        HEAD= HEAD->next;
        kfree(temp);
    }


}


module_init(init_main);
module_exit(cleanup_main);
/* *    Declaring code as GPL. */ 
MODULE_LICENSE("GPLv3");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);

最佳答案 您的代码有2个问题:

>使用Linux内核宏代码. http://makelinux.com/ldd3/chp-11-sect-5只需将struct list_head作为元素添加到struct values_list中,并使用list_entry,list_add等
> Netfilter hools在softirq上下文中运行,因此您必须使用spin_lock_irqsave和spin_unlock_irqrestore.这很可能是您的系统因软件锁定而崩溃的原因.仔细阅读http://makelinux.com/ldd3/chp-5-sect-5

点赞