我正在编写一个小内核模块,用于测量网络数据包退出节点所用的时间.
该模块是netfilter库中的一个钩子.
对于它接收的每个数据包,它会计算一个哈希值,从skbuff获取tstamp和实际时间戳,并将所有这些数据保存在链表中.
为了将这些数据传递给用户空间,我创建了一个proc设备,当用户从设备中读取时,我发送了一个链表列表.
要对列表进行更改(读取和写入),我有一个自旋锁.问题是有时当我处理数据包时从proc设备读取系统崩溃.
我认为问题出在函数“dump_data_to_proc”中,更具体地说是在尝试获取自旋锁时.我做了一些测试,它只在cplink路由器中运行时崩溃(softlockup).当我在“普通”PC(单核)中运行模块时,它不会崩溃,
#include <linux/module.h> /* Needed by all modules */
#include <linux/kernel.h> /* Needed for KERN_INFO */
#include <linux/init.h> /* Needed for the macros */
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/ip.h>
#include <linux/spinlock.h>
#include <net/ipv6.h>
#include <linux/proc_fs.h> /* Necessary because of proc fs */
#include <asm/uaccess.h> /* for copy_from_user */
#include "kmodule_measure_process_time.h"
#include "hash.c"
//DEBUG >=5 is very slow in the tplink
#define DEBUG 2
#define PROCFS_MAX_SIZE 64
#define PROCFS_NAME "measures"
#define MAXIMUM_SAMPLES 10000
static struct nf_hook_ops nfho;
unsigned int total_packets_processed= 0;
unsigned int total_packets_discarded=0;
int temp_counter=0;
struct values_list *HEAD;
spinlock_t list_lock ;
static int hello_proc(struct seq_file *m, void *v) {
seq_printf(m, " stats Mod initialized.\n");
return 0;
}
static int proc_open(struct inode *inode, struct file *file) {
return single_open(file, hello_proc, NULL);
}
ssize_t dump_data_to_proc(struct file *filp, char *buffer, size_t length, loff_t *offset){
int bytesRead = 0;
struct values_list *temp=NULL;
int bytesError=0;
char buff[PROCFS_MAX_SIZE];
spin_lock(&list_lock);
temp=HEAD;
if(temp!=NULL){
HEAD = temp->next;
}
spin_unlock(&list_lock);
if(temp!=NULL){
bytesRead = snprintf(buff, PROCFS_MAX_SIZE ,"%u|%llu|%llu\n", temp->hash,temp->arrival_timestap, temp->exit_timestap);
length = length - bytesRead+1;
kfree(temp);
temp_counter--;
}
bytesError= copy_to_user(buffer, buff, bytesRead);
if(bytesError!=0){
#if DEBUG >0
printk(KERN_INFO "Error: failed to copy to user");
#endif
}
return bytesRead;
}
static const struct file_operations proc_fops = {
.owner = THIS_MODULE,
.open = proc_open,
.read = dump_data_to_proc,
.llseek = seq_lseek,
.release = single_release,
};
static unsigned int hook_func(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *))
{
uint32_t hash=0;
ktime_t now_timeval;
struct timespec now;
u64 timestamp_arrival_time=0;
u64 timestamp_now=0;
struct ipv6hdr * ipheader;
struct values_list *node;
int number_of_samples=0;
spin_lock(&list_lock);
number_of_samples=temp_counter;
spin_unlock(&list_lock);
if(number_of_samples > MAXIMUM_SAMPLES){
#if DEBUG > 5
printk(KERN_INFO "Discarded one sample because the list is full.\n");
#endif
total_packets_discarded++; // probably this should be inside a spinlock
return NF_ACCEPT;
}
//calculate arrival time and actual time in ns
timestamp_arrival_time = ktime_to_ns(skb->tstamp);
getnstimeofday(&now);
now_timeval = timespec_to_ktime(now);
timestamp_now = ktime_to_ns(now_timeval);
//get Ipv6 addresses
ipheader = (struct ipv6hdr *)skb_network_header(skb);
hash=simple_hash((char *)&ipheader->saddr,sizeof(struct in6_addr)*2,hash);
total_packets_processed++;
node = (struct values_list *) kmalloc(sizeof(struct values_list),GFP_ATOMIC);
if(!node){
#if DEBUG >0
printk(KERN_INFO "Error cannot malloc\n");
#endif
return NF_ACCEPT;
}
node->hash=hash;
node->arrival_timestap=timestamp_arrival_time;
node->exit_timestap=timestamp_now;
spin_lock(&list_lock);
node->next=HEAD;
HEAD=node;
temp_counter++;
spin_unlock(&list_lock);
return NF_ACCEPT;
}
static int __init init_main(void)
{
nfho.hook = hook_func;
nfho.hooknum = NF_INET_POST_ROUTING;
nfho.pf = PF_INET6;
nfho.priority = NF_IP_PRI_FIRST;
nf_register_hook(&nfho);
#if DEBUG >0
printk(KERN_INFO " kernel module: Successfully inserted protocol module into kernel.\n");
#endif
proc_create(PROCFS_NAME, 0, NULL, &proc_fops);
spin_lock_init(&list_lock);
//Some distros/devices disable timestamping of packets
net_enable_timestamp();
return 0;
}
static void __exit cleanup_main(void)
{
struct values_list *temp;
nf_unregister_hook(&nfho);
#if DEBUG >0
printk(KERN_INFO " kernel module: Successfully unloaded protocol module.\n");
printk(KERN_INFO "Number of packets processed:%d\n",total_packets_processed);
printk(KERN_INFO "Number of packets discarded:%d\n",total_packets_discarded);
#endif
remove_proc_entry(PROCFS_NAME, NULL);
while(HEAD!=NULL){
temp=HEAD;
HEAD= HEAD->next;
kfree(temp);
}
}
module_init(init_main);
module_exit(cleanup_main);
/* * Declaring code as GPL. */
MODULE_LICENSE("GPLv3");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
最佳答案 您的代码有2个问题:
>使用Linux内核宏代码. http://makelinux.com/ldd3/chp-11-sect-5只需将struct list_head作为元素添加到struct values_list中,并使用list_entry,list_add等
> Netfilter hools在softirq上下文中运行,因此您必须使用spin_lock_irqsave和spin_unlock_irqrestore.这很可能是您的系统因软件锁定而崩溃的原因.仔细阅读http://makelinux.com/ldd3/chp-5-sect-5