Index: linux-2.6.18-rc6/kernel/sched.c =================================================================== --- linux-2.6.18-rc6.orig/kernel/sched.c 2006-09-05 17:15:01.000000000 -0400 +++ linux-2.6.18-rc6/kernel/sched.c 2006-09-05 22:38:27.000000000 -0400 @@ -54,6 +54,8 @@ #include #include +#include + #include /* @@ -3383,6 +3385,7 @@ switch_tasks: clear_tsk_need_resched(prev); rcu_qsctr_inc(task_cpu(prev)); + logdev_record_switch(prev, next); update_cpu_clock(prev, rq, now); prev->sleep_avg -= run_time; Index: linux-2.6.18-rc6/include/linux/logdev.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.18-rc6/include/linux/logdev.h 2006-09-06 16:59:02.000000000 -0400 @@ -0,0 +1,192 @@ +/* + * Logdevice - A device used to record debuging information in the kernel. + * It uses a large memory ring buffer consisting of individual pages + * to keep down on hogging large sections. A user may then read the device + * to get debugging information out of it. Or if configured, this can + * be dumped to the network on a system crash. 
+ * + * Copyright - 2005 - Steven Rostedt, Kihon Technologies, (rostedt at kihontech dot com) + */ +#ifndef _LOG_DEV_H +#define _LOG_DEV_H + +/* Random numbers out of my head used for MAGIC */ +#define LOGDEV_CUSTOM 0x1afb +#define LOGDEV_SWITCH_ID 0x2afc +#define LOGDEV_PKT_ID 0x42aa +#define LOGDEV_PRINT 0x4adb +#define LOGDEV_PRINT_TIME 0x4adc +#define LOGDEV_PRINT_TIME_FUNC 0x4add +#define LOGDEV_HDR_DONE 0xbbbb /* internal use only */ + +struct logdev_switch_struct { + unsigned long long t; + short pid_prev; + short pid_next; + short prev_len; + short next_len; + char prev_comm[0]; + char next_comm[0]; +}; + +struct logdev_pkt { + unsigned long long t; + short protocol; + short dir; + char packet[0]; +}; + +struct logdev_print { + char str[0]; +}; + +struct logdev_print_time { + unsigned long long t; + char str[0]; +}; + +struct logdev_print_time_func { + const char *file; + int line; + /* need to be after line, since we use this with print_time. */ + unsigned long long t; + char str[0]; +}; + +struct logdev_custom { + int id; + char data[0]; +}; + +struct logdev_header { + long counter; + int id; + int size; +}; + +struct logdev_item { + struct logdev_header hdr; + union { + struct logdev_switch_struct sw; + struct logdev_pkt pkt; + struct logdev_print print; + struct logdev_print_time print_time; + struct logdev_print_time_func print_time_func; + struct logdev_custom custom; + char data[0]; + } u; +}; + +#ifdef CONFIG_LOGDEV + +extern int in_logdump; + +/* + * Right now we only have one switch, but this may change + * in the future. 
+ */
+extern unsigned long logdev_switches;
+/* Bit numbers inside logdev_switches. */
+#define LOGDEV_SW_PRINT_ENABLED		0
+#define LOGDEV_SW_SWITCH_ENABLED	1
+#define LOGDEV_SW_BITS			2
+
+/* Test / set / clear one bit of logdev_switches. */
+#define LOGDEV_SW_ISSET(sw)	(test_bit(sw, &logdev_switches))
+#define LOGDEV_SW_SET(sw)	(set_bit(sw, &logdev_switches))
+#define LOGDEV_SW_CLEAR(sw)	(clear_bit(sw, &logdev_switches))
+
+/*
+ * The _on()/_off() macros deliberately carry no trailing semicolon: the
+ * caller supplies it, exactly as with the do {} while(0) stubs used when
+ * CONFIG_LOGDEV is not set.  That keeps
+ *	if (cond) logdev_print_on(); else ...
+ * valid in both configurations.
+ */
+#define logdev_print_ison()	LOGDEV_SW_ISSET(LOGDEV_SW_PRINT_ENABLED)
+#define logdev_print_on()	LOGDEV_SW_SET(LOGDEV_SW_PRINT_ENABLED)
+#define logdev_print_off()	LOGDEV_SW_CLEAR(LOGDEV_SW_PRINT_ENABLED)
+
+#define logdev_switch_ison()	LOGDEV_SW_ISSET(LOGDEV_SW_SWITCH_ENABLED)
+#define logdev_switch_on()	LOGDEV_SW_SET(LOGDEV_SW_SWITCH_ENABLED)
+#define logdev_switch_off()	LOGDEV_SW_CLEAR(LOGDEV_SW_SWITCH_ENABLED)
+
+/*
+ * Callback invoked by the dump code for a LOGDEV_CUSTOM record:
+ * hdr is the record header, custom carries the custom id and rec
+ * points at the record payload.
+ */
+typedef void (*logdev_callback_func)(struct logdev_header *hdr,
+				     struct logdev_custom *custom,
+				     void *rec);
+
+int logdev_print(const char *str, ...)
+	__attribute__ ((format (printf, 1, 2)));
+int logdev_vprint(const char *str, va_list va);
+int logdev_print_time(const char *str, ...)
+	__attribute__ ((format (printf, 1, 2)));
+int logdev_print_time_func(const char *file, int line, const char *str, ...)
+	__attribute__ ((format (printf, 3, 4)));
+
+/*
+ * logdev_record is used for custom writes (saves of sprintf)
+ * use id and register a callback so that the logdump knows
+ * what to do when it sees this record.
+ */
+int logdev_record(int id, int size, const void *data);
+void logdev_dump(void);
+void logdev_record_switch(struct task_struct *prev, struct task_struct *next);
+
+/* LOGDEV(x, args) calls logdev_x(args); LOGPRINTS only does so if printing is on. */
+#define LOGDEV(x,y...) logdev_##x(y)
+#define LOGPRINTS(func,x...) do { if (logdev_print_ison()) LOGDEV(func,x); } while(0)
+
+/*
+ * Using the UPPER case here ignores the logdev_print_enabled flag
+ */
+#define LOGPRINT(x...) LOGDEV(print,x)
+#define LOGTPRINT(x...) LOGDEV(print_time,x)
+#define LOGTFPRINT(x...) LOGDEV(print_time_func,__FUNCTION__,__LINE__,x)
+
+/*
+ * Using these functions, will only log if logdev_print_enabled flag is set.
+ */
+
+#define lprint(x...) LOGPRINTS(print,x)
+#define ltprint(x...) LOGPRINTS(print_time,x)
+#define lfprint(x...) LOGPRINTS(print_time_func,__FUNCTION__,__LINE__,x)
+
+/*
+ * lnprint, ltnprint and lfnprint are identical to lprint, ltprint and
+ * lfprint except that they add a new line at the end.  _lnprint does this
+ * by pasting "%s\n" onto the (literal) format string and passing "" as the
+ * matching argument.
+ */
+#define _lnprint(func,x,y...) func( x "%s\n", y)
+#define lnprint(x...) _lnprint(lprint,x,"")
+#define ltnprint(x...) _lnprint(ltprint,x,"")
+#define lfnprint(x...) _lnprint(lfprint,x,"")
+
+/* One registered handler for LOGDEV_CUSTOM records carrying the given id. */
+struct logdev_callback {
+	struct list_head list;
+	int id;
+	logdev_callback_func func;
+};
+
+int logdev_register_callback(int custom_id, logdev_callback_func func);
+int logdev_unregister_callback(int custom_id);
+
+int logdev_init(void); /* If we want to put this in main.c */
+
+#else /* !LOGDEV */
+/*
+ * Logdev is configured out: every entry point that is available above
+ * becomes a no-op so that callers build unchanged in both configurations.
+ */
+#define logdev_dump() do {} while(0)
+#define logdev_record(id, size, data) do {} while(0)
+#define logdev_record_switch(prev, next) do {} while(0)
+
+#define LOGPRINT(x...) do {} while(0)
+#define LOGTPRINT(x...) do {} while(0)
+#define LOGTFPRINT(x...) do {} while(0)
+
+#define logdev_print_ison() ( 0 )
+#define logdev_print_on() do {} while(0)
+#define logdev_print_off() do {} while(0)
+
+#define logdev_switch_ison() ( 0 )
+#define logdev_switch_on() do {} while(0)
+#define logdev_switch_off() do {} while(0)
+
+#define logdev_print(x...) do {} while(0)
+#define logdev_vprint(str, va) do {} while(0)
+#define logdev_print_time(x...) do {} while(0)
+#define logdev_print_time_func(x...) do {} while(0)
+
+#define lprint(x...) do {} while(0)
+#define ltprint(x...) do {} while(0)
+#define lfprint(x...) do {} while(0)
+#define lnprint(x...) do {} while(0)
+#define ltnprint(x...) do {} while(0)
+#define lfnprint(x...) do {} while(0)
+/* Misspelt name from earlier revisions of this header; kept so nothing breaks. */
+#define ldnprint(x...) do {} while(0)
+
+#define logdev_register_callback(i,f) do {} while(0)
+#define logdev_unregister_callback(i) do {} while(0)
+#define in_logdump 0
+#endif /* LOGDEV */
+
+#endif
Index: linux-2.6.18-rc6/lib/Kconfig.debug
===================================================================
--- linux-2.6.18-rc6.orig/lib/Kconfig.debug	2006-09-05 17:15:01.000000000 -0400
+++ linux-2.6.18-rc6/lib/Kconfig.debug	2006-09-05 23:10:40.000000000 -0400
@@ -355,6 +355,46 @@ config FORCED_INLINING
 	  become the default in the future, until then this option is there to
 	  test gcc for this.
 
+config LOGDEV
+	bool "Enable logdev device"
+	depends on DEBUG_KERNEL
+	help
+	  The logdev device stores data into the kernel that can be retrieved
+	  later through a misc device (major 10). The minor number is
+	  dynamic and is posted through /proc/logdev/minor. Utilities
+	  to open and read the device can be found at
+	  http://rostedt.homelinux.com/logdev
+
+	  This device allows for tracing lots of information in the kernel
+	  when simply printk is too expensive. When the logdev is initialized,
+	  it allocates a default of 1 meg of memory (in page size units). This
+	  allows for saving data in a ring buffer without the need to allocate.
+
+config LOGDEV_PAGES
+	int "Number of pages to allocate for logdev device"
+	depends on LOGDEV
+	default 256
+	help
+	  The Logdev device allocates a number of pages for the sole
+	  purpose of logging data. This is the number of pages that
+	  the Logdev device should allocate upon loading / initializing.
+
+config LOGDEV_PRINT_ENABLED
+	bool "Default Logdev prints should be enabled on startup"
+	depends on LOGDEV
+	help
+	  Enable this if you expect the LOGPRINT macros to be enabled
+	  as soon as the logdev device is loaded.
Otherwise you must + enable it with /proc/logdev/print + +config LOGDEV_SWITCH_ENABLED + bool "Default Logdev printing of context switches on startup" + depends on LOGDEV + help + Enable this if you expect the LOGSWITCH macros to be enabled + as soon as the logdev device is loaded. Otherwise you must + enable it with /proc/logdev/switch + config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL Index: linux-2.6.18-rc6/arch/i386/kernel/traps.c =================================================================== --- linux-2.6.18-rc6.orig/arch/i386/kernel/traps.c 2006-09-05 17:14:55.000000000 -0400 +++ linux-2.6.18-rc6/arch/i386/kernel/traps.c 2006-09-05 22:45:55.000000000 -0400 @@ -28,6 +28,7 @@ #include #include #include +#include #ifdef CONFIG_EISA #include @@ -690,6 +691,7 @@ void die_nmi (struct pt_regs *regs, cons printk(" on CPU%d, eip %08lx, registers:\n", smp_processor_id(), regs->eip); show_registers(regs); + logdev_dump(); printk(KERN_EMERG "console shuts up ...\n"); console_silent(); spin_unlock(&nmi_print_lock); Index: linux-2.6.18-rc6/drivers/char/logdev.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.18-rc6/drivers/char/logdev.c 2006-09-06 21:14:23.000000000 -0400 @@ -0,0 +1,2141 @@ +/* + * logdev.c + * + * Copyright (C) 2004 Steven Rostedt + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#undef CONFIG_CRITICAL_IRQSOFF_TIMING /* ignore this for this whole file! */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +static char *logdev_version = "0.3.3"; + +/* used for BUG(), since we can't add any headers in that file */ +int get_processor_id(void) { return smp_processor_id(); } +EXPORT_SYMBOL(get_processor_id); + +/* + * If need be, we can be placed really early before memory + * is locate, so if a flag is set, use bootmem otherwise use kmalloc. + * The earliest we can call logdev_init is just after smp_prepare_boot_cpu. + */ +#undef LOGDEV_USE_BOOTMEM + +#define LOGDEV_PROC_DIR "logdev" +#define LOGDEV_PROC_SW "switch" +#define LOGDEV_PROC_LEVEL "level" +#define LOGDEV_PROC_PRINT "print" +#define LOGDEV_PROC_MINOR "minor" +#define LOGDEV_PROC_SWITCHES "switches" +#define LOGDEV_PROC_ENTRY "entry" + +static void *logdev_malloc(int size) +{ +#ifndef LOGDEV_USE_BOOTMEM + return kmalloc(size, GFP_KERNEL); +#else + return alloc_bootmem(sizeof(int)); +#endif +} + +static void *logdev_get_free_pages(int order) +{ +#ifndef LOGDEV_USE_BOOTMEM + return (void*)__get_free_pages(GFP_KERNEL,order); +#else + return alloc_bootmem((1< 0 ? 1 : x < 0 ? 
-1 : 0; +} + +static atomic_t logdev_counter = ATOMIC_INIT(0); +#define logdev_counter_inc() atomic_inc_return(&logdev_counter); + +struct proc_dir_entry *logdev_proc_dir; + +/* + * Ring buffer structures + */ +struct logdev_entry { + unsigned int head; + unsigned int tail; + char *dat; +}; + +struct logdev_dev { + struct logdev_entry *entry; + int init; + int size; + int len; + int start; + int end; + int corrupted; + long minor; + struct proc_dir_entry *dir; + wait_queue_head_t wait; + spinlock_t lock; +}; + +/* + * If using bootmem is used, we need + */ +static DEFINE_PER_CPU(struct logdev_dev, logdev_dev); +#define get_logdev(cpu) &per_cpu(logdev_dev, cpu) + +#define LOGDEV_CPUS NR_CPUS + +#define LOGDEV_DEV_UNINITALIZED 0 +#define LOGDEV_DEV_RUNNING 1 +#define LOGDEV_DEV_SUSPENDED 2 + +#define dev_running(dev) ((dev)->init == LOGDEV_DEV_RUNNING) +#define dev_suspended(dev) ((dev)->init == LOGDEV_DEV_SUSPENDED) + +/* + * We don't support hotplug CPUS + */ +#define check_cpu(cpu) ({ \ + static int once = 1; \ + int x; \ + if (unlikely(x = (cpu >= LOGDEV_CPUS)) && once) { \ + once = 0; \ + printk("BUG %s:%d: cpu %d doesn't fit logdev cpus\n", \ + __FILE__, __LINE__, cpu); \ + } \ + x; \ + }) + +static int logdev_copy_from_dev(struct logdev_dev *dev, void *buf, + int size); +static int logdev_copy_to_dev(struct logdev_dev *dev, const void *dat, + int size); + +static DECLARE_MUTEX(user_sem); + +/* + * doesn't really need to be atomic, but helps that we don't need to + * write another proc interface function. 
+ */
+static struct proc_dir_entry *logdev_proc_entry[LOGDEV_CPUS];
+static struct proc_dir_entry *logdev_proc_minor[LOGDEV_CPUS];
+static struct proc_dir_entry *logdev_proc_sw;
+static struct proc_dir_entry *logdev_proc_print;
+static struct proc_dir_entry *logdev_proc_level;
+
+static int logdev_copy_to_dev(struct logdev_dev *dev, const void *dat,
+			      int size);
+
+/* "logdevprint" on the kernel command line: turn the print switch on. */
+static int option_logdev_print(char *opt)
+{
+	logdev_print_on();
+	return 0;
+}
+
+/* "logdevswitch" on the kernel command line: turn switch recording on. */
+static int option_logdev_switch(char *opt)
+{
+	logdev_switch_on();
+	return 0;
+}
+
+__setup("logdevprint", option_logdev_print);
+__setup("logdevswitch", option_logdev_switch);
+
+/*
+ * The following is to register call back functions to print out
+ * a custom record.
+ */
+
+/* Protects the logdev_callbacks list. */
+DEFINE_SPINLOCK(logdev_callbacks_lock);
+
+LIST_HEAD(logdev_callbacks);
+EXPORT_SYMBOL_GPL(logdev_callbacks);
+
+/*
+ * Register func as the handler for custom records with id custom_id.
+ *
+ * Returns 0 on success, -ENOMEM if the list entry cannot be allocated
+ * and -EBUSY if a handler for custom_id is already registered.
+ * Allocates with GFP_KERNEL before taking the list lock.
+ */
+int logdev_register_callback(int custom_id, logdev_callback_func func)
+{
+	struct list_head *p;
+	struct logdev_callback *cb;
+	unsigned long flags;
+	int ret = 0;
+
+	cb = kmalloc(sizeof(*cb),GFP_KERNEL);
+	if (!cb) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	spin_lock_irqsave(&logdev_callbacks_lock, flags);
+	/* refuse a second handler for the same id */
+	list_for_each(p,&logdev_callbacks) {
+		struct logdev_callback *c = list_entry(p, struct logdev_callback, list);
+		if (c->id == custom_id) {
+			spin_unlock_irqrestore(&logdev_callbacks_lock,flags);
+			kfree(cb);
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+
+	cb->id = custom_id;
+	cb->func = func;
+	list_add(&cb->list, &logdev_callbacks);
+	spin_unlock_irqrestore(&logdev_callbacks_lock, flags);
+
+ out:
+	return ret;
+}
+
+/*
+ * Remove and free the handler registered for custom_id.
+ *
+ * Returns 0 on success or -ENODEV if no handler with that id exists.
+ */
+int logdev_unregister_callback(int custom_id)
+{
+	struct list_head *p;
+	struct logdev_callback *cb;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&logdev_callbacks_lock, flags);
+	list_for_each(p,&logdev_callbacks) {
+		cb = list_entry(p, struct logdev_callback, list);
+		if (cb->id == custom_id)
+			break;
+	}
+	/* the walk came back to the list head: nothing matched */
+	if (p == &logdev_callbacks) {
+		ret = -ENODEV;
+		spin_unlock_irqrestore(&logdev_callbacks_lock,flags);
+		goto out;
+	}
+
+	list_del(&cb->list);
+	spin_unlock_irqrestore(&logdev_callbacks_lock, flags);
+
+	kfree(cb);
+
+ out:
+	return ret;
+}
+
+/*
+ * User buffer is used to get data from userland, and this can sleep when
+ * copying.
+ */
+static char user_buffer[PAGE_SIZE];
+
+/*
+ * We have a separate kernel buffer for each CPU.
+ * This buffer is used to copy snprintf data into the ring buffer.
+ */
+static char kern_buffer[LOGDEV_CPUS][PAGE_SIZE];
+
+/*
+ * logdev_record_switch is used to track context switches.
+ *
+ * If the logdev_switch is not set, then this doesn't record. Thus allowing
+ * you to just record the context switches that are needed to record.
+ * Just use logdev_switch_on and logdev_switch_off to turn on this function.
+ *
+ * This is called from schedule, and interrupts should already be turned off.
+ *
+ * The record is a logdev_header, a logdev_switch_struct and then the two
+ * comm strings (prev first, then next) without their terminating NUL.
+ */
+void logdev_record_switch(struct task_struct *prev, struct task_struct *next)
+{
+	struct logdev_dev *dev;
+	struct logdev_header hdr;
+	struct logdev_switch_struct rs;
+	int cpu = smp_processor_id();
+
+	if (!logdev_switch_ison())
+		return;
+
+	if (check_cpu(cpu))
+		return;
+
+	dev = get_logdev(cpu);
+	if (!dev_running(dev))
+		return;
+
+	WARN_ON(!irqs_disabled());
+
+	rs.t = sched_clock();
+	rs.pid_prev = prev->pid;	/* note: stored in a short */
+	rs.pid_next = next->pid;
+	rs.prev_len = strlen(prev->comm);
+	rs.next_len = strlen(next->comm);
+
+	hdr.counter = logdev_counter_inc();
+	hdr.id = LOGDEV_SWITCH_ID;
+	hdr.size = sizeof(hdr) + sizeof(rs) + rs.prev_len + rs.next_len;
+
+	/* plain spin_lock: no irqsave here, see the WARN_ON above */
+	spin_lock(&dev->lock);
+	logdev_copy_to_dev(dev,&hdr,sizeof(hdr));
+	logdev_copy_to_dev(dev,&rs,sizeof(rs));
+	logdev_copy_to_dev(dev,prev->comm,rs.prev_len);
+	logdev_copy_to_dev(dev,next->comm,rs.next_len);
+	spin_unlock(&dev->lock);
+}
+
+/*
+ * This routine may be placed in the network code if you want to see
+ * what packets are traveling through. You may use the direction to
+ * indicate if they are coming or going.
+ * direction = 0 : saddr ==> daddr
+ *             1 : saddr <== daddr
+ *
+ * The record is a logdev_header, a logdev_pkt and raw bytes starting at
+ * skb->nh.iph: the IP and TCP headers (ihl*4 + doff*4 bytes) when the skb
+ * is ETH_P_IP, PACKET_HOST and carries TCP, otherwise just 20 bytes.
+ * Nothing is recorded unless this cpu's device is running.
+ */
+void logdev_pkt(struct sk_buff *skb, int direction)
+{
+	struct logdev_dev *dev;
+	struct logdev_header hdr;
+	struct logdev_pkt rs;
+	struct tcphdr *th;
+	struct iphdr *iph;
+	unsigned long flags;
+	int cpu;
+
+	/* irqs stay off for the whole record so it is written in one piece */
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+
+	if (check_cpu(cpu))
+		goto out;
+
+	dev = get_logdev(cpu);
+	if (!dev_running(dev))
+		goto out;
+
+	hdr.id = LOGDEV_PKT_ID;
+	hdr.size = sizeof(hdr) + sizeof(rs);	/* payload length is added below */
+	hdr.counter = logdev_counter_inc();
+
+	rs.t = sched_clock();
+
+	iph = skb->nh.iph;
+
+	rs.protocol = skb->protocol;
+	rs.dir = direction;
+
+	if ((skb->protocol != __constant_htons(ETH_P_IP)) ||
+	    (skb->pkt_type != PACKET_HOST) ||
+	    (iph->protocol != IPPROTO_TCP)) {
+		/* Just copy the first 20 bytes of the packet */
+
+		hdr.size += 20;
+
+		spin_lock(&dev->lock);
+		logdev_copy_to_dev(dev,&hdr,sizeof(hdr));
+		logdev_copy_to_dev(dev,&rs,sizeof(rs));
+		logdev_copy_to_dev(dev,iph,20);
+		spin_unlock(&dev->lock);
+
+	} else {
+		int iplen;
+		int tcplen;
+
+		/* header lengths are stored in 32 bit words */
+		iplen = iph->ihl<<2;
+		th = (struct tcphdr*)((void*)(skb->nh.iph)+(iplen));
+		tcplen = th->doff<<2;
+
+		hdr.size += iplen + tcplen;
+
+		spin_lock(&dev->lock);
+		logdev_copy_to_dev(dev,&hdr,sizeof(hdr));
+		logdev_copy_to_dev(dev,&rs,sizeof(rs));
+		logdev_copy_to_dev(dev,iph,iplen+tcplen);
+		spin_unlock(&dev->lock);
+	}
+
+ out:
+	local_irq_restore(flags);
+}
+
+/*
+ * logdev_print acts like printk but it writes to the logdev device instead
+ * of a console.
+ */
+/*
+ * logdev_vprint - format str/va into this cpu's scratch page and append it
+ * to this cpu's ring buffer as a LOGDEV_PRINT record.
+ *
+ * Returns the number of text bytes stored (at most PAGE_SIZE), or 0 when
+ * nothing was recorded because the device is not running on this cpu.
+ * Interrupts are disabled while the scratch page and the buffer are in use.
+ */
+int logdev_vprint(const char *str, va_list va)
+{
+	struct logdev_dev *dev;
+	char *buffer;
+	int len=0;
+	struct logdev_header hdr;
+	struct logdev_print rs;
+	unsigned long flags;
+	int cpu;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+
+	if (check_cpu(cpu))
+		goto out;
+
+	dev = get_logdev(cpu);
+	if (!dev_running(dev))
+		goto out;
+
+	buffer = kern_buffer[cpu];
+	len = vsnprintf(buffer, PAGE_SIZE, str, va);
+
+	/* output was truncated: store the whole page, NUL included */
+	if (len >= PAGE_SIZE) {
+		buffer[PAGE_SIZE-1] = 0;
+		len = PAGE_SIZE;
+	}
+
+	hdr.counter = logdev_counter_inc();
+	hdr.id = LOGDEV_PRINT;
+	hdr.size = sizeof(hdr) + sizeof(rs) + len;
+
+	spin_lock(&dev->lock);
+	logdev_copy_to_dev(dev, &hdr, sizeof(hdr));
+	/* struct logdev_print has no fixed part today; copy it only if it grows */
+	if (sizeof(rs))
+		logdev_copy_to_dev(dev, &rs, sizeof(rs));
+	logdev_copy_to_dev(dev, buffer, len);
+	spin_unlock(&dev->lock);
+
+ out:
+	local_irq_restore(flags);
+
+	return len;
+}
+
+/*
+ * logdev_print - printf-style front end of logdev_vprint.
+ *
+ * Same record and same return value as logdev_vprint; the formatting and
+ * buffer handling live there so the two entry points cannot drift apart.
+ */
+int logdev_print(const char *str, ...)
+{
+	va_list va;
+	int len;
+
+	va_start(va,str);
+	len = logdev_vprint(str, va);
+	va_end(va);
+
+	return len;
+}
+
+/*
+ * logdev_print_time is the same as logdev_print but it attaches a timestamp to it.
+ * saves on doing it yourself.
+ *
+ * The timestamp is sched_clock() taken after formatting; the record id is
+ * LOGDEV_PRINT_TIME.  Returns the number of text bytes stored, or 0 when
+ * nothing was recorded.
+ */
+int logdev_print_time(const char *str, ...)
+{
+	struct logdev_dev *dev;
+	char *buffer;
+	va_list va;
+	int len = 0;
+	struct logdev_header hdr;
+	struct logdev_print_time rs;
+	unsigned long flags;
+	int cpu;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+
+	if (check_cpu(cpu))
+		goto out;
+
+	dev = get_logdev(cpu);
+	if (!dev_running(dev))
+		goto out;
+
+	buffer = kern_buffer[cpu];
+
+	va_start(va,str);
+	len = vsnprintf(buffer, PAGE_SIZE, str, va);
+	va_end(va);
+
+	/* output was truncated: store the whole page, NUL included */
+	if (len >= PAGE_SIZE) {
+		buffer[PAGE_SIZE-1] = 0;
+		len = PAGE_SIZE;
+	}
+
+	rs.t = sched_clock();
+
+	hdr.counter = logdev_counter_inc();
+	hdr.id = LOGDEV_PRINT_TIME;
+	hdr.size = sizeof(hdr) + sizeof(rs) + len;
+
+	spin_lock(&dev->lock);
+	logdev_copy_to_dev(dev, &hdr, sizeof(hdr));
+	logdev_copy_to_dev(dev, &rs, sizeof(rs));
+	logdev_copy_to_dev(dev, buffer, len);
+	spin_unlock(&dev->lock);
+
+ out:
+	local_irq_restore(flags);
+
+	return len;
+}
+
+/*
+ * logdev_print_time_func quickly stores the time, function and line number.
+ * this is really only good for live runs since the function is just a pointer,
+ * so a user land process would need to have the System.map available.
+ *
+ * Only the pointer value of file is stored, not the string it points to.
+ * The record id is LOGDEV_PRINT_TIME_FUNC.  Returns the number of text
+ * bytes stored, or 0 when nothing was recorded.
+ */
+int logdev_print_time_func(const char *file, int line, const char *str, ...)
+{
+	struct logdev_dev *dev;
+	char *buffer;
+	va_list va;
+	int len = 0;
+	struct logdev_header hdr;
+	struct logdev_print_time_func rs;
+	unsigned long flags;
+	int cpu;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+
+	if (check_cpu(cpu))
+		goto out;
+
+	dev = get_logdev(cpu);
+	if (!dev_running(dev))
+		goto out;
+
+	buffer = kern_buffer[cpu];
+
+	va_start(va,str);
+	len = vsnprintf(buffer, PAGE_SIZE, str, va);
+	va_end(va);
+
+	/* output was truncated: store the whole page, NUL included */
+	if (len >= PAGE_SIZE) {
+		buffer[PAGE_SIZE-1] = 0;
+		len = PAGE_SIZE;
+	}
+
+	rs.t = sched_clock();
+	rs.file = file;
+	rs.line = line;
+
+	hdr.counter = logdev_counter_inc();
+	hdr.id = LOGDEV_PRINT_TIME_FUNC;
+	hdr.size = sizeof(hdr) + sizeof(rs) + len;
+
+	spin_lock(&dev->lock);
+	logdev_copy_to_dev(dev, &hdr, sizeof(hdr));
+	logdev_copy_to_dev(dev, &rs, sizeof(rs));
+	logdev_copy_to_dev(dev, buffer, len);
+	spin_unlock(&dev->lock);
+
+ out:
+	local_irq_restore(flags);
+
+	return len;
+}
+
+/*
+ * If you feel like recording your own data, you can use logdev_record.
+ * just pass your own id, size and data. The size is the size of
+ * the data being passed and not the size actually being written to the device.
+ * That is already calculated.
+ *
+ * This record will be added as LOGDEV_CUSTOM and the given id will be the custom id.
+ */
+int logdev_record(int id, int size, const void *data)
+{
+	struct logdev_dev *dev;
+	struct logdev_header hdr;
+	struct logdev_custom rs;
+	unsigned long flags;
+	int cpu;
+	int ret = 0;	/* stays 0 when nothing is recorded */
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+
+	if (check_cpu(cpu))
+		goto out;
+
+	dev = get_logdev(cpu);
+	if (!dev_running(dev))
+		goto out;
+
+	hdr.counter = logdev_counter_inc();
+	hdr.id = LOGDEV_CUSTOM;
+	hdr.size = sizeof(hdr) + sizeof(rs) + size;
+
+	rs.id = id;
+
+	/* header, custom id and payload go in under one lock hold */
+	spin_lock(&dev->lock);
+	logdev_copy_to_dev(dev, &hdr, sizeof(hdr));
+	logdev_copy_to_dev(dev, &rs, sizeof(rs));
+	/* only the result of the payload copy is returned */
+	ret = logdev_copy_to_dev(dev, data, size);
+	spin_unlock(&dev->lock);
+
+out:
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+/*
+ * If you just want to write into the buffer using your own methods, then this
+ * is perfectly fine. Just pass in your data and the size of the data being
+ * passed in. You can read it out later with logdev_record_read. But you wont
+ * have the benefits of keeping integrity when the buffer overflows.
+ *
+ * No logdev_header is written in front of the data.  Returns what
+ * logdev_copy_to_dev returns, or 0 if this cpu's device is not running.
+ */
+int logdev_record_write(const char *data, int size)
+{
+	struct logdev_dev *dev;
+	int ret;
+	unsigned long flags;
+	int cpu;
+
+	local_irq_save(flags);
+
+	/* NOTE(review): unlike logdev_record there is no check_cpu() here */
+	cpu = smp_processor_id();
+	dev = get_logdev(cpu);
+	if (!dev_running(dev)) {
+		local_irq_restore(flags);
+		return 0;
+	}
+
+	spin_lock(&dev->lock);
+	ret = logdev_copy_to_dev(dev, data, size);
+	spin_unlock(&dev->lock);
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+/*
+ * logdev_record_read reads some data from the logdev device no matter what
+ * it was.
+ */
+int logdev_record_read(void *data, int size)
+{
+	struct logdev_dev *dev;
+	int ret;
+	unsigned long flags;
+	int cpu;
+
+	local_irq_save(flags);
+
+	cpu = smp_processor_id();
+	dev = get_logdev(cpu);
+	if (!dev_running(dev)) {
+		/* nothing can be read while the device is not running */
+		local_irq_restore(flags);
+		return 0;
+	}
+
+	spin_lock(&dev->lock);
+	ret = logdev_copy_from_dev(dev, data, size);
+	spin_unlock(&dev->lock);
+	local_irq_restore(flags);
+
+	return ret;
+}
+
+/*
+ * sched_clock isn't exported, so we export it ourselves.
+ */
+
+/************************ User Land ******************************/
+
+/*
+ * write() file operation: copy at most one page from user space into the
+ * ring buffer of the cpu this file was opened for, then wake any waiters.
+ *
+ * All user-land reads and writes are serialised by user_sem because they
+ * share the single user_buffer bounce page.
+ *
+ * Returns the result of logdev_copy_to_dev, -EINTR if interrupted while
+ * waiting for user_sem, or -EFAULT on a bad user pointer.  If the device
+ * is not running the data is dropped and count is returned unchanged.
+ */
+ssize_t logdev_write(struct file *filp, const char *buf, size_t count, loff_t *f_pos)
+{
+	int cpu = (long)filp->private_data;
+	struct logdev_dev *dev;
+	unsigned long flags;
+
+	if (count > PAGE_SIZE)
+		count = PAGE_SIZE;
+
+	if (down_interruptible(&user_sem))
+		return -EINTR;
+
+	dev = get_logdev(cpu);
+	if (!dev_running(dev))
+		goto out;
+
+	if (copy_from_user(user_buffer,buf,count)) {
+		count = -EFAULT;
+		goto out;
+	}
+
+	spin_lock_irqsave(&dev->lock,flags);
+	count = logdev_copy_to_dev(dev,user_buffer,count);
+	spin_unlock_irqrestore(&dev->lock,flags);
+
+	/* wake up those waiting for data */
+	if (waitqueue_active(&dev->wait))
+		wake_up_interruptible(&dev->wait);
+
+out:
+	up(&user_sem);
+	return count;
+}
+
+/*
+ * read() file operation: copy at most one page out of the ring buffer of
+ * the cpu this file was opened for.
+ *
+ * Returns the number of bytes handed to user space, 0 when the buffer is
+ * empty or the device is not running, or a negative errno.
+ *
+ * The result is kept in a signed variable: count is a size_t, so an error
+ * code stored in it would compare as a huge positive number and be used
+ * as the length for copy_to_user.
+ */
+ssize_t logdev_read(struct file *filp, char *buf, size_t count, loff_t *f_pos)
+{
+	int cpu = (long)filp->private_data;
+	struct logdev_dev *dev;
+	unsigned long flags;
+	ssize_t ret = 0;
+
+	if (down_interruptible(&user_sem))
+		return -EINTR;
+
+
+	dev = get_logdev(cpu);
+
+	/* nothing was copied, so do not claim that count bytes were read */
+	if (!dev_running(dev))
+		goto out_up;
+
+	spin_lock_irqsave(&dev->lock,flags);
+
+	if (!dev->size) {
+
+		/* TBD - FIXME */
+#if 1
+
+		/* blocking reads are disabled for now: report "no data" */
+		ret = 0;
+		goto out;
+#endif
+
+		if (filp->f_flags & O_NONBLOCK) {
+			ret = -EAGAIN;
+			goto out;
+		}
+
+		do {
+			DECLARE_WAITQUEUE(wait,current);
+			current->state = TASK_INTERRUPTIBLE;
+			add_wait_queue(&dev->wait,&wait);
+			spin_unlock_irqrestore(&dev->lock,flags);
+			schedule();
+			spin_lock_irqsave(&dev->lock,flags);
+			remove_wait_queue(&dev->wait,&wait);
+			if (dev->size)
+				break;
+			if (signal_pending(current)) {
+				ret = -ERESTARTSYS;
+				goto out;
+			}
+		} while(1);
+	}
+
+	/* user_buffer is a single page */
+	if (count > PAGE_SIZE)
+		count = PAGE_SIZE;
+
+
+	ret = logdev_copy_from_dev(dev,user_buffer,count);
+
+ out:
+	/* We can't be corrupted if we have no data */
+	if (!dev->size)
+		dev->corrupted = 0;
+	spin_unlock_irqrestore(&dev->lock,flags);
+
+	if (ret > 0) {
+		/* Well if we fail here, we just lost the data read :-( */
+		if (copy_to_user(buf,user_buffer,ret))
+			ret = -EFAULT;
+	}
+ out_up:
+	up(&user_sem);
+
+	return ret;
+}
+
+/* No ioctls are implemented. */
+static int logdev_ioctl(struct inode *inode, struct file *filp,
+			unsigned int cmd, unsigned long arg)
+{
+	return -ENOTTY;
+}
+
+/* release() file operation: nothing to tear down. */
+static int logdev_close(struct inode *inode, struct file *filp)
+{
+#if 0
+	int cpu = (int)filp->private_data;
+#endif
+	return 0;
+}
+
+
+/*
+ * open() file operation: find the initialised per-cpu device whose minor
+ * matches the inode and remember its cpu number in filp->private_data.
+ * Returns -ENODEV if no device has that minor.
+ */
+static int logdev_open (struct inode *inode, struct file *filp)
+{
+	int minor = iminor(inode);
+	int i;
+	struct logdev_dev *dev;
+
+	for (i=0; i < LOGDEV_CPUS; i++) {
+		dev = get_logdev(i);
+		if (dev->init && dev->minor == minor)
+			break;
+	}
+	if (i == LOGDEV_CPUS)
+		return -ENODEV;
+
+	/* private_data holds the cpu index itself, not a pointer */
+	filp->private_data = (void*)(long)i;
+
+	return 0;
+}
+
+/**
+ * logdev_poll - poll file op for logdev
+ * @filp: the file
+ * @wait: poll table
+ *
+ * Poll implementation.
+ */ +static unsigned int +logdev_poll(struct file *filp, poll_table *wait) +{ + struct logdev_dev *dev = (struct logdev_dev*)filp->private_data; + unsigned int mask = 0; + + (void)dev; +#if 0 + if (filp->f_mode & FMODE_READ) { + poll_wait(filp, &app->read_wait, wait); + if (!empty_channel(dev)) + mask |= POLLIN | POLLRDNORM; + } +#endif + + return mask; +} + +static struct file_operations logdev_fops = { + .read = logdev_read, + .write = logdev_write, + .ioctl = logdev_ioctl, + .open = logdev_open, + .release = logdev_close, + .poll = logdev_poll, + .llseek = no_llseek, +}; + +/************************ End User Land ******************************/ + +int in_logdump; +EXPORT_SYMBOL_GPL(in_logdump); + +static int get_next_cpus(struct logdev_header *hdr, + int *_this_cpu, int *_next_cpu) +{ + int cpu; + int f = 0; + struct logdev_dev *dev; + int this_cpu = -1; + int next_cpu = -1; + int last_cpu = 1; + + for (cpu = 0; cpu < LOGDEV_CPUS; cpu++) { + dev = get_logdev(cpu); + if (!dev_suspended(dev)) + continue; + + /* Skip empty buffers */ + if (hdr[cpu].id == LOGDEV_HDR_DONE) + continue; + + /* + * If this is the first cpu, then use it, otherwise, + * compare. 
+ */ + if (!f || + compare_cnt(hdr[cpu].counter, + hdr[this_cpu].counter) < 0) { + if (!f) + f = 1; + else { + /* we already have the first count */ + next_cpu = this_cpu; + last_cpu = 0; + f = 2; + } + this_cpu = cpu; + } else if (f == 1 || + compare_cnt(hdr[cpu].counter, + hdr[next_cpu].counter) < 0) { + f = 2; + next_cpu = cpu; + last_cpu = 0; + } + } + + *_this_cpu = this_cpu; + *_next_cpu = next_cpu; + + return last_cpu; +} + +static int process_log(struct logdev_dev *dev, struct logdev_header *hdr, + int cpu) +{ + int i; + int r; + int count; + int corrupt = 0; + int line = 0; + int newline = 1; + unsigned long long t; + unsigned long usec_rem; + unsigned long secs; + const char *file = NULL; + + r = sizeof(hdr[0]); + + switch (hdr[cpu].id) { + + case LOGDEV_PRINT_TIME_FUNC: + { + struct logdev_print_time_func rs; + int cap = sizeof(rs) - sizeof(struct logdev_print_time); + + logdev_copy_from_dev(dev, &rs, cap); + file = rs.file; + line = rs.line; + + r += cap; + + /* fall through */ + } + + case LOGDEV_PRINT_TIME: + { + struct logdev_print_time rs; + + logdev_copy_from_dev(dev, &rs, sizeof(rs)); + + if (newline) { + t = rs.t; + usec_rem = do_div(t, 1000000000)/1000; + secs = (unsigned long)t; + + printk("[%5lu.%06lu] ", + secs, usec_rem); + } + + r += sizeof(rs); + + /* fall through */ + } + + case LOGDEV_PRINT: + if (newline) { + printk("cpu:%d ",cpu); + + if (hdr[cpu].id == LOGDEV_PRINT_TIME_FUNC) + printk("%s:%d ",file, line); + } + for (i=r; i < hdr[cpu].size; i += r) { + count = hdr[cpu].size - i; + if (count > PAGE_SIZE-1) + count = PAGE_SIZE-1; + r = logdev_copy_from_dev(dev, kern_buffer[cpu], count); + if (r < 0) + break; + kern_buffer[cpu][count] = 0; + printk("%s", kern_buffer[cpu]); + newline = (count) && + (kern_buffer[cpu][count - 1] != '\n') ? 
0 : 1; + } + break; + + case LOGDEV_SWITCH_ID: + { + struct logdev_switch_struct rs; + + printk(">>>> IN LOGDEV SWITCH <<<< cpu:%d \n", cpu); + + logdev_copy_from_dev(dev,&rs,sizeof(rs)); + + t = rs.t; + usec_rem = do_div(t, 1000000000)/1000; + secs = (unsigned long)t; + + printk("CPU=%d [%5lu.%06lu] ", + cpu, secs,usec_rem); + + logdev_copy_from_dev(dev,&kern_buffer[cpu],rs.prev_len); + kern_buffer[cpu][rs.prev_len] = 0; + printk("%s:%d -->> ",kern_buffer[cpu], rs.pid_prev); + + logdev_copy_from_dev(dev,&kern_buffer[cpu],rs.next_len); + kern_buffer[cpu][rs.next_len] = 0; + printk("%s:%d\n",kern_buffer[cpu], rs.pid_next); + + break; + } + + case LOGDEV_PKT_ID: + { + struct logdev_pkt rs; + struct tcphdr th; + struct iphdr iph; + int len = sizeof(hdr); + int iplen; + unsigned long end; + int tcplen; + unsigned long oaddr; + unsigned long iaddr; + unsigned short oport; + unsigned short iport; + int dir; + + /* + * Here we have "len" keeping track of how much data + * we taken out of the device. So if there's data + * left in the device for this record, after we have + * all the printable data, we know how much still needs + * to be retreived. + */ + + logdev_copy_from_dev(dev,&rs,sizeof(rs)); + len += sizeof(rs); + + t = rs.t; + usec_rem = do_div(t, 1000000000)/1000; + secs = (unsigned long)t; + + printk("[%5lu.%06lu] cpu:%d", + secs, usec_rem, cpu); + + if (rs.protocol != htons(ETH_P_IP)) { + printk("Not IP packet\n"); + goto pkt_done; + } + + + if ((hdr[cpu].size - len) < sizeof(iph)) { + printk(" IP Header corrupted\n"); + goto pkt_done; + } + + logdev_copy_from_dev(dev,&iph,sizeof(iph)); + len += sizeof(iph); + + iph.tot_len = ntohs(iph.tot_len); + iph.id = ntohs(iph.id); + iph.frag_off = ntohs(iph.frag_off); + iph.check = ntohs(iph.check); + iph.saddr = ntohl(iph.saddr); + iph.daddr = ntohl(iph.daddr); + + /* + * dir is the direction of the packet. + * 0 is incoming, 1 is outgoing. 
+ */ + dir = rs.dir; + + if (dir) { + /* outgoing */ + oaddr = iph.daddr; + iaddr = iph.saddr; + } else { + /* incoming */ + oaddr = iph.saddr; + iaddr = iph.daddr; + } + +#define IPBYTE(ip,b) (unsigned char)(((ip)>>((b)*8))&0xff) + + if (iph.protocol != 6) { /* 6 is the IP protocol number for TCP */ + printk("%d.%d.%d.%d %s " + "%d.%d.%d.%d (Not TCP packet)\n", + IPBYTE(iaddr,3), + IPBYTE(iaddr,2), + IPBYTE(iaddr,1), + IPBYTE(iaddr,0), + dir ? "==>" : "<==", + IPBYTE(oaddr,3), + IPBYTE(oaddr,2), + IPBYTE(oaddr,1), + IPBYTE(oaddr,0)); + + goto pkt_done; + } + + iplen = iph.ihl<<2; + + /* + * Skip any ip options. + */ + while (iplen > sizeof(iph)) { + int count = iplen - sizeof(iph); + if (count > PAGE_SIZE) + count = PAGE_SIZE; + logdev_copy_from_dev(dev,kern_buffer[cpu],count); + len += count; + iplen -= count; + } + + if ((hdr[cpu].size - len) < sizeof(th)) { + printk(" TCP Header corrupted\n"); + goto pkt_done; + } + + logdev_copy_from_dev(dev,&th,sizeof(th)); + len += sizeof(th); + + tcplen = th.doff<<2; + + th.source = ntohs(th.source); + th.dest = ntohs(th.dest); + th.seq = ntohl(th.seq); + th.ack_seq = ntohl(th.ack_seq); + th.window = ntohs(th.window); + th.check = ntohs(th.check); + th.urg_ptr = ntohs(th.urg_ptr); + + iplen = iph.tot_len - ((iph.ihl<<2) + tcplen); /* payload length; recompute the IP header length because the option-skip loop above consumed iplen */ + end = th.seq; + if (th.fin || th.syn) { + end++; + } + end += iplen; + + if (dir) { + /* outgoing */ + oport = th.dest; + iport = th.source; + } else { + /* incoming */ + oport = th.source; + iport = th.dest; + } + + printk("%d.%d.%d.%d:%d %s %d.%d.%d.%d:%d seq:%u ack:%u " + "(%c%c%c%c%c%c) len:%d win:%d end_seq:%lu\n", + IPBYTE(iaddr,3), + IPBYTE(iaddr,2), + IPBYTE(iaddr,1), + IPBYTE(iaddr,0), + iport, + dir ? "==>" : "<==", + IPBYTE(oaddr,3), + IPBYTE(oaddr,2), + IPBYTE(oaddr,1), + IPBYTE(oaddr,0), + oport, + th.seq, + th.ack_seq, + th.fin ? 'F' : '-', + th.syn ? 'S' : '-', + th.rst ? 'R' : '-', + th.psh ? 'P' : '-', + th.ack ? 'A' : '-', + th.urg ? 
'U' : '-', + iplen, + th.window, + end + ); + + pkt_done: + + /* Drain whatever is left of this record, capped at PAGE_SIZE per read like the other drain loops here. */ + while (len < hdr[cpu].size) { + int count = hdr[cpu].size - len; + if (count > PAGE_SIZE) + count = PAGE_SIZE; + logdev_copy_from_dev(dev, kern_buffer[cpu], count); + len += count; + } + + break; + } + + case LOGDEV_CUSTOM: + { + struct list_head *p; + struct logdev_custom custom; + int len; + + logdev_copy_from_dev(dev,&custom,sizeof(custom)); + + spin_lock(&logdev_callbacks_lock); + i = len = hdr[cpu].size - sizeof(hdr[0]) - sizeof(custom); + if (i > PAGE_SIZE) { + i = PAGE_SIZE; + /* show that we truncated */ + hdr[cpu].size -= len - i; + } + logdev_copy_from_dev(dev,kern_buffer[cpu],i); + + list_for_each(p, &logdev_callbacks) { + struct logdev_callback *cb = list_entry(p, struct logdev_callback, list); + if (cb->id == custom.id) { + cb->func(&hdr[cpu], &custom, kern_buffer[cpu]); + break; + } + } + + /* No record should be bigger than a page. Ignore all else */ + while (i < len) { + int count = len - i; + if (count > PAGE_SIZE) + count = PAGE_SIZE; + logdev_copy_from_dev(dev,kern_buffer[cpu],count); + i += count; + } + + /* check if we didn't find a call back */ + if (p == &logdev_callbacks) { + printk("skipping! LOGDEV_CUSTOM id %d\n",custom.id); + } + + spin_unlock(&logdev_callbacks_lock); + break; + } + default: + corrupt = 1; + if (!dev->corrupted) { + dev->corrupted = 1; + printk(">>>>> Unknown logdev header, cpu %d buffer may be " + "corrupted from this point on\n", cpu); + } + break; + } + + return corrupt; +} +/* Print records from cpu's buffer while its counter is not after next_cpu's, or until it is empty when last_cpu is set. Returns 0 only once the last remaining buffer is drained. */ +static int flush_buffer(struct logdev_dev *dev, struct logdev_header *hdr, + int cpu, int next_cpu, int last_cpu) +{ + int corrupt = 0; + int more_work = 1; + + while (last_cpu || compare_cnt(hdr[cpu].counter, + hdr[next_cpu].counter) <= 0) { + + BUG_ON(hdr[cpu].id == LOGDEV_HDR_DONE); + + /* + * Lets not set off watchdogs. 
+ */ + touch_nmi_watchdog(); + + corrupt = process_log(dev, hdr, cpu); + + /* Read the next header for this */ + if ((logdev_copy_from_dev(dev,&hdr[cpu],sizeof(hdr[0]))) + != sizeof(hdr[0])) { + hdr[cpu].id = LOGDEV_HDR_DONE; + if (last_cpu) + more_work = 0; + break; + } + /* if we have a corrupted header, then stop this buffer flush. */ + if (corrupt) + break; + } + + return more_work; +} +/* Dump every running per-CPU log buffer through printk, interleaving records by header counter; recording is switched off while the dump runs. */ +void logdev_dump(void) +{ + int save_switch; + int save_print; + struct logdev_dev *dev; + struct logdev_header hdr[LOGDEV_CPUS]; + static int started = 0; + int do_lock = 1; /* NOTE(review): cleared on oops below but never consulted; the locks are always taken */ + extern int in_logdump; + unsigned long flags; + int more_work = 0; + int cpu; + + /* + * We don't care about race conditions with this started variable. + * It only exists to keep dumps a little cleaner. If two dumps get + * through at the same time, it doesn't hurt. + */ + if (started) + return; + + started = 1; + + /* Because of the started race, we also use in_logdump just for reference.*/ + in_logdump++; + + if (oops_in_progress) + do_lock = 0; + + local_irq_save(flags); + + /* + * This is for debugging, so we don't want to reintroduce more output. + */ + save_switch = logdev_switch_ison(); + save_print = logdev_print_ison(); + logdev_switch_off(); + logdev_print_off(); + + printk("****** Starting Logdev Dump ********\n"); + + /* + * Read all the available headers for each CPU. + */ + for (cpu = 0; cpu < LOGDEV_CPUS; cpu++) { + hdr[cpu].id = LOGDEV_HDR_DONE; /* CPUs skipped below must not present stack garbage as a pending record */ + dev = get_logdev(cpu); + if (!dev_running(dev)) + continue; + + /* + * Long time to hold the spin locks, but hey it's just debugging. + */ + spin_lock(&dev->lock); + + /* + * Try to limit the amount added while reading + * this buffer, suspend the buffer. (redundant but also good for + * accounting) + */ + dev->init = LOGDEV_DEV_SUSPENDED; + + if ((logdev_copy_from_dev(dev, &hdr[cpu], sizeof(hdr[0]))) + == sizeof(hdr[0])) + /* record that we have a buffer to work with. 
*/ + more_work = 1; + else + /* record that the buffer is empty */ + hdr[cpu].id = LOGDEV_HDR_DONE; + } + + while (more_work) { + int this_cpu; + int next_cpu; + int last_cpu; + + /* + * Find the cpu to work with that has the earliest counter, + * and also the cpu with the next counter. + */ + last_cpu = get_next_cpus(hdr, &this_cpu, &next_cpu); + + cpu = this_cpu; + dev = get_logdev(cpu); + + BUG_ON(this_cpu < 0); + BUG_ON(!last_cpu && next_cpu < 0); + BUG_ON(dev->init != LOGDEV_DEV_SUSPENDED); + BUG_ON(hdr[cpu].id == LOGDEV_HDR_DONE); + + + /* + * Now print out all from this buffer until we reach + * the next cpu. If this is the last buffer to write + * then finish the buffer. + */ + more_work = flush_buffer(dev, hdr, cpu, next_cpu, last_cpu); + } + + printk( ">>>>> done <<<<<\n"); + + for (cpu=0; cpu < LOGDEV_CPUS; cpu++) { + dev = get_logdev(cpu); + if (dev_suspended(dev)) { + dev->init = LOGDEV_DEV_RUNNING; + spin_unlock(&dev->lock); + } + + } + if (save_print) + logdev_print_on(); + if (save_switch) + logdev_switch_on(); + + local_irq_restore(flags); + started = 0; + in_logdump--; +} +/* Per-page ring arithmetic: head and tail are offsets taken modulo PAGE_SIZE; one byte is kept free so a full page differs from an empty one. */ +#define ENTRY_SIZE(e) ((((e)->tail - (e)->head)) & (PAGE_SIZE-1)) +#define ENTRY_FREE(e) ((PAGE_SIZE-1) - ENTRY_SIZE(e)) +#define ENTRY_ADD(e,x) ((e) = ((e) + (x)) & (PAGE_SIZE-1)) +#define ENTRY_INC(e) ENTRY_ADD(e,1) +#define ENTRY_MAX (PAGE_SIZE-1) +/* Copy up to size bytes out of the ring, starting at dev->start, into buf. Returns the number of bytes copied (short if the device holds less), or -1 if size is negative. */ +int logdev_copy_from_dev(struct logdev_dev *dev, void *buf, int size) +{ + struct logdev_entry *entry = &dev->entry[dev->start]; + int ret = 0; + + if (size < 0) { + printk("logdev_copy_from_dev: size < 0 ???\n"); + return -1; + } + if (size > dev->size) + size = dev->size; + + while (size && dev->size) { + int copy = size; + int used; + + if (!ENTRY_SIZE(entry)) { + dev->start = (dev->start + 1) % dev->len; + entry = &dev->entry[dev->start]; + } + + if (copy > (used=ENTRY_SIZE(entry))) + copy = used; + if (entry->head+copy > PAGE_SIZE) /* stop at the end of the page; the next pass picks up the wrapped part */ + copy = PAGE_SIZE - entry->head; + memcpy(buf,entry->dat+entry->head,copy); + 
ENTRY_ADD(entry->head,copy); + buf += copy; + dev->size -= copy; + size -= copy; + ret += copy; + + } + + return ret; +} +/* Discard whole records from the head of the ring until dev->start has moved on to a following page. */ +static void move_start_to_next_entry(struct logdev_dev *dev) +{ + struct logdev_entry *entry = &dev->entry[dev->start]; + int start = dev->start; + int size; + /* Read the full on-buffer header: struct logdev_header starts with the + * counter, so a private { id, size } pair would misparse the counter + * as the id and flag every record as corrupt. */ + struct logdev_header hdr; + + if (dev->corrupted) { + /* Don't trust headers, just skip to the next entry */ + dev->size -= ENTRY_SIZE(entry); + entry->head = entry->tail = 0; + dev->start = (dev->start+1) % dev->len; + return; + } + + while (start == dev->start) { + logdev_copy_from_dev(dev,(char*)&hdr,sizeof(hdr)); + entry = &dev->entry[dev->start]; + switch (hdr.id) { + case LOGDEV_CUSTOM: + case LOGDEV_SWITCH_ID: + case LOGDEV_PKT_ID: + case LOGDEV_PRINT: + case LOGDEV_PRINT_TIME: + case LOGDEV_PRINT_TIME_FUNC: + break; + default: + dev->corrupted = 1; + } + if (dev->corrupted) { + if (start == dev->start) + move_start_to_next_entry(dev); + return; + } + + size = sizeof(hdr); /* hdr.size counts the header too, so start from the bytes already consumed */ + while (size < hdr.size) { + int count = hdr.size - size; + if (count > ENTRY_SIZE(entry)) + count = ENTRY_SIZE(entry); + dev->size -= count; + size += count; + ENTRY_ADD(entry->head,count); + if (ENTRY_SIZE(entry) == 0) { + dev->start = (dev->start + 1) % dev->len; + entry->head = entry->tail = 0; + entry = &dev->entry[dev->start]; + } + } + } +} +/* Append size bytes from dat at dev->end, moving to the next page when the current one is full. Returns the number of bytes written. */ +static int logdev_copy_to_dev(struct logdev_dev *dev, const void *dat, + int size) +{ + struct logdev_entry *entry = &dev->entry[dev->end]; + const char *buf = dat; + int ret = 0; + + while (size) { + int copy; + int free; + if (ENTRY_SIZE(entry) == ENTRY_MAX) { + dev->end = (dev->end+1) % dev->len; + entry = &dev->entry[dev->end]; + /* if we wrapped, then clear out this entire + * buffer. 
+ */ + if (dev->end == dev->start) { + move_start_to_next_entry(dev); + } + } + copy = size; + if (copy > (free=ENTRY_FREE(entry))) + copy = free; + if (entry->tail+copy > PAGE_SIZE) /* stop at the end of the page; the next pass continues from offset 0 */ + copy = PAGE_SIZE - entry->tail; + memcpy(entry->dat+entry->tail,buf,copy); + ENTRY_ADD(entry->tail,copy); + buf += copy; + size -= copy; + ret += copy; + dev->size += copy; + + } +#if 0 + /* wake up those waiting for data */ + if (waitqueue_active(&dev->wait)) + wake_up_interruptible(&dev->wait); +#endif + + return ret; + +} + +/* Minimal decimal parser: accumulates leading ASCII digits and stops at the first other character; no sign handling and no overflow check. */ +#if 1 +static int atoi(const char *p) +{ + int n = 0; + while (*p && '0' <= *p && *p <= '9') { + n *= 10; + n += *p - '0'; + p++; + } + return (n); +} +/* procfs read handler: formats the unsigned long that data points to with "%ld\n" and serves the offset/count slice of it. NOTE(review): data is always read as unsigned long; confirm every registered object has that width. */ +static int proc_var_read(char *buffer, char **start, off_t offset, int count, + int *eof, void *data) +{ + unsigned long var; + int len; + + var = *((unsigned long*)data); + + len = sprintf(buffer,"%ld\n",var); + + if (offset >= len) { /* reader is already past the text: report end of file */ + *start = buffer; + *eof = 1; + return 0; + } + *start = buffer + offset; + if ((len -= offset) > count) /* more text left than requested: hand back count bytes without setting eof */ + return count; + *eof = 1; + return len; +} + +#if 0 +static int proc_var_write(struct file * file, const char * buffer, + unsigned long count, void *data) +{ + atomic_t *var; + int val; + char buf[10]; + + var = (atomic_t*)data; + + if (count > 9) + count = 9; + + if(copy_from_user(&buf, buffer, count)) + return -EFAULT; + + buf[count] = 0; + + val = atoi(buf); + + atomic_set(var,val); + + file->f_pos += count; + + return count; + +} +#endif +/* procfs read handler for one switch: data carries the bit number, and the output is "0\n" or "1\n" according to that bit of logdev_switches. */ +static int proc_sw_read(char *buffer, char **start, off_t offset, int count, + int *eof, void *data) +{ + int var; + int len; + unsigned long bit = (unsigned long)data; + + /* We control what bit is, so it had better be right! 
*/ + if (bit >= LOGDEV_SW_BITS) + BUG(); + + bit = (1 << bit); + + var = !!(logdev_switches & bit); + + len = sprintf(buffer,"%d\n",var); + + if (offset >= len) { + *start = buffer; + *eof = 1; + return 0; + } + *start = buffer + offset; + if ((len -= offset) > count) + return count; + *eof = 1; + return len; +} +/* procfs write handler for one switch: data carries the bit number; a leading non-zero decimal sets that bit of logdev_switches, anything else clears it. At most 9 bytes are consumed. */ +static int proc_sw_write(struct file * file, const char * buffer, + unsigned long count, void *data) +{ + int val; + unsigned long bit = (unsigned long)data; /* same conversion as proc_sw_read(); an int cast truncates the pointer on 64-bit */ + char buf[10]; + + /* We control what bit is, so it had better be right! */ + if (bit >= LOGDEV_SW_BITS) + BUG(); + + bit = (1 << bit); + + if (count > 9) /* leave room for the terminating NUL in buf[10] */ + count = 9; + + if(copy_from_user(buf, buffer, count)) + return -EFAULT; + + buf[count] = 0; + + val = atoi(buf) ? bit : 0; + + logdev_switches = (logdev_switches & ~bit) | val; + + file->f_pos += count; + + return count; + +} +#endif +/* seq_file next: returns the ring entry at index *pos, or NULL past the last one, and advances *pos. */ +static void *s_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct logdev_dev *dev = m->private; + int i = (int)*pos; + + (*pos)++; + + if (i >= dev->len) + return NULL; + + return &dev->entry[i]; +} +/* seq_file start: takes dev->lock with interrupts disabled, then walks forward to *pos. */ +static void *s_start(struct seq_file *m, loff_t *pos) + __acquires(logdev_dev.lock) +{ + struct logdev_dev *dev = m->private; + void *p = NULL; + loff_t l = 0; + + /* + * A little strong? Perhaps, but we know that this is bad right + * from the start. Anyway this is for debugging purposes only, + * so it's OK, as well as the big latency we get by turning off + * interrupts. But we also never know who will be locking + * this. 
+ */ + if (irqs_disabled()) + BUG(); + + local_irq_disable(); + spin_lock(&dev->lock); + for (p = (void *)1; p && l < *pos; p = s_next(m,p,&l)) + ; /* (void *)1 stands for position 0; s_show() prints the device summary for it */ + + return p; +} +/* seq_file stop: releases dev->lock and re-enables interrupts, undoing s_start(). */ +static void s_stop(struct seq_file *m, void *p) + __releases(logdev_dev.lock) +{ + struct logdev_dev *dev = m->private; + spin_unlock(&dev->lock); + local_irq_enable(); +} +/* seq_file show: prints the device summary for the (void *)1 sentinel, otherwise one line with the entry's index, head, tail and used size. */ +static int s_show(struct seq_file *m, void *v) +{ + int i = (long)(v); + struct logdev_entry *entry = v; + struct logdev_dev *dev; + + dev = m->private; + + if (i == 1) { + seq_printf(m,"Logdev:\n"); + seq_printf(m,"\tlen:\t%d\n",dev->len); + seq_printf(m,"\tsize:\t%d\n",dev->size); + seq_printf(m,"\tstart:\t%d\n",dev->start); + seq_printf(m,"\tend:\t%d\n",dev->end); + seq_printf(m,"\tcorrupted:%d\n",dev->corrupted); + seq_printf(m,"\n\tEntries:\n"); + + } else { + i = (int)((char*)entry - (char*)dev->entry) / sizeof(struct logdev_entry); /* index of this entry within dev->entry[] */ + + seq_printf(m,"\t %d:\t%8u : %8u\tsize: %lu\n", i, + entry->head, entry->tail, + ENTRY_SIZE(entry)); + } + + + return 0; +} + +static struct seq_operations logdev_proc_op = { + .start = s_start, + .next = s_next, + .stop = s_stop, + .show = s_show, +}; +/* Open handler for the per-CPU proc entry: starts a seq_file session and hands it the pointer stored in the proc entry's data field. */ +static int logdev_proc_open (struct inode *inode, struct file *file) +{ + struct proc_dir_entry * de; + int ret; + + de = PDE(inode); + + ret = seq_open(file, &logdev_proc_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = de->data; + } + + return ret; +} + + +static struct file_operations logdev_proc_operations = { + .open = logdev_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* Create the LOGDEV_PROC_DIR directory, one numbered sub-directory per initialised device, and the global switch files. */ +static void logdev_proc_setup(void) +{ + struct proc_dir_entry *entry; + struct logdev_dev *dev; + int i; + char buff[30]; + + logdev_proc_dir = proc_mkdir(LOGDEV_PROC_DIR, NULL); + if (!logdev_proc_dir) + return; + + for (i=0; i < LOGDEV_CPUS; i++) { + dev = get_logdev(i); + if (!dev->init) + continue; + + sprintf(buff,"%d",i); + dev->dir = proc_mkdir(buff,logdev_proc_dir); /* NOTE(review): result is not checked before being used as a parent below */ + + entry = 
create_proc_entry(LOGDEV_PROC_ENTRY, + S_IFREG | S_IRUGO, + dev->dir); + if (entry) { + logdev_proc_entry[i] = entry; + entry->proc_fops = &logdev_proc_operations; + entry->data = dev; + } + + entry = create_proc_entry(LOGDEV_PROC_MINOR, + S_IFREG | S_IRUGO, + dev->dir); + if (entry) { + entry->read_proc = proc_var_read; + entry->data = &dev->minor; + logdev_proc_minor[i] = entry; + } + } + + entry = create_proc_entry(LOGDEV_PROC_SW, + S_IFREG | S_IRUGO | S_IWUGO, + logdev_proc_dir); + if (entry) { + entry->read_proc = proc_sw_read; + entry->write_proc = proc_sw_write; + entry->data = (void*)LOGDEV_SW_SWITCH_ENABLED; + logdev_proc_sw = entry; + } + + entry = create_proc_entry(LOGDEV_PROC_SWITCHES, + S_IFREG | S_IRUGO, + logdev_proc_dir); + if (entry) { + entry->read_proc = proc_var_read; + entry->data = &logdev_switches; + /* read-only view with no write_proc; logdev_proc_sw keeps tracking LOGDEV_PROC_SW and is not overwritten here */ + } + +#if 0 + entry = create_proc_entry(LOGDEV_PROC_LEVEL, + S_IFREG | S_IRUGO | S_IWUGO, + logdev_proc_dir); + if (entry) { + entry->read_proc = proc_var_read; + entry->write_proc = proc_var_write; + entry->data = &logdev_level; + logdev_proc_level = entry; + } +#endif + entry = create_proc_entry(LOGDEV_PROC_PRINT, + S_IFREG | S_IRUGO | S_IWUGO, + logdev_proc_dir); + if (entry) { + entry->read_proc = proc_sw_read; + entry->write_proc = proc_sw_write; + entry->data = (void*)LOGDEV_SW_PRINT_ENABLED; + logdev_proc_print = entry; + } +} + +static struct miscdevice logdev_misc_dev[LOGDEV_CPUS] = +{ [ 0 ... 
(LOGDEV_CPUS-1) ] = + { + .minor = MISC_DYNAMIC_MINOR, + .fops = &logdev_fops + } +}; +/* Panic notifier: dump the log buffers when the kernel panics. */ +static int logdev_panic_handler(struct notifier_block *this, + unsigned long event, + void *unused) +{ + logdev_dump(); + return NOTIFY_OK; +} + +static struct notifier_block logdev_panic_notifier = { + .notifier_call = logdev_panic_handler, + .next = NULL, + .priority = 150 /* priority: INT_MAX >= x >= 0 */ +}; + +/* + * Unfortunately, the die handlers have no way to unregister, so + * we don't want to do this if we are a module. We would if we could + * find a way to clean ourselves up when unloaded. + */ +int logdev_die_handler(struct notifier_block *self, unsigned long val, + void *data) +{ + switch (val) { + case DIE_OOPS: + logdev_dump(); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block logdev_die_notifier = { + .notifier_call = logdev_die_handler, + .priority = 200 +}; +/* Tear down proc entries, buffers and misc devices. Not marked __exit: logdev_init() calls this on its failure path, and __exit text may be discarded for built-in code. */ +void logdev_cleanup(void) +{ + struct logdev_dev *dev; + int i; + int cpu; + char buff[30]; + + atomic_notifier_chain_unregister(&panic_notifier_list, &logdev_panic_notifier); + + for (i=0; i < LOGDEV_CPUS; i++) { + dev = get_logdev(i); + if (dev->dir) { + struct proc_dir_entry *parent = dev->dir; + if (logdev_proc_entry[i]) { + sprintf(buff,"%s",LOGDEV_PROC_ENTRY); + remove_proc_entry(buff,parent); + } + if (logdev_proc_minor[i]) { + sprintf(buff,"%s",LOGDEV_PROC_MINOR); + remove_proc_entry(buff,parent); + } + sprintf(buff,"%s/%d",LOGDEV_PROC_DIR,i); + remove_proc_entry(buff,NULL); + } + } + + if (logdev_proc_sw) { + sprintf(buff,"%s/%s",LOGDEV_PROC_DIR,LOGDEV_PROC_SW); + remove_proc_entry(buff,NULL); + } + if (logdev_proc_level) { + sprintf(buff,"%s/%s",LOGDEV_PROC_DIR,LOGDEV_PROC_LEVEL); + remove_proc_entry(buff,NULL); + } + if (logdev_proc_print) { + sprintf(buff,"%s/%s",LOGDEV_PROC_DIR,LOGDEV_PROC_PRINT); + remove_proc_entry(buff,NULL); + } + if (logdev_proc_dir) + remove_proc_entry(LOGDEV_PROC_DIR,NULL); + + for_each_present_cpu(cpu) { + + if (cpu >= LOGDEV_CPUS) + 
break; + + dev = get_logdev(cpu); + if (!dev->init) + continue; + + if (logdev_misc_dev[cpu].name) + logdev_kfree(logdev_misc_dev[cpu].name); + + if (dev->entry) { + struct logdev_entry *entry; + for (i=0, entry=dev->entry; i < dev->len; i++,entry++) /* free every data page of the ring */ + if (entry->dat) { + logdev_free_page((unsigned long)entry->dat); + entry->dat = NULL; + } + dev->len = 0; + logdev_kfree(dev->entry); + dev->entry = NULL; + } + + if (dev->minor >= 0) { + misc_deregister(&logdev_misc_dev[cpu]); + dev->minor = -1; + } + } +} +/* Late init: register the misc device of every initialised CPU buffer that has no minor yet, then create the proc files. Always returns 0. */ +int __init logdev_reg_misc(void) +{ + struct logdev_dev *dev; + int cpu; + int res; + + for_each_present_cpu(cpu) { + + if (cpu >= LOGDEV_CPUS) + break; + + dev = get_logdev(cpu); + if (!dev->init) + continue; + + if (dev->minor < 0) { + res = misc_register(&logdev_misc_dev[cpu]); + if (res) { + printk("Still can't register misc device for Logdev CPU %d\n",cpu); + printk(" giving up!\n"); + } else + dev->minor = logdev_misc_dev[cpu].minor; + } + } + + logdev_proc_setup(); + + return 0; +} +/* Core init: allocate the page ring and misc-device name for each present CPU and register the panic and die notifiers. Returns 0, or -ENOMEM after calling logdev_cleanup(). */ +int __init logdev_init(void) +{ + int res = 0; + int cpu; + struct logdev_dev *dev; + struct logdev_entry *entry; + static int init = 0; + + if (init) + return 0; + + init = 1; + + printk("Logdevice: copyright Steven Rostedt, Kihon Technologies Inc." 
+ " (Version %s)\n", + logdev_version); + + for_each_present_cpu(cpu) { + int i; + char *name; + + if (cpu >= LOGDEV_CPUS) { + printk(KERN_WARNING "More present cpus (%d) than NR_CPUS (%d)\n", + cpu,LOGDEV_CPUS); + break; + } + + printk("Initializing logdev for cpu: %d\n",cpu); + + dev = get_logdev(cpu); + + name = logdev_malloc(32); + if (!name) { + printk(KERN_WARNING "logdev: can't allocate misc name\n"); + continue; + } + + sprintf(name,"logdev%d",cpu); + + /* register later */ + logdev_misc_dev[cpu].name = name; + dev->minor = -1; + + dev->len = pages; + + res = -ENOMEM; + + dev->entry = logdev_malloc(sizeof(struct logdev_entry)*dev->len); + if (!dev->entry) + goto fail; + + memset(dev->entry,0,sizeof(*dev->entry)*dev->len); + + for (i=0,entry=dev->entry; i < dev->len; i++,entry++) { /* one data page per ring entry */ + entry->dat = logdev_get_free_pages(0); + if (!entry->dat) + goto fail; + } + init_waitqueue_head(&dev->wait); + spin_lock_init(&dev->lock); + + dev->init = 1; + } + + atomic_notifier_chain_register(&panic_notifier_list, &logdev_panic_notifier); + + register_die_notifier(&logdev_die_notifier); + + res = 0; + + +out: + return res; + +fail: /* NOTE(review): dev->init is still 0 for the device that failed, and logdev_cleanup() skips such devices; confirm its partial allocations are not leaked */ + logdev_cleanup(); + goto out; +} + +EXPORT_SYMBOL_GPL(logdev_record_switch); +EXPORT_SYMBOL_GPL(logdev_pkt); +EXPORT_SYMBOL_GPL(logdev_print); +EXPORT_SYMBOL_GPL(logdev_print_time); +EXPORT_SYMBOL_GPL(logdev_print_time_func); +EXPORT_SYMBOL_GPL(logdev_record); +EXPORT_SYMBOL_GPL(logdev_record_write); +EXPORT_SYMBOL_GPL(logdev_record_read); +EXPORT_SYMBOL_GPL(logdev_dump); + +core_initcall(logdev_init); +late_initcall(logdev_reg_misc); Index: linux-2.6.18-rc6/drivers/char/logdev_hooks.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6.18-rc6/drivers/char/logdev_hooks.c 2006-09-05 20:25:39.000000000 -0400 @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include +#include + +atomic_t logdev_level = ATOMIC_INIT(0); +#ifdef CONFIG_LOGDEV_SWITCH_ENABLED +atomic_t 
logdev_switch = ATOMIC_INIT(1); +#else +atomic_t logdev_switch = ATOMIC_INIT(0); +#endif +#ifdef CONFIG_LOGDEV_PRINT_ENABLED +atomic_t logdev_print_enabled = ATOMIC_INIT(1); +#else +atomic_t logdev_print_enabled = ATOMIC_INIT(0); +#endif + +int in_logdump; + +LOGDEV_DEFINE_SPINLOCK(logdev_add_hook_lock); +LOGDEV_DEFINE_SPINLOCK(logdev_callbacks_lock); + +LIST_HEAD(logdev_callbacks); + +EXPORT_SYMBOL(logdev_callbacks_lock); +EXPORT_SYMBOL(logdev_callbacks); + +/* + * The following is to register call back functions to print out + * a custom record. + */ +int logdev_register_callback(int custom_id, logdev_callback_func func) +{ + struct list_head *p; + struct logdev_callback *cb; + unsigned long flags; + int ret = 0; + + cb = kmalloc(sizeof(*cb),GFP_KERNEL); /* allocated before the lock is taken */ + if (!cb) { + ret = -ENOMEM; + goto out; + } + + spin_lock_irqsave(&logdev_callbacks_lock,flags); + list_for_each(p,&logdev_callbacks) { + struct logdev_callback *c = list_entry(p, struct logdev_callback, list); + if (c->id == custom_id) { /* one callback per id: reject the duplicate with -EBUSY */ + spin_unlock_irqrestore(&logdev_callbacks_lock,flags); + kfree(cb); + ret = -EBUSY; + goto out; + } + } + + cb->id = custom_id; + cb->func = func; + list_add(&cb->list,&logdev_callbacks); + spin_unlock_irqrestore(&logdev_callbacks_lock,flags); + + out: + return ret; +} +/* Remove and free the callback registered for custom_id. Returns 0, or -ENODEV when no callback has that id. */ +int logdev_unregister_callback(int custom_id) +{ + struct list_head *p; + struct logdev_callback *cb; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&logdev_callbacks_lock,flags); + list_for_each(p,&logdev_callbacks) { + cb = list_entry(p, struct logdev_callback, list); + if (cb->id == custom_id) + break; + } + if (p == &logdev_callbacks) { /* walked the whole list without a match */ + ret = -ENODEV; + spin_unlock_irqrestore(&logdev_callbacks_lock,flags); + goto out; + } + + list_del(&cb->list); + spin_unlock_irqrestore(&logdev_callbacks_lock,flags); + + kfree(cb); + + out: + return ret; +} + +/* + * sched_clock isn't exported, so we export it ourselves. 
+ */ +EXPORT_SYMBOL(logdev_switch); +EXPORT_SYMBOL(logdev_level); +EXPORT_SYMBOL(logdev_print_enabled); +EXPORT_SYMBOL(sched_clock); +EXPORT_SYMBOL(in_logdump); Index: linux-2.6.18-rc6/drivers/char/Makefile =================================================================== --- linux-2.6.18-rc6.orig/drivers/char/Makefile 2006-09-05 17:14:56.000000000 -0400 +++ linux-2.6.18-rc6/drivers/char/Makefile 2006-09-05 20:59:03.000000000 -0400 @@ -96,6 +96,8 @@ obj-$(CONFIG_DRM) += drm/ obj-$(CONFIG_PCMCIA) += pcmcia/ obj-$(CONFIG_IPMI_HANDLER) += ipmi/ +obj-$(CONFIG_LOGDEV) += logdev.o + obj-$(CONFIG_HANGCHECK_TIMER) += hangcheck-timer.o obj-$(CONFIG_TCG_TPM) += tpm/ Index: linux-2.6.18-rc6/drivers/char/sysrq.c =================================================================== --- linux-2.6.18-rc6.orig/drivers/char/sysrq.c 2006-09-05 17:14:56.000000000 -0400 +++ linux-2.6.18-rc6/drivers/char/sysrq.c 2006-09-05 22:48:05.000000000 -0400 @@ -30,6 +30,7 @@ #include #include #include /* for fsync_bdev() */ +#include #include #include #include @@ -41,6 +42,7 @@ /* Whether we react on sysrq keys or just ignore them */ int sysrq_enabled = 1; +/* Loglevel sysrq handler */ static void sysrq_handle_loglevel(int key, struct pt_regs *pt_regs, struct tty_struct *tty) { @@ -147,6 +149,41 @@ static struct sysrq_key_op sysrq_mountro .enable_mask = SYSRQ_ENABLE_REMOUNT, }; +static void sysrq_handle_dumplog(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + logdev_dump(); +} + +static struct sysrq_key_op sysrq_dumplog_op = { + .handler = sysrq_handle_dumplog, + .help_msg = "Dumplog", + .action_msg = "Dump logdev to serial", +}; + +static void sysrq_handle_togglelogswitch(int key, struct pt_regs *pt_regs, + struct tty_struct *tty) +{ + /* + * Not so atomic, but we really don't care! 
+ */ + if (logdev_switch_ison()) { + logdev_switch_off(); + printk("logdev_switch now off\n"); + } else { + logdev_switch_on(); + printk("logdev_switch now on\n"); + } +} + +static struct sysrq_key_op sysrq_togglelogdevswitch_op = { + .handler = sysrq_handle_togglelogswitch, + .help_msg = "Togglelogswitch", + .action_msg = "Toggling logdev_switch", +}; + +/* END SYNC SYSRQ HANDLERS BLOCK */ + #ifdef CONFIG_LOCKDEP static void sysrq_handle_showlocks(int key, struct pt_regs *pt_regs, struct tty_struct *tty) @@ -295,8 +332,8 @@ static struct sysrq_key_op *sysrq_key_ta &sysrq_crashdump_op, /* c */ &sysrq_showlocks_op, /* d */ &sysrq_term_op, /* e */ - &sysrq_moom_op, /* f */ - NULL, /* g */ + &sysrq_togglelogdevswitch_op, /* f */ + &sysrq_dumplog_op, /* g */ NULL, /* h */ &sysrq_kill_op, /* i */ NULL, /* j */ @@ -314,7 +351,7 @@ static struct sysrq_key_op *sysrq_key_ta &sysrq_mountro_op, /* u */ /* May be assigned at init time by SMP VOYAGER */ NULL, /* v */ - NULL, /* w */ + &sysrq_moom_op, /* w */ NULL, /* x */ NULL, /* y */ NULL /* z */ Index: linux-2.6.18-rc6/arch/i386/mm/fault.c =================================================================== --- linux-2.6.18-rc6.orig/arch/i386/mm/fault.c 2006-09-05 17:14:55.000000000 -0400 +++ linux-2.6.18-rc6/arch/i386/mm/fault.c 2006-09-06 09:32:05.000000000 -0400 @@ -28,6 +28,7 @@ #include #include +#include extern void die(const char *,struct pt_regs *,long); #ifdef CONFIG_KPROBES @@ -547,6 +548,8 @@ no_context: bust_spinlocks(1); if (oops_may_print()) { + lfnprint("BUG!"); + logdev_print_off(); #ifdef CONFIG_X86_PAE if (error_code & 16) { pte_t *pte = lookup_address(address);