--- arch/x86/kernel/dumpstack.c | 9 arch/x86/mm/fault.c | 4 drivers/char/sysrq.c | 33 include/linux/logdev.h | 222 ++++++ kernel/Makefile | 1 kernel/logdev/Makefile | 11 kernel/logdev/logdev.c | 1357 ++++++++++++++++++++++++++++++++++++++ kernel/logdev/logdev_priv.h | 74 ++ kernel/logdev/logdev_probe.c | 1427 +++++++++++++++++++++++++++++++++++++++++ kernel/logdev/logdev_relay.c | 175 +++++ kernel/logdev/logdev_ringbuf.c | 553 +++++++++++++++ kernel/logdev/logdev_ringbuf.h | 158 ++++ lib/Kconfig.debug | 62 + 13 files changed, 4083 insertions(+), 3 deletions(-) Index: linux-trace.git/include/linux/logdev.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-trace.git/include/linux/logdev.h 2009-04-28 17:17:35.000000000 -0400 @@ -0,0 +1,222 @@ +/* + * Logdevice - A device used to record debuging information in the kernel. + * It uses a large memory ring buffer consisting of individual pages + * to keep down on hogging large sections. A user may then read the device + * to get debugging information out of it. Or if configured, this can + * be dumped to the network on a system crash. + * + * Copyright - 2005 - Steven Rostedt, Kihon Technologies, (rostedt at kihontech dot com) + */ +#ifndef _LOG_DEV_H +#define _LOG_DEV_H + +#include + +struct logdev_print { + char str[0]; +}; + +struct logdev_print_time { + unsigned long long t; + char str[0]; +}; + +struct logdev_print_time_func { + const char *file; + int line; + /* need to be after line, since we use this with print_time. 
*/ + unsigned long long t; + char str[0]; +} __attribute__((packed)); + +struct logdev_func { + unsigned long long t; + short pid; + unsigned long ip; + unsigned long parent_ip; + char comm[0]; +} __attribute__((packed)); + +struct logdev_print_time_func_curr { + short pid; + char comm[TASK_COMM_LEN]; + /* this is used with print_time_func */ + const char *file; + int line; + /* need to be after line, since we use this with print_time. */ + unsigned long long t; + char str[0]; +} __attribute__((packed)); + +struct logdev_time_func_curr_symbol { + unsigned long symbol; + /* this is used with print_time_func_curr */ + short pid; + char comm[TASK_COMM_LEN]; + /* this is used with print_time_func */ + const char *file; + int line; + /* need to be after line, since we use this with print_time. */ + unsigned long long t; +} __attribute__((packed)); + +struct logdev_custom { + int id; + char data[0]; +} __attribute__((packed)); + +struct logdev_header { + long counter; + int id; + int size; +} __attribute__((packed)); + +struct logdev_item { + struct logdev_header hdr; + union { + struct logdev_print print; + struct logdev_func func; + struct logdev_print_time print_time; + struct logdev_print_time_func print_time_func; + struct logdev_custom custom; + char data[0]; + } u; +} __attribute__((packed)); + +#ifdef CONFIG_LOGDEV + +extern int in_logdump; + +extern unsigned long logdev_switches; +#define LOGDEV_SW_PRINT_ENABLED 0 +#define LOGDEV_SW_MARKER_ENABLED 2 +#define LOGDEV_SW_BITS 3 + +#define LOGDEV_SW_ISSET(sw) (test_bit(sw, &logdev_switches)) +#define LOGDEV_SW_SET(sw) (set_bit(sw, &logdev_switches)) +#define LOGDEV_SW_CLEAR(sw) (clear_bit(sw, &logdev_switches)) + +#define logdev_print_ison() LOGDEV_SW_ISSET(LOGDEV_SW_PRINT_ENABLED) +#define logdev_print_on() LOGDEV_SW_SET(LOGDEV_SW_PRINT_ENABLED); +#define logdev_print_off() LOGDEV_SW_CLEAR(LOGDEV_SW_PRINT_ENABLED); + +#define logdev_mark_ison() LOGDEV_SW_ISSET(LOGDEV_SW_MARKER_ENABLED) +#define logdev_mark_on() 
LOGDEV_SW_SET(LOGDEV_SW_MARKER_ENABLED); +#define logdev_mark_off() LOGDEV_SW_CLEAR(LOGDEV_SW_MARKER_ENABLED); + +typedef void (*logdev_callback_func)(struct logdev_header *hdr, + struct logdev_custom *custom, + int cpu, + void *rec); + +int logdev_print(const char *str, ...) + __attribute__ ((format (printf, 1, 2))); +int logdev_vprint(const char *str, va_list va); +int logdev_print_time(const char *str, ...) + __attribute__ ((format (printf, 1, 2))); +int __logdev_print_time_func(int curr, const char *file, int line, + unsigned long symbol, const char *str, ...) + __attribute__ ((format (printf, 5, 6))); +#define logdev_print_time_func(file, line, str...) \ + __logdev_print_time_func(0, file, line, 0, str) +#define logdev_print_time_func_curr(file, line, str...) \ + __logdev_print_time_func(1, file, line, 0, str) +#define logdev_time_func_curr_symbol(file, line, symbol) \ + __logdev_print_time_func(2, file, line, (unsigned long)symbol, NULL) +#define logdev_mark(file, line) \ + __logdev_print_time_func(3, file, line, 0, NULL) + + +/* + * logdev_record is used for custom writes (saves of sprintf) + * use id and register a callback so that the logdump knows + * what to do when it sees this record. + * You can pass in multiple data structures, just end the function + * parameters with a NULL. + */ +int logdev_record(int id, int total_size, const void *data, int size, ...); +void logdev_dump(void); + +#define LOGDEV(x,y...) logdev_##x(y) +#define LOGPRINTS(func,x...) do { if (logdev_print_ison()) LOGDEV(func,x); } while(0) + +/* + * Using the UPPER case here ignores the logdev_print_enabled flag + */ +#define LOGPRINT(x...) LOGDEV(print,x) +#define LOGTPRINT(x...) LOGDEV(print_time,x) +#define LOGTFPRINT(x...) LOGDEV(print_time_func,__FUNCTION__,__LINE__,x) +#define LOGTFCPRINT(x...) 
LOGDEV(print_time_func_curr,__FUNCTION__,__LINE__,x) +#define LOGSYMBOL(x) LOGDEV(time_func_curr_symbol,__FUNCTION__,__LINE__,x) + +/* + * Using these functions, will only log if logdev_print_enabled flag is set. + */ + +#define lprint(x...) LOGPRINTS(print,x) +#define ltprint(x...) LOGPRINTS(print_time,x) +#define lfprint(x...) LOGPRINTS(print_time_func,__FUNCTION__,__LINE__,x) +#define lfcprint(x...) LOGPRINTS(print_time_func_curr,__FUNCTION__,__LINE__,x) +#define lsymbol(x) LOGPRINTS(time_func_curr_symbol,__FUNCTION__,__LINE__,x) +#define lmark() LOGPRINTS(mark,__FUNCTION__,__LINE__) + +/* + * lfnprint is identical to lfprint except that it adds a new line at the end. + */ +#define _lnprint(func,x,y...) func( x "%s\n", y) +#define lnprint(x...) _lnprint(lprint,x,"") +#define ltnprint(x...) _lnprint(ltprint,x,"") +#define lfnprint(x...) _lnprint(lfprint,x,"") +#define lfcnprint(x...) _lnprint(lfcprint,x,"") + +struct logdev_callback { + struct list_head list; + int id; + logdev_callback_func func; +}; + +int logdev_register_callback(int custom_id, logdev_callback_func func); +int logdev_unregister_callback(int custom_id); +void logdev_time(unsigned long long t); +void logdev_toggle_func_tracing(void); + +int logdev_init(void); /* If we want to put this in main.c */ + +#else /* !LOGDEV: stubs so that callers still build without CONFIG_LOGDEV */ +#define logdev_toggle_func_tracing() do { } while (0) +#define logdev_dump() do {} while(0) +#define logdev_record(x...) do {} while(0) + +#define LOGPRINT(x...) do {} while(0) +#define LOGTPRINT(x...) do {} while(0) +#define LOGTFPRINT(x...) do {} while(0) +#define LOGTFCPRINT(x...) do {} while(0) +#define LOGSYMBOL(x) do {} while(0) +#define LOGMARK() do {} while(0) + +#define logdev_print_ison() ( 0 ) +#define logdev_print_on() do {} while(0) +#define logdev_print_off() do {} while(0) + +#define logdev_print(x...) do {} while(0) +#define logdev_mark() do {} while(0) + +#define lprint(x...) do {} while(0) +#define ltprint(x...) do {} while(0) +#define lfprint(x...) 
do {} while(0) +#define lfcprint(x...) do {} while(0) +#define lsymbol(x) do {} while(0) +#define lmark() do {} while(0) + +#define lnprint(x...) do {} while(0) +#define ltnprint(x...) do {} while(0) +#define lfnprint(x...) do {} while(0) +#define lfcnprint(x...) do {} while(0) + +#define logdev_time(t) do {} while(0) +#define logdev_register_callback(i,f) do {} while(0) +#define logdev_unregister_callback(i) do {} while(0) +#define in_logdump 0 +#endif /* LOGDEV */ + +#endif Index: linux-trace.git/lib/Kconfig.debug =================================================================== --- linux-trace.git.orig/lib/Kconfig.debug 2009-04-27 12:10:07.000000000 -0400 +++ linux-trace.git/lib/Kconfig.debug 2009-04-28 12:02:07.000000000 -0400 @@ -647,6 +647,68 @@ config BOOT_PRINTK_DELAY BOOT_PRINTK_DELAY also may cause DETECT_SOFTLOCKUP to detect what it believes to be lockup conditions. +config LOGDEV + bool "Enable logdev device" + depends on DEBUG_KERNEL + help + The logdev device stores data into the kernel that can be retrieved + later through a misc device (major 10). The minor number is + dynamic and is posted through /proc/logdev/minor. Utilities + to open and read the device can be found at + http://rostedt.homelinux.com/logdev + + This device allows for tracing lots of information in the kernel + when simply printk is too expensive. When the logdev is initialized, + it allocates a default of 1 meg of memory (in page size units). This + allows for saving data in a ring buffer without the need to allocate. + +config LOGDEV_PROBE + bool + depends on LOGDEV && KPROBES + default y + +choice + prompt "Logdev Backend" + depends on LOGDEV + default LOGDEV_RINGBUF + +config LOGDEV_RINGBUF + bool "Logdev internal ring buffer" + ---help--- + Logdev needs a backend to store the data as it is logged. + this is done in memory, to record without much digression + in performance of machine. With this option logdev creates + its own simple memory ring buffer. 
This allows it to record + things really early in the boot process. + +config LOGDEV_RELAY + bool "Logdev with relayfs (BROKEN)" + ---help--- + Logdev needs a backend to store the data as it is logged. + this is done in memory, to record without much digression + in performance of machine. With this option logdev uses + relayfs as its back end. This makes it more standard to + other things in the kernel, and for userspace. + +endchoice + +config LOGDEV_PAGES + int "Number of pages to allocate for logdev device" + depends on LOGDEV + default 64 + help + The Logdev device allocates a number of pages for the sole + purpose of logging data. This is the number of pages that + the Logdev device should allocate upon loading / initializing. + +config LOGDEV_PRINT_ENABLED + bool "Default Logdev prints should be enabled on startup" + depends on LOGDEV + help + Enable this if you expect the LOGPRINT macros to be enabled + as soon as the logdev device is loaded. Otherwise you must + enable it with /proc/logdev/print + config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL Index: linux-trace.git/drivers/char/sysrq.c =================================================================== --- linux-trace.git.orig/drivers/char/sysrq.c 2009-04-27 12:08:59.000000000 -0400 +++ linux-trace.git/drivers/char/sysrq.c 2009-04-28 13:42:11.000000000 -0400 @@ -30,6 +30,7 @@ #include #include #include /* for fsync_bdev() */ +#include #include #include #include @@ -182,6 +183,31 @@ static struct sysrq_key_op sysrq_mountro .enable_mask = SYSRQ_ENABLE_REMOUNT, }; +static void sysrq_handle_dumplog(int key, struct tty_struct *tty) +{ + logdev_dump(); +} + +static struct sysrq_key_op sysrq_dumplog_op = { + .handler = sysrq_handle_dumplog, + .help_msg = "Dumplog", + .action_msg = "Dump logdev to serial", + .enable_mask = SYSRQ_ENABLE_DUMP, +}; + +static void sysrq_handle_logfunc(int key, struct tty_struct *tty) +{ + logdev_toggle_func_tracing(); +} + +static struct sysrq_key_op 
sysrq_logfunc_op = { + .handler = sysrq_handle_logfunc, + .help_msg = "logfunc", + .action_msg = "toggle logdev function tracing", +}; + +/* END SYNC SYSRQ HANDLERS BLOCK */ + #ifdef CONFIG_LOCKDEP static void sysrq_handle_showlocks(int key, struct tty_struct *tty) { @@ -406,9 +432,10 @@ static struct sysrq_key_op *sysrq_key_ta &sysrq_crashdump_op, /* c & ibm_emac driver debug */ &sysrq_showlocks_op, /* d */ &sysrq_term_op, /* e */ - &sysrq_moom_op, /* f */ +// &sysrq_moom_op, /* f */ + &sysrq_logfunc_op, /* g: May be registered by ppc for kgdb */ - NULL, /* g */ + &sysrq_dumplog_op, /* g */ NULL, /* h - reserved for help */ &sysrq_kill_op, /* i */ #ifdef CONFIG_BLOCK @@ -433,7 +460,7 @@ static struct sysrq_key_op *sysrq_key_ta &sysrq_showstate_op, /* t */ &sysrq_mountro_op, /* u */ /* v: May be registered at init time by SMP VOYAGER */ - NULL, /* v */ + &sysrq_moom_op, /* v */ &sysrq_showstate_blocked_op, /* w */ /* x: May be registered on ppc/powerpc for xmon */ NULL, /* x */ Index: linux-trace.git/arch/x86/mm/fault.c =================================================================== --- linux-trace.git.orig/arch/x86/mm/fault.c 2009-04-06 09:52:03.000000000 -0400 +++ linux-trace.git/arch/x86/mm/fault.c 2009-04-28 12:03:43.000000000 -0400 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -689,8 +690,11 @@ no_context(struct pt_regs *regs, unsigne * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice: */ + lfcnprint("BUG! 
KERNEL FAULT"); + logdev_print_off(); flags = oops_begin(); + logdev_dump(); show_fault_oops(regs, error_code, address); stackend = end_of_stack(tsk); Index: linux-trace.git/kernel/logdev/Makefile =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-trace.git/kernel/logdev/Makefile 2009-04-28 12:15:17.000000000 -0400 @@ -0,0 +1,11 @@ + +ORIG_CFLAGS := $(KBUILD_CFLAGS) +KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) +KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING + +obj-y := logdev.o + +obj-$(CONFIG_LOGDEV_PROBE) += logdev_probe.o +obj-$(CONFIG_LOGDEV_RINGBUF) += logdev_ringbuf.o +obj-$(CONFIG_LOGDEV_RELAY) += logdev_relay.o + Index: linux-trace.git/kernel/logdev/logdev.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-trace.git/kernel/logdev/logdev.c 2009-04-28 18:31:56.000000000 -0400 @@ -0,0 +1,1357 @@ +/* + * logdev.c + * + * Copyright (C) 2004-2006 Steven Rostedt, Kihon Technologies, Inc. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "logdev_priv.h" + +static char *logdev_version = "0.6.0"; + +static DEFINE_PER_CPU(atomic_t, logdev_disable); + +#define LOCK_LOGDEV(cpu) \ + if (atomic_inc_return(&per_cpu(logdev_disable, cpu)) != 1) \ + goto __logdev_out_dec; \ + __raw_spin_lock(&dev->lock) + +#define UNLOCK_LOGDEV(cpu) \ + __raw_spin_unlock(&dev->lock); \ + __logdev_out_dec: \ + atomic_dec(&per_cpu(logdev_disable, cpu)) + +/* used for BUG(), since we can't add any headers in that file */ +int get_processor_id(void) { return smp_processor_id(); } +EXPORT_SYMBOL(get_processor_id); + +void log_show_eip(const char *func) +{ + lfnprint("(%s) from %p",func,__builtin_return_address(0)); +} + +/* bit mask of logdev settings */ +unsigned long logdev_switches; +EXPORT_SYMBOL_GPL(logdev_switches); + +/* Compares good for a difference of ~2 billion. */ +static inline int compare_cnt(unsigned long a, unsigned long b) +{ + long x = (long)a - (long)b; + return x > 0 ? 1 : x < 0 ? 
-1 : 0; +} + +static atomic_t logdev_counter = ATOMIC_INIT(0); +long logdev_counter_inc(void) +{ + return atomic_inc_return(&logdev_counter); +} + +DEFINE_PER_CPU(struct logdev_dev, logdev_dev); + +/* + * We don't support hotplug CPUS + */ +#define check_cpu(cpu) ({ \ + static int once = 1; \ + int x; \ + if (unlikely(x = (cpu >= LOGDEV_CPUS)) && once) { \ + once = 0; \ + printk("BUG %s:%d: cpu %d doesn't fit logdev cpus\n", \ + __FILE__, __LINE__, cpu); \ + } \ + x; \ + }) + +int logdev_copy_from_dev(struct logdev_dev *dev, void *buf, + int size); +int logdev_copy_to_dev(struct logdev_dev *dev, const void *dat, + int size); + +static int option_logdev_print(char *opt) +{ + logdev_print_on(); + if (strstr(opt, "0")) + logdev_print_off(); + return 0; +} + +__setup("logdevprint", option_logdev_print); + +/* + * The following is to register call back functions to print out + * a custom record. + */ + +DEFINE_RAW_SPINLOCK(logdev_callbacks_lock); + +LIST_HEAD(logdev_callbacks); +EXPORT_SYMBOL_GPL(logdev_callbacks); + +int __kprobes logdev_register_callback(int custom_id, + logdev_callback_func func) +{ + struct logdev_callback *cb; + struct logdev_callback *c; + unsigned long flags; + int ret = 0; + + cb = kmalloc(sizeof(*cb),GFP_KERNEL); + if (!cb) { + ret = -ENOMEM; + goto out; + } + + spin_lock_irqsave(&logdev_callbacks_lock, flags); + list_for_each_entry(c, &logdev_callbacks, list) { + if (c->id == custom_id) { + spin_unlock_irqrestore(&logdev_callbacks_lock, flags); + kfree(cb); + ret = -EBUSY; + goto out; + } + } + + cb->id = custom_id; + cb->func = func; + list_add(&cb->list, &logdev_callbacks); + spin_unlock_irqrestore(&logdev_callbacks_lock, flags); + + out: + return ret; +} + +int __kprobes logdev_unregister_callback(int custom_id) +{ + struct logdev_callback *cb; + unsigned long flags; + int ret = 0; + + spin_lock_irqsave(&logdev_callbacks_lock, flags); + list_for_each_entry(cb, &logdev_callbacks, list) { + if (cb->id == custom_id) + break; + } + if 
(&cb->list == &logdev_callbacks) { + ret = -ENODEV; + spin_unlock_irqrestore(&logdev_callbacks_lock, flags); + goto out; + } + + list_del(&cb->list); + spin_unlock_irqrestore(&logdev_callbacks_lock, flags); + + kfree(cb); + + out: + return ret; +} + +/* + * We have a separate kernel buffer for each CPU. + * This buffer is used to copy snprintf data into the ring buffer. + */ +static char kern_buffer[LOGDEV_CPUS][PAGE_SIZE]; + +/** + * logdev_vprint - print to the log like vprintk. + * @str - format string + * @va - variable list argument. + * + * logdev_vprint acts like vprintk but it writes to the logdev device instead + * of a console. + */ +int __kprobes logdev_vprint(const char *str, va_list va) +{ + struct logdev_dev *dev; + char *buffer; + int len=0; + struct logdev_header hdr; + unsigned long flags; + int cpu; + + raw_local_irq_save(flags); + + cpu = smp_processor_id(); + + if (check_cpu(cpu)) + goto out; + + dev = get_logdev(cpu); + if (!dev_running(dev)) + goto out; + + buffer = kern_buffer[cpu]; + len = vsnprintf(buffer, PAGE_SIZE, str, va); + + if (len >= PAGE_SIZE) { + buffer[PAGE_SIZE-1] = 0; + len = PAGE_SIZE; + } + + hdr.counter = logdev_counter_inc(); + hdr.id = LOGDEV_PRINT; + hdr.size = sizeof(hdr) + len; + + LOCK_LOGDEV(cpu); + logdev_copy_to_dev(dev, &hdr, sizeof(hdr)); + logdev_copy_to_dev(dev, buffer, len); + UNLOCK_LOGDEV(cpu); + + out: + raw_local_irq_restore(flags); + + return len; +} + +/** + * logdev_print - print to the log like printk. + * @str - format string + * + * logdev_print acts like printk but it writes to the logdev device instead + * of a console. + */ +int __kprobes logdev_print(const char *str, ...) 
+{ + va_list va; + struct logdev_dev *dev; + char *buffer; + int len=0; + struct logdev_header hdr; + unsigned long flags; + int cpu; + + raw_local_irq_save(flags); + + cpu = smp_processor_id(); + + if (check_cpu(cpu)) + goto out; + + dev = get_logdev(cpu); + if (!dev_running(dev)) + goto out; + + buffer = kern_buffer[cpu]; + + va_start(va,str); + len = vsnprintf(buffer, PAGE_SIZE, str, va); + va_end(va); + + if (len >= PAGE_SIZE) { + buffer[PAGE_SIZE-1] = 0; + len = PAGE_SIZE; + } + + hdr.counter = logdev_counter_inc(); + hdr.id = LOGDEV_PRINT; + hdr.size = sizeof(hdr) + len; + + LOCK_LOGDEV(cpu); + logdev_copy_to_dev(dev, &hdr, sizeof(hdr)); + logdev_copy_to_dev(dev, buffer, len); + UNLOCK_LOGDEV(cpu); + + out: + raw_local_irq_restore(flags); + + return len; +} + +/** + * logdev_print_time - write to the logdev buffer with timestamp + * @str - format string + * + * logdev_print_time is the same as logdev_print but it attaches a timestamp + * to it. Saves on doing it yourself. + */ +int __kprobes logdev_print_time(const char *str, ...) 
+{ + struct logdev_dev *dev; + char *buffer; + va_list va; + int len = 0; + struct logdev_header hdr; + struct logdev_print_time rs; + unsigned long flags; + int cpu; + + raw_local_irq_save(flags); + + cpu = smp_processor_id(); + + if (check_cpu(cpu)) + goto out; + + dev = get_logdev(cpu); + if (!dev_running(dev)) + goto out; + + buffer = kern_buffer[cpu]; + + va_start(va,str); + len = vsnprintf(buffer, PAGE_SIZE, str, va); + va_end(va); + + if (len >= PAGE_SIZE) { + buffer[PAGE_SIZE-1] = 0; + len = PAGE_SIZE; + } + + rs.t = sched_clock(); + + hdr.counter = logdev_counter_inc(); + hdr.id = LOGDEV_PRINT_TIME; + hdr.size = sizeof(hdr) + sizeof(rs) + len; + + LOCK_LOGDEV(cpu); + logdev_copy_to_dev(dev, &hdr, sizeof(hdr)); + logdev_copy_to_dev(dev, &rs, sizeof(rs)); + logdev_copy_to_dev(dev, buffer, len); + UNLOCK_LOGDEV(cpu); + + out: + raw_local_irq_restore(flags); + + return len; +} + +/** + * __logdev_print_time_func - multi-use recording function. + * @curr - record type to write (0, 1, 2 or 3, described below) + * @symbol - address recorded when @curr is 2 (resolved to a name on output) + * @str - printf-style format string, or NULL to record no text. + * + * Stores a timestamp together with the @file and @line of the caller. + * This is really only good for live runs since @file is stored as just a + * pointer, so a user land process would need to have the System.map available. + * + * If curr is 0 it ignores @symbol and records the formatted @str along with + * the file and line number. + * If curr is 1 it ignores @symbol and records the formatted @str along with + * the current pid and comm, file and line number. + * If curr is 2 it records @symbol, the current pid and comm, file and line + * number. If curr is 3 (mark) it records the same minus @symbol. + * + * All options also do a timestamp. Returns the length of the formatted text. + */ +int __kprobes __logdev_print_time_func(int curr, const char *file, int line, + unsigned long symbol, + const char *str, ...) +{ + struct logdev_dev *dev; + char *buffer = buffer; /* shut gcc up, we use it! 
*/ + va_list va; + int len = 0; + struct logdev_header hdr; + struct logdev_print_time_func rs; + unsigned long flags; + int cpu; + + raw_local_irq_save(flags); + + cpu = smp_processor_id(); + + if (check_cpu(cpu)) + goto out; + + dev = get_logdev(cpu); + if (!dev_running(dev)) + goto out; + + /* symbol and mark records carry no string (str is NULL) */ + if (str) { + buffer = kern_buffer[cpu]; + va_start(va,str); + len = vsnprintf(buffer, PAGE_SIZE, str, va); + va_end(va); + + if (len >= PAGE_SIZE) { + buffer[PAGE_SIZE-1] = 0; + len = PAGE_SIZE; + } + } + + rs.t = sched_clock(); + rs.file = file; + rs.line = line; + + hdr.counter = logdev_counter_inc(); + hdr.id = curr == 1 ? LOGDEV_PRINT_TIME_FUNC_CURR : + curr == 2 ? LOGDEV_TIME_FUNC_CURR_SYMBOL : + curr == 3 ? LOGDEV_TIME_FUNC_CURR_MARK : + LOGDEV_PRINT_TIME_FUNC; + hdr.size = sizeof(hdr) + sizeof(rs) + len; + + switch (curr) { + case 2: /* symbol */ + hdr.size += sizeof(symbol); + /* fall through */ + case 3: /* marker */ + case 1: /* pid and comm precede the record */ + hdr.size += sizeof(short) + TASK_COMM_LEN; + break; + } + + LOCK_LOGDEV(cpu); + logdev_copy_to_dev(dev, &hdr, sizeof(hdr)); + if (curr) { + if (curr == 2) /* symbol */ + logdev_copy_to_dev(dev, &symbol, sizeof(symbol)); + + logdev_copy_to_dev(dev, &current->pid, sizeof(short)); + logdev_copy_to_dev(dev, current->comm, TASK_COMM_LEN); + } + logdev_copy_to_dev(dev, &rs, sizeof(rs)); + if (len) + logdev_copy_to_dev(dev, buffer, len); + UNLOCK_LOGDEV(cpu); + + out: + raw_local_irq_restore(flags); + + return len; +} + +/** + * logdev_record - record a custom version record. + * @id - unique id to tell which function will be used to translate this. + * @total_size - the total size in bytes of all the data being passed in. + * @data - a pointer to data to record. + * @size - the size of the individual data. + * + * If you feel like recording your own data, you can use logdev_record. + * just pass your own id, total_size, data and size. 
You can repeat + * passing in data and size and this will stop when a data is NULL. + * For faster processing, it's required that the total size of all data + * being processed must also be passed in. i.e. the sum of the sizes + * used. + * + * This record will be added as LOGDEV_CUSTOM and the given id will be the + * custom id. + * + * We use total_size so that we can avoid multiple copies to get the data + * into the buffer. + */ +int __kprobes logdev_record(int id, int total_size, const void *data, + int size, ...) +{ + struct logdev_dev *dev; + struct logdev_header hdr; + struct logdev_custom rs; + unsigned long flags; + int tsize = size; + int cpu; + int ret = 0; + va_list ap; + + if (!data) + return -EINVAL; + + raw_local_irq_save(flags); + + cpu = smp_processor_id(); + + if (check_cpu(cpu)) + goto out; + + dev = get_logdev(cpu); + if (!dev_running(dev)) + goto out; + + hdr.counter = logdev_counter_inc(); + hdr.id = LOGDEV_CUSTOM; + hdr.size = sizeof(hdr) + sizeof(rs) + total_size; + + rs.id = id; + + LOCK_LOGDEV(cpu); + logdev_copy_to_dev(dev, &hdr, sizeof(hdr)); + logdev_copy_to_dev(dev, &rs, sizeof(rs)); + va_start(ap, size); + do { + ret = logdev_copy_to_dev(dev, data, size); + if (tsize >= total_size) + break; + data = va_arg(ap, void *); + if (data) { + size = va_arg(ap, int); + tsize += size; + if (tsize > total_size) + size -= tsize - total_size; + } + } while (data); + va_end(ap); + + /* + * If total_size didn't equal all sizes, then write padding. + */ + while (tsize < total_size) { + int cnt = total_size - tsize; + if (cnt > PAGE_SIZE) + cnt = PAGE_SIZE; + logdev_copy_to_dev(dev, kern_buffer[cpu], cnt); + tsize += cnt; + } + + UNLOCK_LOGDEV(cpu); + +out: + raw_local_irq_restore(flags); + + return ret; +} + +/** + * logdev_record_write - write directly into logdev buffer. + * @data - data to write + * @size - size of data. + * + * If you just want to write into the buffer using your own methods, then this + * is perfectly fine. 
Just pass in your data and the size of the data being + * passed in. You can read it out later with logdev_record_read. But you wont + * have the benefits of keeping integrity when the buffer overflows. + * + * Warning, only your own logdev_record_read will know what to do with this. + * It is basically useless for logdev_dump. + */ +int __kprobes logdev_record_write(const char *data, int size) +{ + struct logdev_dev *dev; + unsigned long flags; + int cpu; + int ret = 0; + + raw_local_irq_save(flags); + + cpu = smp_processor_id(); + dev = get_logdev(cpu); + if (!dev_running(dev)) { + raw_local_irq_restore(flags); + return 0; + } + + LOCK_LOGDEV(cpu); + ret = logdev_copy_to_dev(dev, data, size); + UNLOCK_LOGDEV(cpu); + raw_local_irq_restore(flags); + + return ret; +} + +/** + * logdev_record_read - read out data directly from the logdev device. + * @data - pointer to buffer to read to + * @size - size of the buffer + * + * logdev_record_read reads some data from the logdev device no matter what + * it was. 
+ */ +int __kprobes logdev_record_read(void *data, int size) +{ + struct logdev_dev *dev; + unsigned long flags; + int cpu; + int ret = 0; + + raw_local_irq_save(flags); + + cpu = smp_processor_id(); + dev = get_logdev(cpu); + if (!dev_running(dev)) { + raw_local_irq_restore(flags); + return 0; + } + + LOCK_LOGDEV(cpu); + ret = logdev_copy_from_dev(dev, data, size); + UNLOCK_LOGDEV(cpu); + raw_local_irq_restore(flags); + + return ret; +} + +int in_logdump; +EXPORT_SYMBOL_GPL(in_logdump); + +static int __kprobes get_next_cpus(struct logdev_header *hdr, + int *_this_cpu, int *_next_cpu) +{ + int cpu; + int f = 0; + struct logdev_dev *dev; + int this_cpu = -1; + int next_cpu = -1; + int last_cpu = 1; + + for_each_present_cpu(cpu) { + + if (cpu >= LOGDEV_CPUS) + break; + + dev = get_logdev(cpu); + if (!dev_suspended(dev)) + continue; + + /* Skip empty buffers */ + if (hdr[cpu].id == LOGDEV_HDR_DONE) + continue; + + /* + * If the header is corrupted, just pick it + * as if this buffer was the last. The corruption + * will break out of the loop in the flush. + */ + if (!logdev_valid(hdr[cpu].id)) { + this_cpu = cpu; + next_cpu = -1; + last_cpu = 1; + break; + } + + /* + * If this is the first cpu, then use it, otherwise, + * compare. 
+ */ + if (!f || + compare_cnt(hdr[cpu].counter, + hdr[this_cpu].counter) < 0) { + if (!f) + f = 1; + else { + /* we already have the first count */ + next_cpu = this_cpu; + last_cpu = 0; + f = 2; + } + this_cpu = cpu; + } else if (f == 1 || + compare_cnt(hdr[cpu].counter, + hdr[next_cpu].counter) < 0) { + f = 2; + next_cpu = cpu; + last_cpu = 0; + } + } + + *_this_cpu = this_cpu; + *_next_cpu = next_cpu; + + return last_cpu; +} + +void __kprobes logdev_time(unsigned long long t) +{ + unsigned long usec_rem; + unsigned long secs; + + usec_rem = do_div(t, 1000000000)/1000; + secs = (unsigned long)t; + + printk("[%5lu.%06lu] ", + secs, usec_rem); +} + +static int __kprobes process_log(struct logdev_dev *dev, + struct logdev_header *hdr, + int cpu) +{ + int i; + int r; + int count; + int corrupt = 0; + int line = 0; + static int newline = 1; + const char *file = NULL; + char comm[TASK_COMM_LEN]; + short pid = 0; + unsigned long symbol = 0; + + r = sizeof(hdr[0]); + + switch (hdr[cpu].id) { + + case LOGDEV_TIME_FUNC_CURR_SYMBOL: + { + logdev_copy_from_dev(dev, &symbol, sizeof(symbol)); + + r += sizeof(symbol); + + /* fall through */ + } + + case LOGDEV_TIME_FUNC_CURR_MARK: + case LOGDEV_PRINT_TIME_FUNC_CURR: + { + logdev_copy_from_dev(dev, &pid, sizeof(short)); + logdev_copy_from_dev(dev, comm, TASK_COMM_LEN); + + r += sizeof(short) + TASK_COMM_LEN; + + /* fall through */ + } + + case LOGDEV_PRINT_TIME_FUNC: + { + struct logdev_print_time_func rs; + int cap = sizeof(rs) - sizeof(struct logdev_print_time); + + logdev_copy_from_dev(dev, &rs, cap); + file = rs.file; + line = rs.line; + + r += cap; + + /* fall through */ + } + + case LOGDEV_PRINT_TIME: + { + struct logdev_print_time rs; + + logdev_copy_from_dev(dev, &rs, sizeof(rs)); + + if (newline) + logdev_time(rs.t); + + r += sizeof(rs); + + /* fall through */ + } + + case LOGDEV_PRINT: + if (newline) { + printk("cpu:%d ",cpu); + + switch (hdr[cpu].id) { + case LOGDEV_TIME_FUNC_CURR_SYMBOL: + case 
LOGDEV_TIME_FUNC_CURR_MARK: + case LOGDEV_PRINT_TIME_FUNC_CURR: + printk ("(%s:%d) ", comm, pid); + /* fall through */ + case LOGDEV_PRINT_TIME_FUNC: + printk("%s:%d ",file, line); + break; + } + } + for (i=r; i < hdr[cpu].size; i += r) { + count = hdr[cpu].size - i; + if (count > PAGE_SIZE-1) + count = PAGE_SIZE-1; + r = logdev_copy_from_dev(dev, kern_buffer[cpu], count); + if (r <= 0) /* error or nothing read: r == 0 would never advance i */ + break; + kern_buffer[cpu][count] = 0; + printk("%s", kern_buffer[cpu]); + newline = (count) && + (kern_buffer[cpu][count - 1] != '\n') ? 0 : 1; + } + if (symbol) { + print_ip_sym(symbol); + newline = 1; + } + if (hdr[cpu].id == LOGDEV_TIME_FUNC_CURR_MARK) { + printk("\n"); + newline = 1; + } + break; + + case LOGDEV_CUSTOM: + { + struct list_head *p; + struct logdev_custom custom; + int len; + + logdev_copy_from_dev(dev,&custom,sizeof(custom)); + + spin_lock(&logdev_callbacks_lock); + i = len = hdr[cpu].size - sizeof(hdr[0]) - sizeof(custom); + if (i > PAGE_SIZE) + i = PAGE_SIZE; + + /* change size to be what the record size is. */ + hdr[cpu].size = i; + + logdev_copy_from_dev(dev,kern_buffer[cpu],i); + + list_for_each(p, &logdev_callbacks) { + struct logdev_callback *cb = + list_entry(p, struct logdev_callback, list); + if (cb->id == custom.id) { + cb->func(&hdr[cpu], &custom, cpu, + kern_buffer[cpu]); + break; + } + } + + /* No record should be bigger than a page. Ignore all else */ + while (i < len) { + int count = len - i; + if (count > PAGE_SIZE) + count = PAGE_SIZE; + logdev_copy_from_dev(dev,kern_buffer[cpu],count); + i += count; + } + + /* check if we didn't find a call back */ + if (p == &logdev_callbacks) { + printk("skipping! 
LOGDEV_CUSTOM id %d\n",custom.id); + } + + spin_unlock(&logdev_callbacks_lock); + break; + } + case LOGDEV_FUNC_CALL: + { + struct logdev_func rs; + + logdev_copy_from_dev(dev, &rs, sizeof(rs)); + logdev_copy_from_dev(dev, comm, TASK_COMM_LEN); + + printk("cpu:%d ",cpu); + logdev_time(rs.t); + printk ("(%s:%d) ", comm, rs.pid); + printk("%pF <-- %pF\n", (void*)rs.ip, (void*)rs.parent_ip); + newline = 1; + + break; + } + default: + corrupt = 1; + if (!dev->corrupted) { + dev->corrupted = 1; + printk(">>>>> Unknown logdev header, cpu %d buffer " + "may be corrupted from this point on\n", cpu); + } else + printk("CPU %d >>> corrupted header <<<\n", cpu); + break; + } + + return corrupt; +} + +static int __kprobes flush_buffer(struct logdev_dev *dev, + struct logdev_header *hdr, + int cpu, int next_cpu, int last_cpu) +{ + int corrupt = 0; + int more_work = 1; + + while (last_cpu || compare_cnt(hdr[cpu].counter, + hdr[next_cpu].counter) <= 0) { + + BUG_ON(hdr[cpu].id == LOGDEV_HDR_DONE); + + /* + * Lets not set off watchdogs. + */ + touch_nmi_watchdog(); + + corrupt = process_log(dev, hdr, cpu); + + /* Read the next header for this */ + if ((logdev_copy_from_dev(dev,&hdr[cpu],sizeof(hdr[0]))) + != sizeof(hdr[0])) { + hdr[cpu].id = LOGDEV_HDR_DONE; + if (last_cpu) + more_work = 0; + break; + } + /* + * if we have a corrupted header, then stop this buffer flush. + */ + if (corrupt) + break; + } + + return more_work; +} + +/** + * logdev_dump - dump the logdev device to console. + * + * This empties the logdev device and dumps it out to console. + * This routine is very useful on kernel errors, where the system + * is in a state of no return. + */ +void __kprobes logdev_dump(void) +{ + int save_print; + struct logdev_dev *dev; + struct logdev_header hdr[LOGDEV_CPUS]; + static int started = 0; + int do_lock = 1; + extern int in_logdump; + unsigned long flags; + int more_work = 0; + int cpu; + + /* + * We don't care about race conditions with this started variable. 
+ * It only exists to keep dumps a little cleaner. If two dumps get + * through at the same time, it doesn't hurt. + */ + if (started) + return; + + started = 1; + + /* + * Because of the started race, we also use in_logdump just for + * reference. + */ + in_logdump++; + + if (oops_in_progress) + do_lock = 0; + + raw_local_irq_save(flags); + + /* + * This is for debugging, so we don't want to reintroduce more output. + */ + save_print = logdev_print_ison(); + logdev_print_off(); + + printk("****** Starting Logdev Dump ********\n"); + + /* + * Read all the available headers for each CPU. + */ + for_each_present_cpu(cpu) { + + if (cpu >= LOGDEV_CPUS) + break; + + dev = get_logdev(cpu); + if (!dev_running(dev)) + continue; + + if (dev->corrupted) { + printk("Warning buffer for CPU %d is corrupted\n", + cpu); + /* will be set when corrupted part is read. */ + dev->corrupted = 0; + } + + /* + * Long time to hold the spin locks, but hey it's just + * debugging. + */ + if (do_lock) + __raw_spin_lock(&dev->lock); + + /* + * Try to limit the amount added while reading + * this buffer, suspend the buffer. (redundant but also good + * for accounting) + */ + dev->init = LOGDEV_DEV_SUSPENDED; + + if ((logdev_copy_from_dev(dev, &hdr[cpu], sizeof(hdr[0]))) + == sizeof(hdr[0])) + /* record that we have a buffer to work with. */ + more_work = 1; + else + /* record that the buffer is empty */ + hdr[cpu].id = LOGDEV_HDR_DONE; + } + + while (more_work) { + int this_cpu; + int next_cpu; + int last_cpu; + + /* + * Find the cpu to work with that has the earliest counter, + * and also the cpu with the next counter. + */ + last_cpu = get_next_cpus(hdr, &this_cpu, &next_cpu); + + cpu = this_cpu; + dev = get_logdev(cpu); + + BUG_ON(this_cpu < 0); + BUG_ON(!last_cpu && next_cpu < 0); + BUG_ON(dev->init != LOGDEV_DEV_SUSPENDED); + BUG_ON(hdr[cpu].id == LOGDEV_HDR_DONE); + + /* + * Now print out all from this buffer until we reach + * the next cpu. 
If this is the last buffer to write + * then finish the buffer. + */ + more_work = flush_buffer(dev, hdr, cpu, next_cpu, last_cpu); + } + + printk( ">>>>> done <<<<<\n"); + + for_each_present_cpu(cpu) { + if (cpu >= LOGDEV_CPUS) + break; + dev = get_logdev(cpu); + if (dev_suspended(dev)) { + dev->init = LOGDEV_DEV_RUNNING; + /* buffers should be emptied */ + dev->corrupted = 0; + if (do_lock) + __raw_spin_unlock(&dev->lock); + } + + } + if (save_print) + logdev_print_on(); + + raw_local_irq_restore(flags); + started = 0; + in_logdump--; +} + +static int __kprobes logdev_panic_handler(struct notifier_block *this, + unsigned long event, + void *unused) +{ + logdev_print_off(); + logdev_dump(); + return NOTIFY_OK; +} + +static struct notifier_block logdev_panic_notifier = { + .notifier_call = logdev_panic_handler, + .next = NULL, + .priority = 150 /* priority: INT_MAX >= x >= 0 */ +}; + +int __kprobes logdev_die_handler(struct notifier_block *self, + unsigned long val, + void *data) +{ + switch (val) { + case DIE_OOPS: + logdev_dump(); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block logdev_die_notifier = { + .notifier_call = logdev_die_handler, + .priority = 200 +}; + +static int logdev_open_generic(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + + +static ssize_t logdev_debug_sw_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + long bit = (long)(filp->private_data); + char buf[16]; + int var = !!(logdev_switches & (1<private_data); + char buf[16]; + + /* We control what bit is, so it had better be right! */ + if (bit >= LOGDEV_SW_BITS) + BUG(); + + bit = (1 << bit); + + if (cnt > 15) + cnt = 15; + + if(copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 10) ? 
bit : 0; + + logdev_switches = (logdev_switches & ~bit) | val; + + filp->f_pos += cnt; + + return cnt; +} + +static struct file_operations logdev_debug_sw_fops = { + .open = logdev_open_generic, + .read = logdev_debug_sw_read, + .write = logdev_debug_sw_write, +}; + +/* + * Read handler for the read-only debugfs files: private_data points at an + * unsigned long, which is reported in hex followed by a newline. + */ +static ssize_t logdev_debug_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + unsigned long *data = filp->private_data; + /* two hex digits per byte, plus the newline and the terminating NUL */ + char buf[2 * sizeof(unsigned long) + 2]; + int r; + + /* bounded write; scnprintf() returns the number of characters stored */ + r = scnprintf(buf, sizeof(buf), "%0lx\n", *data); + return simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); +} + +static struct file_operations logdev_debug_rdonly_fops = { + .open = logdev_open_generic, + .read = logdev_debug_read, +}; + +struct dentry *logdev_d; +EXPORT_SYMBOL_GPL(logdev_d); + +static int logdev_debugfs_init(void) +{ + struct dentry *d_switches; + struct dentry *d_print; +#ifndef CONFIG_LOGDEV_MARKER + struct dentry *d_mark; +#endif + + logdev_d = debugfs_create_dir("logdev", NULL); + if (!logdev_d) { + printk("can't create logdev debugfs\n"); + return 0; + } + if (logdev_d == ERR_PTR(-ENODEV)) { + printk("debugfs not configured in. 
Can't access logdev " + "from userspace\n"); + logdev_d = NULL; + return 0; + } + + d_switches = debugfs_create_file("switches", 0444, logdev_d, + &logdev_switches, + &logdev_debug_rdonly_fops); + d_print = debugfs_create_file("print", 0644, logdev_d, + (void*)LOGDEV_SW_PRINT_ENABLED, + &logdev_debug_sw_fops); +#ifndef CONFIG_LOGDEV_MARKER + d_mark = debugfs_create_file("mark", 0644, logdev_d, + (void*)LOGDEV_SW_MARKER_ENABLED, + &logdev_debug_sw_fops); +#endif + return 0; +} + +extern void logdev_cleanup_priv(struct logdev_dev *dev); + +static void __init logdev_cleanup(void) +{ + struct logdev_dev *dev; + int i; + + atomic_notifier_chain_unregister(&panic_notifier_list, + &logdev_panic_notifier); + + for (i=0; i < LOGDEV_CPUS; i++) { + dev = get_logdev(i); + logdev_cleanup_priv(dev); + } +} + +static void +logdev_func_call(unsigned long ip, unsigned long parent_ip) +{ + struct logdev_header hdr; + struct logdev_dev *dev; + struct logdev_func rs; + unsigned long flags; + int cpu; + + if (!logdev_print_ison()) + return; + + local_irq_save(flags); + cpu = raw_smp_processor_id(); + + dev = get_logdev(cpu); + if (!dev_running(dev)) + goto out; + + LOCK_LOGDEV(cpu); + + hdr.counter = logdev_counter_inc(); + hdr.id = LOGDEV_FUNC_CALL; + hdr.size = sizeof(hdr) + sizeof(rs) + TASK_COMM_LEN; + + rs.t = sched_clock(); + rs.pid = current->pid; + rs.ip = ip; + rs.parent_ip = parent_ip; + logdev_copy_to_dev(dev, &hdr, sizeof(hdr)); + logdev_copy_to_dev(dev, &rs, sizeof(rs)); + logdev_copy_to_dev(dev, current->comm, TASK_COMM_LEN); + UNLOCK_LOGDEV(cpu); + + out: + local_irq_restore(flags); +} + +static struct ftrace_ops trace_ops __read_mostly = +{ + .func = logdev_func_call, +}; + +static int logdev_func_running; + +static void do_logdev_toggle_func_tracing(void) +{ + logdev_func_running ^= 1; + + if (logdev_func_running) + register_ftrace_function(&trace_ops); + else + unregister_ftrace_function(&trace_ops); + + printk("Logdev %s\n", logdev_func_running ? 
"activated" : "deactivate"); +} + +static struct task_struct *logdev_task; + +void logdev_toggle_func_tracing(void) +{ + if (logdev_task) + wake_up_process(logdev_task); +} + +static int klogdevd(void *unused) +{ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + do_logdev_toggle_func_tracing(); + } + __set_current_state(TASK_RUNNING); + return 0; +} + +/* + * Start the klogdevd kernel thread that logdev_toggle_func_tracing() wakes. + * + * kthread_run() reports failure with an ERR_PTR() value, which must not be + * stored in logdev_task: logdev_toggle_func_tracing() only checks it for + * NULL before handing it to wake_up_process(). + * + * Returns 0 on success or the negative errno from kthread_run(). + */ +static int __init klogdevd_init(void) +{ + struct task_struct *task; + + task = kthread_run(klogdevd, NULL, "klogdevd"); + if (IS_ERR(task)) + return PTR_ERR(task); + + logdev_task = task; + return 0; +} + +device_initcall(klogdevd_init); + +extern int initialize_logdev(void); + +int __init logdev_init(void) +{ + int res = 0; + int cpu; + struct logdev_dev *dev; + static int init = 0; + + if (init) + return 0; + + init = 1; + + printk("Logdevice: copyright Steven Rostedt, Kihon Technologies Inc." + " (Version %s)\n", + logdev_version); + + for_each_present_cpu(cpu) { + + if (cpu >= LOGDEV_CPUS) { + printk(KERN_WARNING "More present cpus (%d) than " + "NR_CPUS (%d)\n", + cpu,LOGDEV_CPUS); + break; + } + + printk("Initializing logdev for cpu: %d\n",cpu); + + dev = get_logdev(cpu); + dev->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + dev->init = LOGDEV_DEV_UP; + } + + if (initialize_logdev() < 0) + goto fail; + + atomic_notifier_chain_register(&panic_notifier_list, + &logdev_panic_notifier); + + register_die_notifier(&logdev_die_notifier); + + res = 0; + +#ifdef CONFIG_LOGDEV_PRINT_ENABLED + logdev_print_on(); +#endif + +out: + return res; + +fail: + logdev_cleanup(); + goto out; +} + +EXPORT_SYMBOL_GPL(logdev_time); +EXPORT_SYMBOL_GPL(logdev_print); +EXPORT_SYMBOL_GPL(logdev_print_time); +EXPORT_SYMBOL_GPL(__logdev_print_time_func); +EXPORT_SYMBOL_GPL(logdev_record); +EXPORT_SYMBOL_GPL(logdev_record_write); +EXPORT_SYMBOL_GPL(logdev_record_read); +EXPORT_SYMBOL_GPL(logdev_dump); + +core_initcall(logdev_init); +postcore_initcall(logdev_debugfs_init); Index: linux-trace.git/kernel/logdev/logdev_priv.h =================================================================== --- 
/dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-trace.git/kernel/logdev/logdev_priv.h 2009-04-28 12:59:13.000000000 -0400 @@ -0,0 +1,74 @@ +#ifndef _LOG_DEV_PRIV_H +#define _LOG_DEV_PRIV_H + +/* Random numbers out of my head used for MAGIC */ +#define LOGDEV_CUSTOM 0x1afb +#define LOGDEV_SWITCH_ID 0x2afc +#define LOGDEV_PRINT 0x4adb +#define LOGDEV_PRINT_TIME 0x4adc +#define LOGDEV_PRINT_TIME_FUNC 0x4add +#define LOGDEV_PRINT_TIME_FUNC_CURR 0x4ade +#define LOGDEV_TIME_FUNC_CURR_SYMBOL 0x4afe +#define LOGDEV_TIME_FUNC_CURR_MARK 0x4baa +#define LOGDEV_FUNC_CALL 0x4daa +#define LOGDEV_HDR_DONE 0xbbbb /* internal use only */ + +#define logdev_spinlock_t raw_spinlock_t + +#ifndef CONFIG_PREEMPT_RT +# ifndef DEFINE_RAW_SPINLOCK +# define DEFINE_RAW_SPINLOCK DEFINE_SPINLOCK +# endif +#else /* CONFIG_PREEMPT_RT */ +# undef logdev_spinlock_t +# define logdev_spinlock_t __raw_spinlock_t +#endif /* CONFIG_PREEMPT_RT */ + + +static inline int logdev_valid(int id) +{ + switch (id) { + case LOGDEV_CUSTOM: + case LOGDEV_SWITCH_ID: + case LOGDEV_PRINT: + case LOGDEV_PRINT_TIME: + case LOGDEV_PRINT_TIME_FUNC: + case LOGDEV_PRINT_TIME_FUNC_CURR: + case LOGDEV_TIME_FUNC_CURR_SYMBOL: + case LOGDEV_TIME_FUNC_CURR_MARK: + case LOGDEV_FUNC_CALL: + return 1; + } + return 0; +} + +struct logdev_dev { + int init; + int corrupted; + logdev_spinlock_t lock; + void *priv; +}; + +DECLARE_PER_CPU(struct logdev_dev, logdev_dev); +#define get_logdev(cpu) &per_cpu(logdev_dev, cpu) + +#define LOGDEV_CPUS NR_CPUS + +#define LOGDEV_DEV_UNINITALIZED 0 +#define LOGDEV_DEV_RUNNING 1 +#define LOGDEV_DEV_SUSPENDED 2 +#define LOGDEV_DEV_UP 3 + +#define dev_running(dev) ((dev)->init == LOGDEV_DEV_RUNNING) +#define dev_suspended(dev) ((dev)->init == LOGDEV_DEV_SUSPENDED) +#define dev_up(dev) ((dev)->init == LOGDEV_DEV_UP) + +extern long logdev_counter_inc(void); + +extern struct dentry *logdev_d; + +#ifdef CONFIG_LOGDEV_RINGBUF +#include "logdev_ringbuf.h" +#endif + +#endif Index: 
linux-trace.git/kernel/logdev/logdev_probe.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-trace.git/kernel/logdev/logdev_probe.c 2009-04-28 12:02:07.000000000 -0400 @@ -0,0 +1,1427 @@ +/* + * logdev_probe.c + * + * Copyright (C) 2004-2006 Steven Rostedt + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "logdev_priv.h" + +#undef DPRINTK +#if 1 +# define DPRINTK(x...) printk(x) +#else +# define DPRINTK(x...) 
do { } while(0) +#endif + +static atomic_t logdev_probe_next_id = ATOMIC_INIT(1); +static DEFINE_MUTEX(probe_list_lock); +enum logprobe_type { + LOGPROBE_BP, + LOGPROBE_FUNC, + LOGPROBE_CURR, + LOGPROBE_VAR, +}; + +static char *logdev_probe_buffer; +static int logdev_probe_buffer_sz; +static DEFINE_RAW_SPINLOCK(probe_buffer_lock); +static DEFINE_PER_CPU(struct jprobe *, jprobe); + +/* ---------------- cut here for user space headers -------------------- */ + +/* "s/user_//" for the below when bringing to user space */ + +#define LOGPROBE_IOCTL_BASE 'l' + +#define PROBE_IOW(nr, type) _IOW(LOGPROBE_IOCTL_BASE, nr, type) + +#define LOGPROBE_DELPOINT PROBE_IOW(0, unsigned long) +#define LOGPROBE_ADDWATCH PROBE_IOW(1, struct user_logdev_probe_watch) +#define LOGPROBE_ADDFUNC PROBE_IOW(2, struct user_logdev_probe_func) +#define LOGPROBE_ADDCURR PROBE_IOW(3, struct user_logdev_probe_curr) +#define LOGPROBE_ADDVAR PROBE_IOW(4, struct user_logdev_probe_var) + +#define LOGPROBE_ID_WATCH 0x56570001 +#define LOGPROBE_ID_FUNC 0x56570002 +#define LOGPROBE_ID_CURR 0x56570003 +#define LOGPROBE_ID_VAR 0x56570004 + +#define LOGPROBE_TYPE_PREEMPT 0x1111 + +struct logdev_probe_hdr_log { + unsigned long long t; + char comm[TASK_COMM_LEN]; + int pid; + unsigned long addr; + unsigned long offset; + int func_symbol_size; +} __attribute__((packed)); + +struct logdev_probe_watch_log { + struct logdev_probe_hdr_log hdr; + unsigned long var; + unsigned long value; + int var_symbol_size; + char symbols[0]; +} __attribute__((packed)); + +struct logdev_probe_func_log { + struct logdev_probe_hdr_log hdr; + int str_size; + char symbols[0]; +} __attribute__((packed)); + +struct logdev_probe_curr_log { + struct logdev_probe_hdr_log hdr; + int index; + unsigned long value; + int prefix_size; + char symbols[0]; +} __attribute__((packed)); + +struct logdev_probe_var_log { + struct logdev_probe_hdr_log hdr; + int type; + unsigned long value; + int prefix_size; + char symbols[0]; +} 
__attribute__((packed)); + +struct user_logdev_probe_watch { + unsigned long addr; + unsigned long watch; + const char *func; + const char *var; + int func_size; + int var_size; +}; + +struct user_logdev_probe_func { + const char *func; + const char *fmt1; + const char *fmt2; + int func_size; + int fmt1_size; + int fmt2_size; +}; + +struct user_logdev_probe_curr { + unsigned long addr; + const char *func; + const char *prefix; + int offset; + int func_size; + int prefix_size; +}; + +struct user_logdev_probe_var { + unsigned long addr; + const char *func; + const char *prefix; + int type; + int func_size; + int prefix_size; +}; + +/* ---------------- end of user space cut ---------------- */ + +struct logdev_probe_hdr { + unsigned long addr; + unsigned long offset; + int func_symbol_size; + char func_symbol[KSYM_NAME_LEN+1]; +}; + +struct logdev_probe_watch { + struct logdev_probe_hdr hdr; + struct kprobe kp; + unsigned long var; + int var_symbol_size; + char var_symbol[KSYM_NAME_LEN+1]; +}; + +struct logdev_probe_func { + struct logdev_probe_hdr hdr; + struct jprobe jp; + const char *fmt1; + const char *fmt2; +}; + +struct logdev_probe_curr { + struct logdev_probe_hdr hdr; + struct kprobe kp; + int index; + char *prefix; + int prefix_size; +}; + +struct logdev_probe_var { + struct logdev_probe_hdr hdr; + struct kprobe kp; + int type; + char *prefix; + int prefix_size; +}; + +struct logdev_probe { + struct list_head list; + unsigned long id; + enum logprobe_type type; + union { + struct logdev_probe_watch watch; + struct logdev_probe_func func; + struct logdev_probe_curr curr; + struct logdev_probe_var var; + }; +}; + +static LIST_HEAD(logdev_probes); + +static void logdev_print_time_cpu(unsigned long long t, int cpu) +{ + unsigned long usec_rem; + unsigned long secs; + + usec_rem = do_div(t, 1000000000)/1000; + secs = (unsigned long)t; + + printk("[%5lu.%06lu] cpu:%d ", + secs, usec_rem, cpu); +} + +/* + * This is called under a lock, so we can use a static buffer. 
+ */ +static char probe_buf[PAGE_SIZE]; + +/* ------------------- cut here for user space print -------------- */ + +/* "s/printk/printf" */ + +static const char *logdev_print_symbol(const char *sym, int size) +{ + /* we don't trust not having a null char */ + while (size) { + int sz = size; + if (sz > PAGE_SIZE-1) + sz = PAGE_SIZE-1; + memcpy(probe_buf, sym, sz); + probe_buf[sz] = 0; + printk("%s", probe_buf); + sym += sz; + size -= sz; + } + + return sym; +} + +static const char * logdev_print_hdr(int cpu, + const char *sym, + struct logdev_probe_hdr_log *hdr) +{ + logdev_print_time_cpu(hdr->t, cpu); + + memcpy(probe_buf, hdr->comm, TASK_COMM_LEN); + probe_buf[TASK_COMM_LEN] = 0; + printk("%s:%d ", probe_buf, hdr->pid); + + printk("func: "); + sym = logdev_print_symbol(sym, hdr->func_symbol_size); + printk(" (%p) ", (void*)hdr->addr); + + return sym; +} + +static void logdev_probe_watch_print(int cpu, int size, + struct logdev_probe_watch_log *lp) +{ + const char *sym; + + if (size < sizeof(*lp) || + size != sizeof(*lp) + lp->var_symbol_size + + lp->hdr.func_symbol_size) { + printk("CPU[%d] Bad probe var record!\n", + cpu); + return; + } + + sym = logdev_print_hdr(cpu, lp->symbols, &lp->hdr); + + if (lp->var) { + printk("var: "); + sym = logdev_print_symbol(sym, lp->var_symbol_size); + printk(" (%p) = %08lx", + (void*)lp->var, + lp->value); + } + printk("\n"); + +} + +static void logdev_probe_func_print(int cpu, int size, + struct logdev_probe_func_log *lp) +{ + const char *sym; + + if (size < sizeof(*lp) || + size != sizeof(*lp) + lp->hdr.func_symbol_size + + lp->str_size) { + printk("CPU[%d] Bad probe func record!\n", + cpu); + return; + } + + sym = logdev_print_hdr(cpu, lp->symbols, &lp->hdr); + + if (lp->str_size) + logdev_print_symbol(sym, lp->str_size); + + printk("\n"); + +} + +static void logdev_probe_curr_print(int cpu, int size, + struct logdev_probe_curr_log *lp) +{ + const char *sym; + + if (size < sizeof(*lp) || + size != sizeof(*lp) + 
lp->hdr.func_symbol_size + + lp->prefix_size) { + printk("CPU[%d] Bad probe curr record!\n", + cpu); + return; + } + + sym = logdev_print_hdr(cpu, lp->symbols, &lp->hdr); + + if (lp->prefix_size) + logdev_print_symbol(sym, lp->prefix_size); + + printk(" index:%d = %p", lp->index, (void*)lp->value); + + printk("\n"); + +} + +static void logdev_probe_var_print(int cpu, int size, + struct logdev_probe_var_log *lp) +{ + const char *sym; + + if (size < sizeof(*lp) || + size != sizeof(*lp) + lp->hdr.func_symbol_size + + lp->prefix_size) { + printk("CPU[%d] Bad probe curr record!\n", + cpu); + return; + } + + sym = logdev_print_hdr(cpu, lp->symbols, &lp->hdr); + + switch (lp->type) { + case LOGPROBE_TYPE_PREEMPT: + printk(" preempt_count:"); + break; + } + + if (lp->prefix_size) + logdev_print_symbol(sym, lp->prefix_size); + + printk("0x%lx", lp->value); + + printk("\n"); + +} + +static void logdev_probe_callback(struct logdev_header *hdr, + struct logdev_custom *custom, + int cpu, + void *rec) +{ + switch (custom->id) { + case LOGPROBE_ID_WATCH: + logdev_probe_watch_print(cpu, hdr->size, rec); + break; + case LOGPROBE_ID_FUNC: + logdev_probe_func_print(cpu, hdr->size, rec); + break; + case LOGPROBE_ID_CURR: + logdev_probe_curr_print(cpu, hdr->size, rec); + break; + case LOGPROBE_ID_VAR: + logdev_probe_var_print(cpu, hdr->size, rec); + break; + default: + printk("Unknown probe callback id %x\n", + custom->id); + break; + } +} +/* ------------------ end cut for user space printing ------------------- */ + + +/************************ Kprobes ******************************/ + +static void __kprobes logprobe_hdr(struct logdev_probe_hdr_log *lp, + struct logdev_probe_hdr *p) +{ + lp->t = sched_clock(); + memcpy(lp->comm, current->comm, TASK_COMM_LEN); + lp->pid = current->pid; + lp->addr = p->addr; + lp->offset = p->offset; + lp->func_symbol_size = p->func_symbol_size; +} + +static int __kprobes logprobe_watch(struct kprobe *kp, struct pt_regs *regs) +{ + struct 
logdev_probe_watch *p = + container_of(kp, struct logdev_probe_watch, kp); + struct logdev_probe_watch_log lp; + int total_sz = sizeof(lp) + p->var_symbol_size + + p->hdr.func_symbol_size; + + rcu_read_lock(); + + logprobe_hdr(&lp.hdr, &p->hdr); + + lp.var = p->var; + if (p->var) + lp.value = *(unsigned long*)(p->var); + lp.var_symbol_size = p->var_symbol_size; + logdev_record(LOGPROBE_ID_WATCH, total_sz, + &lp, sizeof(lp), + p->hdr.func_symbol, p->hdr.func_symbol_size, + p->var_symbol, p->var_symbol_size, + NULL); + + rcu_read_unlock(); + + return 0; +} + + +/* + * jprobe entry for function probes. Formats the first argument with fmt1 + * and the remaining variable arguments with fmt2 into logdev_probe_buffer + * (under probe_buffer_lock), then logs the result as a LOGPROBE_ID_FUNC + * record. Every path leaves through jprobe_return(). + */ +static long __kprobes logprobe_func(unsigned long param, ...) +{ + struct jprobe *jp; + struct logdev_probe_func *p; + struct logdev_probe_func_log lp; + va_list ap; + int total_sz; + unsigned long flags; + int i = -1; + + if (!logdev_probe_buffer) + jprobe_return(); + + jp = __get_cpu_var(jprobe); + if (!jp) + jprobe_return(); + + p = container_of(jp, struct logdev_probe_func, jp); + + /* find probe */ + rcu_read_lock(); + + logprobe_hdr(&lp.hdr, &p->hdr); + + spin_lock_irqsave(&probe_buffer_lock, flags); + if (p->fmt1) { + i = snprintf(logdev_probe_buffer, logdev_probe_buffer_sz, + p->fmt1, param); + + if (p->fmt2 && i < logdev_probe_buffer_sz) { + va_start(ap, param); + i += vsnprintf(logdev_probe_buffer + i, + logdev_probe_buffer_sz - i, + p->fmt2, ap); + va_end(ap); + } + /* + * snprintf()/vsnprintf() return the length the full output + * would need, so i can reach or exceed the buffer size on + * truncation. Clamp to the last valid index so both the + * terminator and str_size stay within logdev_probe_buffer_sz. + */ + if (i >= logdev_probe_buffer_sz) + i = logdev_probe_buffer_sz - 1; + + if (i >= 0) + logdev_probe_buffer[i] = 0; + } + + /* i stays -1 when there is no fmt1, giving an empty string part */ + lp.str_size = i+1; + + total_sz = sizeof(lp) + lp.hdr.func_symbol_size + lp.str_size; + + logdev_record(LOGPROBE_ID_FUNC, total_sz, + &lp, sizeof(lp), + p->hdr.func_symbol, p->hdr.func_symbol_size, + logdev_probe_buffer, lp.str_size, + NULL); + spin_unlock_irqrestore(&probe_buffer_lock, flags); + + rcu_read_unlock(); + + jprobe_return(); + /* NOT REACHED */ + return 0; +} + +/* + * On some archs (i386!!!!) the parameters are sometimes put into + * regs, and sometimes on the stack. 
And some of these archs will + * always put the parameters on the stack if the function happens + * to include variable args (...). This means that the above + * logprobe_func can't track this. So we have the following helper + * functions to load 0 to 6 args. Any more, you are on your own. + * If the variable args don't work, then tough. ;-) + */ +static long logprobe_func_0(void) +{ + logprobe_func(0); + return 0; +} + +static long logprobe_func_1(unsigned long p1) +{ + logprobe_func(p1); + return 0; +} + +static long logprobe_func_2(unsigned long p1, unsigned long p2) +{ + logprobe_func(p1, p2); + return 0; +} + +static long logprobe_func_3(unsigned long p1, unsigned long p2, + unsigned long p3) +{ + logprobe_func(p1, p2, p3); + return 0; +} + +static long logprobe_func_4(unsigned long p1, unsigned long p2, + unsigned long p3, unsigned long p4) +{ + logprobe_func(p1, p2, p3, p4); + return 0; +} + +static long logprobe_func_5(unsigned long p1, unsigned long p2, + unsigned long p3, unsigned long p4, + unsigned long p5) +{ + logprobe_func(p1, p2, p3, p4, p5); + return 0; +} + +static long logprobe_func_6(unsigned long p1, unsigned long p2, + unsigned long p3, unsigned long p4, + unsigned long p5, unsigned long p6) +{ + logprobe_func(p1, p2, p3, p4, p5, p6); + return 0; +} + +static int __kprobes logprobe_curr(struct kprobe *kp, struct pt_regs *regs) +{ + struct logdev_probe_curr *p = + container_of(kp, struct logdev_probe_curr, kp); + struct logdev_probe_curr_log lp; + int total_sz = sizeof(lp) + p->prefix_size + + p->hdr.func_symbol_size; + unsigned long *ptr; + + rcu_read_lock(); + + logprobe_hdr(&lp.hdr, &p->hdr); + + lp.index = p->index; + + ptr = (unsigned long*)((char*)current + p->index); + lp.value = *ptr; + + lp.prefix_size = p->prefix_size; + + logdev_record(LOGPROBE_ID_CURR, total_sz, + &lp, sizeof(lp), + p->hdr.func_symbol, p->hdr.func_symbol_size, + p->prefix, p->prefix_size, + NULL); + + rcu_read_unlock(); + + return 0; +} + +static int __kprobes 
logprobe_var(struct kprobe *kp, struct pt_regs *regs) +{ + /* + * kprobe pre-handler for "var" probes: logs a LOGPROBE_ID_VAR record + * carrying the value selected by the probe's type together with the + * probe's function symbol and prefix string. + */ + struct logdev_probe_var *p = + container_of(kp, struct logdev_probe_var, kp); + struct logdev_probe_var_log lp; + int total_sz = sizeof(lp) + p->prefix_size + + p->hdr.func_symbol_size; + unsigned long pc = preempt_count(); + + rcu_read_lock(); + + logprobe_hdr(&lp.hdr, &p->hdr); + + lp.type = p->type; + + switch(lp.type) { + case LOGPROBE_TYPE_PREEMPT: + lp.value = pc; + /* without this break the default case overwrote the value with 0 */ + break; + default: + lp.value = 0; + break; + } + + lp.prefix_size = p->prefix_size; + + logdev_record(LOGPROBE_ID_VAR, total_sz, + &lp, sizeof(lp), + p->hdr.func_symbol, p->hdr.func_symbol_size, + p->prefix, p->prefix_size, + NULL); + + rcu_read_unlock(); + + return 0; +} + +/*********************** end Kprobes **************************/ + +static int __kprobes logdev_probe_update_hdr(struct logdev_probe_hdr *p) +{ + const char *sym; + unsigned long size; + char *modname; + + if (p->addr) { + DPRINTK("%s:%d p->addr = %lx\n", + __FUNCTION__, __LINE__, p->addr); + sym = kallsyms_lookup(p->addr, &size, &p->offset, &modname, + p->func_symbol); + DPRINTK("%s:%d sym = %p (%s)\n", + __FUNCTION__, __LINE__, sym, + sym ? 
: "nil"); + if (sym) + p->func_symbol_size = strlen(sym); + } else { + DPRINTK("%s:%d p->func_symbol = (%s)\n", + __FUNCTION__, __LINE__, + p->func_symbol); + size = strlen(p->func_symbol); + p->func_symbol_size = size; + DPRINTK("%s:%d size = %ld\n", + __FUNCTION__, __LINE__, size); + p->addr = kallsyms_lookup_name(p->func_symbol); + if (!p->addr) { + DPRINTK("func name %s not found", p->func_symbol); + return -EINVAL; + } + } + + return 0; +} + +static int __kprobes logdev_probe_add_watch(struct logdev_probe *lh) +{ + struct logdev_probe_watch *p = &lh->watch; + const char *sym; + char *modname; + unsigned long size; + unsigned long offset; + unsigned long watch = p->var; + int ret; + + lh->type = LOGPROBE_BP; + + DPRINTK("set up watch\n"); + /* can only have one or the other */ + if (p->hdr.func_symbol[0] && p->hdr.addr) + return -EINVAL; + if (p->var_symbol[0] && watch) + return -EINVAL; + + DPRINTK("passed first test\n"); + /* TBD: check upper limit ?? */ + if (!p->hdr.func_symbol[0] && p->hdr.addr < PAGE_OFFSET) + return -EINVAL; + + DPRINTK("passed second test\n"); + /* + * kallsyms requires no scheduling. + */ + preempt_disable(); + ret = logdev_probe_update_hdr(&p->hdr); + if (ret) { + preempt_enable(); + return ret; + } + + if (watch) { + sym = kallsyms_lookup(watch, &size, &offset, &modname, + p->var_symbol); + DPRINTK("%s:%d sym = %p (%s)\n", + __FUNCTION__, __LINE__, sym, + sym ? 
: "nil"); + if (sym) + p->var_symbol_size = strlen(sym); + } else if (p->var_symbol[0]) { + size = strlen(p->var_symbol); + DPRINTK("%s:%d var = (%s)\n", + __FUNCTION__, __LINE__, + p->var_symbol); + p->var_symbol_size = size; + watch = kallsyms_lookup_name(p->var_symbol); + if (!watch) { + DPRINTK("var name %s not found", p->var_symbol); + preempt_enable(); + return -EINVAL; + } + } + preempt_enable(); + + p->var = watch; + p->kp.pre_handler = logprobe_watch; + p->kp.addr = (kprobe_opcode_t *)p->hdr.addr; + + ret = register_kprobe(&p->kp); + if (ret < 0) + return ret; + + lh->id = atomic_inc_return(&logdev_probe_next_id); + + return lh->id; +} + +/* Hijack the jprobes registering! */ + +static int __kprobes logdev_setjmp_pre_handler(struct kprobe *p, + struct pt_regs *regs) +{ + __get_cpu_var(jprobe) = container_of(p, struct jprobe, kp); + return setjmp_pre_handler(p, regs); +} + +static int __kprobes logdev_longjmp_break_handler(struct kprobe *p, + struct pt_regs *regs) +{ + int ret; + ret = longjmp_break_handler(p, regs); + __get_cpu_var(jprobe) = NULL; + return ret; +} + +static int logdev_register_jprobe(struct jprobe *jp) +{ + jp->kp.pre_handler = logdev_setjmp_pre_handler; + jp->kp.break_handler = logdev_longjmp_break_handler; + + return register_kprobe(&jp->kp); +} + +static int __kprobes logdev_probe_add_func(struct logdev_probe *lh, + const char *fmt1, + const char *fmt2, + int cnt) +{ + struct logdev_probe_func *p = &lh->func; + int ret; + + lh->type = LOGPROBE_FUNC; + + /* + * kallsyms requires no scheduling. 
+ */ + preempt_disable(); + + ret = logdev_probe_update_hdr(&p->hdr); + preempt_enable(); + + if (ret) + return ret; + + p->fmt1 = fmt1; + p->fmt2 = fmt2; + switch (cnt) { + case 0: + p->jp.entry = (kprobe_opcode_t *)logprobe_func_0; + break; + case 1: + p->jp.entry = (kprobe_opcode_t *)logprobe_func_1; + break; + case 2: + p->jp.entry = (kprobe_opcode_t *)logprobe_func_2; + break; + case 3: + p->jp.entry = (kprobe_opcode_t *)logprobe_func_3; + break; + case 4: + p->jp.entry = (kprobe_opcode_t *)logprobe_func_4; + break; + case 5: + p->jp.entry = (kprobe_opcode_t *)logprobe_func_5; + break; + case 6: + p->jp.entry = (kprobe_opcode_t *)logprobe_func_6; + break; + default: + p->jp.entry = (kprobe_opcode_t *)logprobe_func; + break; + } + p->jp.kp.addr = (kprobe_opcode_t *)p->hdr.addr; + + ret = logdev_register_jprobe(&p->jp); + if (ret < 0) + return ret; + + lh->id = atomic_inc_return(&logdev_probe_next_id); + + return lh->id; +} + +static int __kprobes logdev_probe_add_curr(struct logdev_probe *lh) +{ + struct logdev_probe_curr *p = &lh->curr; + int ret; + + lh->type = LOGPROBE_CURR; + + /* + * kallsyms requires no scheduling. + */ + preempt_disable(); + ret = logdev_probe_update_hdr(&p->hdr); + preempt_enable(); + if (ret) + return ret; + + p->kp.pre_handler = logprobe_curr; + p->kp.addr = (kprobe_opcode_t *)p->hdr.addr; + + ret = register_kprobe(&p->kp); + if (ret < 0) + return ret; + + lh->id = atomic_inc_return(&logdev_probe_next_id); + + return lh->id; +} + +static int __kprobes logdev_probe_add_var(struct logdev_probe *lh) +{ + struct logdev_probe_var *p = &lh->var; + int ret; + + lh->type = LOGPROBE_VAR; + + /* + * kallsyms requires no scheduling. 
+ */ + preempt_disable(); + ret = logdev_probe_update_hdr(&p->hdr); + preempt_enable(); + if (ret) + return ret; + + p->kp.pre_handler = logprobe_var; + p->kp.addr = (kprobe_opcode_t *)p->hdr.addr; + + ret = register_kprobe(&p->kp); + if (ret < 0) + return ret; + + lh->id = atomic_inc_return(&logdev_probe_next_id); + + return lh->id; +} + +/************************ User Land ******************************/ + +/* + * Copy a struct user_logdev_probe_watch from user space, build the matching + * logdev_probe and register it through logdev_probe_add_watch(). + * + * Returns the value from logdev_probe_add_watch() on success or a negative + * errno on failure. + */ +static int logdev_setup_probe_watch(unsigned long __user *arg) +{ + struct user_logdev_probe_watch *uprobe; + struct logdev_probe *lh = NULL; + struct logdev_probe_watch *probe; + int ret; + + uprobe = kmalloc(sizeof(*uprobe), GFP_KERNEL); + + if (!uprobe) + return -ENOMEM; + + ret = -EFAULT; + if(copy_from_user(uprobe, arg, sizeof(*uprobe))) + goto fail; + + /* + * The sizes are signed ints supplied by user space; a negative + * value would slip past the upper clamps below and be converted + * to a huge unsigned length by copy_from_user(). + */ + ret = -EINVAL; + if (uprobe->func_size < 0 || uprobe->var_size < 0) + goto fail; + + ret = -ENOMEM; + lh = kzalloc(sizeof(*lh), GFP_KERNEL); + if (!lh) + goto fail; + + probe = &lh->watch; + + ret = -EFAULT; + if (uprobe->func) { + if (uprobe->func_size > KSYM_NAME_LEN) + uprobe->func_size = KSYM_NAME_LEN; + if (copy_from_user(probe->hdr.func_symbol, uprobe->func, + uprobe->func_size)) + goto fail; + } + if (uprobe->var) { + if (uprobe->var_size > KSYM_NAME_LEN) + uprobe->var_size = KSYM_NAME_LEN; + if (copy_from_user(probe->var_symbol, uprobe->var, + uprobe->var_size)) + goto fail; + } + probe->hdr.addr = uprobe->addr; + probe->var = uprobe->watch; + + ret = logdev_probe_add_watch(lh); + if (ret < 0) + goto fail; + + mutex_lock(&probe_list_lock); + list_add_tail_rcu(&lh->list, &logdev_probes); + mutex_unlock(&probe_list_lock); + + /* the user-space copy is only needed during setup */ + kfree(uprobe); + + return ret; + + fail: + /* kfree(NULL) is a no-op, so no guards are needed */ + kfree(uprobe); + kfree(lh); + return ret; +} + +/* + * Count the conversion specifications in a printf-style format string. + * A literal "%%" is skipped and not counted. + */ +static int find_count(const char *fmt) +{ + int cnt = 0; + + for (; *fmt; fmt++) { + if (*fmt != '%') + continue; + if (fmt[1] == '%') { + /* + * Step onto the second '%' only; the loop increment + * then moves past it. Skipping two here would jump + * over the next character (possibly the terminator). + */ + fmt++; + } else { + cnt++; + } + } + return cnt; +} + +static int logdev_setup_probe_func(unsigned long __user *arg) +{ + struct user_logdev_probe_func *uprobe; + struct logdev_probe *lh = NULL; + struct logdev_probe_func *probe; + char *fmt1 
= NULL; + char *fmt2 = NULL; + int cnt = 0; + int ret; + + uprobe = kmalloc(sizeof(*uprobe), GFP_KERNEL); + + if (!uprobe) + return -ENOMEM; + + ret = -EFAULT; + if(copy_from_user(uprobe, arg, sizeof(*uprobe))) + goto fail; + + ret = -EINVAL; + if (!uprobe->func || !uprobe->func_size) + goto fail; + + ret = -ENOMEM; + lh = kzalloc(sizeof(*lh), GFP_KERNEL); + if (!lh) + goto fail; + + probe = &lh->func; + + ret = -EFAULT; + if (uprobe->func_size > KSYM_NAME_LEN) + uprobe->func_size = KSYM_NAME_LEN; + if (copy_from_user(probe->hdr.func_symbol, uprobe->func, + uprobe->func_size)) + goto fail; + + if (uprobe->fmt1 && uprobe->fmt1_size) { + ret = -ENOMEM; + fmt1 = kmalloc(uprobe->fmt1_size+1, GFP_KERNEL); + if (!fmt1) + goto fail; + ret = -EFAULT; + if (copy_from_user(fmt1, uprobe->fmt1, + uprobe->fmt1_size)) + goto fail; + fmt1[uprobe->fmt1_size] = 0; + + cnt = find_count(fmt1); + + if (uprobe->fmt2 && uprobe->fmt2_size) { + ret = -ENOMEM; + fmt2 = kmalloc(uprobe->fmt2_size+1, GFP_KERNEL); + if (!fmt2) + goto fail; + ret = -EFAULT; + if (copy_from_user(fmt2, uprobe->fmt2, + uprobe->fmt2_size)) + goto fail; + fmt2[uprobe->fmt2_size] = 0; + cnt += find_count(fmt2); + } + } + + ret = logdev_probe_add_func(lh, fmt1, fmt2, cnt); + if (ret < 0) + goto fail; + + mutex_lock(&probe_list_lock); + list_add_tail_rcu(&lh->list, &logdev_probes); + mutex_unlock(&probe_list_lock); + + return ret; + + fail: + DPRINTK("%s FAILED\n",__FUNCTION__); + if (fmt1) + kfree(fmt1); + if (fmt2) + kfree(fmt2); + if (uprobe) + kfree(uprobe); + if (lh) + kfree(lh); + return ret; +} + +static int logdev_setup_probe_curr(unsigned long __user *arg) +{ + struct user_logdev_probe_curr *uprobe; + struct logdev_probe *lh = NULL; + struct logdev_probe_curr *probe; + int ret; + + uprobe = kmalloc(sizeof(*uprobe), GFP_KERNEL); + + if (!uprobe) + return -ENOMEM; + + ret = -EFAULT; + if(copy_from_user(uprobe, arg, sizeof(*uprobe))) + goto fail; + + ret = -ENOMEM; + lh = kzalloc(sizeof(*lh), GFP_KERNEL); + if 
(!lh) + goto fail; + + probe = &lh->curr; + + ret = -EFAULT; + if (uprobe->func) { + if (uprobe->func_size > KSYM_NAME_LEN) + uprobe->func_size = KSYM_NAME_LEN; + if (copy_from_user(probe->hdr.func_symbol, uprobe->func, + uprobe->func_size)) + goto fail; + } + /* yes this can be negative! */ + probe->index = uprobe->offset; + + if (uprobe->prefix && uprobe->prefix_size > 0) { + probe->prefix = kmalloc(uprobe->prefix_size + 1, GFP_KERNEL); + if (copy_from_user(probe->prefix, uprobe->prefix, + uprobe->prefix_size)) + goto fail; + probe->prefix[uprobe->prefix_size] = 0; + probe->prefix_size = uprobe->prefix_size + 1; + } + probe->hdr.addr = uprobe->addr; + + ret = logdev_probe_add_curr(lh); + if (ret < 0) + goto fail; + + mutex_lock(&probe_list_lock); + list_add_tail_rcu(&lh->list, &logdev_probes); + mutex_unlock(&probe_list_lock); + + return ret; + + fail: + if (uprobe) + kfree(uprobe); + if (lh) + kfree(lh); + return ret; +} + +static int logdev_setup_probe_var(unsigned long __user *arg) +{ + struct user_logdev_probe_var *uprobe; + struct logdev_probe *lh = NULL; + struct logdev_probe_var *probe; + int ret; + + uprobe = kmalloc(sizeof(*uprobe), GFP_KERNEL); + + if (!uprobe) + return -ENOMEM; + + ret = -EFAULT; + if(copy_from_user(uprobe, arg, sizeof(*uprobe))) + goto fail; + + ret = -ENOMEM; + lh = kzalloc(sizeof(*lh), GFP_KERNEL); + if (!lh) + goto fail; + + probe = &lh->var; + + probe->type = uprobe->type; + switch(probe->type) { + case LOGPROBE_TYPE_PREEMPT: + break; + default: + ret = -EINVAL; + goto fail; + } + + ret = -EFAULT; + if (uprobe->func) { + if (uprobe->func_size > KSYM_NAME_LEN) + uprobe->func_size = KSYM_NAME_LEN; + if (copy_from_user(probe->hdr.func_symbol, uprobe->func, + uprobe->func_size)) + goto fail; + } + + if (uprobe->prefix && uprobe->prefix_size > 0) { + probe->prefix = kmalloc(uprobe->prefix_size + 1, GFP_KERNEL); + if (copy_from_user(probe->prefix, uprobe->prefix, + uprobe->prefix_size)) + goto fail; + probe->prefix[uprobe->prefix_size] 
= 0; + probe->prefix_size = uprobe->prefix_size + 1; + } + probe->hdr.addr = uprobe->addr; + + ret = logdev_probe_add_var(lh); + if (ret < 0) + goto fail; + + mutex_lock(&probe_list_lock); + list_add_tail_rcu(&lh->list, &logdev_probes); + mutex_unlock(&probe_list_lock); + + return ret; + + fail: + if (uprobe) + kfree(uprobe); + if (lh) + kfree(lh); + return ret; +} + +static int logdev_probe_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + int ret = 0; + + switch (cmd) { + case LOGPROBE_ADDWATCH: + + ret = logdev_setup_probe_watch((unsigned long __user *)arg); + + break; + + case LOGPROBE_ADDFUNC: + + ret = logdev_setup_probe_func((unsigned long __user *)arg); + + break; + + case LOGPROBE_ADDCURR: + + ret = logdev_setup_probe_curr((unsigned long __user *)arg); + + break; + + case LOGPROBE_ADDVAR: + + ret = logdev_setup_probe_var((unsigned long __user *)arg); + + break; + + case LOGPROBE_DELPOINT: + { + struct logdev_probe *lp; + unsigned long id = arg; + + mutex_lock(&probe_list_lock); + list_for_each_entry_rcu(lp, &logdev_probes, list) + if (lp->id == id) + break; + if (&lp->list != &logdev_probes) + list_del_rcu(&lp->list); + else + lp = NULL; + mutex_unlock(&probe_list_lock); + + if (!lp) + return -EINVAL; + + switch (lp->type) { + case LOGPROBE_BP: + unregister_kprobe(&lp->watch.kp); + synchronize_rcu(); + break; + case LOGPROBE_FUNC: + unregister_jprobe(&lp->func.jp); + synchronize_rcu(); + + if (lp->func.fmt1) + kfree(lp->func.fmt1); + if (lp->func.fmt2) + kfree(lp->func.fmt2); + break; + case LOGPROBE_CURR: + unregister_kprobe(&lp->curr.kp); + synchronize_rcu(); + if (lp->curr.prefix) + kfree(lp->curr.prefix); + break; + case LOGPROBE_VAR: + unregister_kprobe(&lp->var.kp); + synchronize_rcu(); + if (lp->var.prefix) + kfree(lp->var.prefix); + break; + default: + printk(KERN_WARNING "unregistering logdev probe of" + "unknown type %d\n", + lp->type); + /* + * Don't even free this. 
We don't know where it + * is registered, and this is most certainly a bug! + */ + return -EINVAL; + } + + kfree(lp); + + break; + } + default: + ret = -ENOTTY; + } + + return ret; +} + + +/******************* List kprobe entries *****************/ + +static void __kprobes *s_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct logdev_probe *p = NULL; + int l = 0; + + list_for_each_entry(p, &logdev_probes, list) { + if (l++ >= *pos) + break; + } + + (*pos)++; + + if (&p->list == &logdev_probes) + return NULL; + + return p; +} + +static void __kprobes *s_start(struct seq_file *m, loff_t *pos) + __acquires(logdev_dev.lock) +{ + struct logdev_probe *p = NULL; + loff_t l = 0; + + rcu_read_lock(); + + list_for_each_entry(p, &logdev_probes, list) { + if (l++ >= *pos) + break; + } + + if (&p->list == &logdev_probes) + return NULL; + + (*pos)++; + + return p; +} + +static void __kprobes s_stop(struct seq_file *m, void *p) + __releases(logdev_dev.lock) +{ + rcu_read_unlock(); +} + +static void hdr_show(struct seq_file *m, struct logdev_probe_hdr *p) +{ + seq_printf(m,"%s : %p\n", + p->func_symbol, + (void*)p->addr); +} + +static int __kprobes s_show(struct seq_file *m, void *v) +{ + struct logdev_probe *lh = v; + struct logdev_probe_watch *watch = &lh->watch; + struct logdev_probe_func *func = &lh->func; + struct logdev_probe_curr *curr = &lh->curr; + struct logdev_probe_var *var = &lh->var; + + seq_printf(m, "%ld:\t", lh->id); + switch (lh->type) { + case LOGPROBE_BP: + hdr_show(m, &watch->hdr); + if (watch->var) + seq_printf(m,"\t%s : %p\n", + watch->var_symbol, + (void*)watch->var); + break; + case LOGPROBE_FUNC: + hdr_show(m, &func->hdr); + if (func->fmt1) + seq_printf(m,"\tfmt1: %s\n", + func->fmt1); + if (func->fmt2) + seq_printf(m,"\tfmt2: %s\n", + func->fmt2); + break; + + case LOGPROBE_CURR: + hdr_show(m, &curr->hdr); + seq_printf(m,"\tindex: %d (0x%x)\n", + curr->index, curr->index); + if (curr->prefix) + seq_printf(m,"\tprefix: %s\n", + curr->prefix); + 
break; + + case LOGPROBE_VAR: + hdr_show(m, &var->hdr); + switch (var->type) { + case LOGPROBE_TYPE_PREEMPT: + seq_printf(m,"\tpreempt_count\n"); + break; + default: + seq_printf(m,"\tunknown type\n"); + } + if (curr->prefix) + seq_printf(m,"\tprefix: %s\n", + curr->prefix); + break; + + default: + seq_printf(m,"tunknown type %d\n", + lh->type); + } + return 0; +} + +static struct seq_operations logdev_seq_op = { + .start = s_start, + .next = s_next, + .stop = s_stop, + .show = s_show, +}; + +/******************* end list kprobes *****************/ + +static int logdev_probe_open (struct inode *inode, struct file *filp) +{ + int ret; + + ret = seq_open(filp, &logdev_seq_op); + if (!ret) { + struct seq_file *m = filp->private_data; + m->private = inode->i_private; + } + + return ret; +} + + +static struct file_operations logdev_probe_fops = { + .read = seq_read, + .ioctl = logdev_probe_ioctl, + .open = logdev_probe_open, + .llseek = seq_lseek, + .release = seq_release, +}; + +/************************ End User Land ******************************/ + + + +static int __init logdev_probe_init(void) +{ + debugfs_create_file("probe", 0600, logdev_d, + NULL, &logdev_probe_fops); + + logdev_register_callback(LOGPROBE_ID_WATCH, logdev_probe_callback); + logdev_register_callback(LOGPROBE_ID_FUNC, logdev_probe_callback); + logdev_register_callback(LOGPROBE_ID_CURR, logdev_probe_callback); + logdev_register_callback(LOGPROBE_ID_VAR, logdev_probe_callback); + + /* just some decent number */ + logdev_probe_buffer_sz = 256; + logdev_probe_buffer = kmalloc(logdev_probe_buffer_sz, GFP_KERNEL); + + /* If we fail, really no harm done */ + if (!logdev_probe_buffer) { + logdev_probe_buffer_sz = 0; + printk("logdev_probe: Warning, couldn't allocate func buffer\n"); + } + + return 0; +} + +module_init(logdev_probe_init); Index: linux-trace.git/kernel/logdev/logdev_relay.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 
+++ linux-trace.git/kernel/logdev/logdev_relay.c 2009-04-28 12:02:07.000000000 -0400 @@ -0,0 +1,175 @@ +/* + * logdev_relay.c + * + * Copyright (C) 2006 Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "logdev_priv.h" + +static int pages = CONFIG_LOGDEV_PAGES; + +/* + * Relay structures + */ +struct logdev_dev_priv { + int cpu; + struct rchan_buf *buf; + loff_t read_pos; +}; +DEFINE_PER_CPU(struct logdev_dev_priv, logdev_dev_priv); +#define get_logdev_priv(cpu) &per_cpu(logdev_dev_priv, cpu) + +static struct rchan *logdev_chan; + +DEFINE_PER_CPU(struct logdev_dev_priv, logdev_dev_priv); +#define get_logdev_priv(cpu) &per_cpu(logdev_dev_priv, cpu) + +int logdev_copy_from_dev(struct logdev_dev *ldev, void *buffer, int size) +{ + struct logdev_dev_priv *dev = ldev->priv; + struct rchan_buf *buf = dev->buf; + + return relay_kernel_read(buffer, size, &dev->read_pos, buf); +} + +int logdev_copy_to_dev(struct logdev_dev *ldev, const void *dat, + int size) +{ + struct logdev_dev_priv *dev = 
ldev->priv; + struct rchan_buf *buf = dev->buf; + + relay_write(buf->chan, dat, size); + + return size; +} + +static int logdev_subbuf_start(struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + unsigned int prev_padding) +{ + return 1; +} + + +/* + * file_create() callback. Creates relay file in debugfs. + */ +static struct dentry *create_buf_file_handler(const char *filename, + struct dentry *parent, + int mode, + struct rchan_buf *buf, + int *is_global) +{ + struct dentry *buf_file; + + buf_file = debugfs_create_file(filename, mode, parent, buf, + &relay_file_operations); + + return buf_file; +} + +/* + * file_remove() default callback. Removes relay file in debugfs. + */ +static int remove_buf_file_handler(struct dentry *dentry) +{ + debugfs_remove(dentry); + + return 0; +} + +/* + * relay callbacks + */ +static struct rchan_callbacks logdev_relay_callbacks = +{ + .subbuf_start = logdev_subbuf_start, + .create_buf_file = create_buf_file_handler, + .remove_buf_file = remove_buf_file_handler, +}; + +int initialize_logdev(void) +{ + /* + * debugfs may not be ready when we initialize + * logdev. So postpone this later. 
+ */ + return 0; +} + +void logdev_cleanup_priv(struct logdev_dev *ldev) +{ + if (logdev_chan) + relay_close(logdev_chan); +} + +int __init logdev_misc_init(void) +{ + struct logdev_dev *ldev; + struct logdev_dev_priv *dev; + struct rchan *chan; + int subbufs; + int cpu; + + /* round up */ + subbufs = (pages + 1) >> 1; + + chan = relay_open("dev", logdev_d, + PAGE_SIZE * 2, + subbufs, + &logdev_relay_callbacks); + if (!chan) + return -1; + + for_each_present_cpu(cpu) { + + if (cpu >= LOGDEV_CPUS) + break; + + ldev = get_logdev(cpu); + dev = get_logdev_priv(cpu); + dev->cpu = cpu; + ldev->priv = dev; + + dev->cpu = cpu; + dev->buf = chan->buf[cpu]; + ldev->init = LOGDEV_DEV_RUNNING; + } + + return 0; +} + +/* just needs to be after postcore init */ +arch_initcall(logdev_misc_init); + Index: linux-trace.git/kernel/logdev/logdev_ringbuf.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-trace.git/kernel/logdev/logdev_ringbuf.c 2009-04-28 12:02:07.000000000 -0400 @@ -0,0 +1,553 @@ +/* + * logdev_ringbuf.c + * + * Copyright (C) 2004-2006 Steven Rostedt + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License (not later!) + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "logdev_priv.h" + +/* + * If need be, we can be placed really early before memory + * is locate, so if a flag is set, use bootmem otherwise use kmalloc. + * The earliest we can call logdev_init is just after smp_prepare_boot_cpu. + */ +#undef LOGDEV_USE_BOOTMEM + +static int pages = CONFIG_LOGDEV_PAGES; + +static void *logdev_malloc(int size) +{ +#ifndef LOGDEV_USE_BOOTMEM + return kmalloc(size, GFP_KERNEL); +#else + return alloc_bootmem(sizeof(int)); +#endif +} + +static void *logdev_get_free_pages(int order) +{ +#ifndef LOGDEV_USE_BOOTMEM + return (void*)__get_free_pages(GFP_KERNEL,order); +#else + return alloc_bootmem((1<private_data; + struct logdev_dev_priv *dev; + unsigned long flags; + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + + if (mutex_lock_interruptible(&user_mutex)) + return -EINTR; + + if (!dev_running(ldev)) + goto out; + dev = ldev->priv; + + if (copy_from_user(user_buffer,buf,count)) { + count = -EFAULT; + goto out; + } + + local_irq_save(flags); + __raw_spin_lock(&ldev->lock); + count = logdev_copy_to_dev(ldev, user_buffer, count); + __raw_spin_unlock(&ldev->lock); + local_irq_restore(flags); + + /* wake up those waiting for data */ + if (waitqueue_active(&dev->wait)) + wake_up_interruptible(&dev->wait); + +out: + mutex_unlock(&user_mutex); + return count; +} + +/* + * logdev_write - write like a logdev_print_time (expects buf to hold printable + * characters). 
+ */ +static ssize_t logdev_write(struct file *filp, const char *buf, size_t count, + loff_t *f_pos) +{ + struct logdev_dev *dev; + struct logdev_header hdr; + struct logdev_print_time rs; + unsigned long flags; + int cpu; + + if (count > PAGE_SIZE) + count = PAGE_SIZE; + + if (mutex_lock_interruptible(&user_mutex)) + return -EINTR; + + preempt_disable(); + cpu = smp_processor_id(); + + dev = get_logdev(cpu); + if (!dev_running(dev)) + goto out; + + if (copy_from_user(user_buffer,buf,count)) { + count = -EFAULT; + goto out; + } + + rs.t = sched_clock(); + + hdr.counter = logdev_counter_inc(); + hdr.id = LOGDEV_PRINT_TIME; + hdr.size = sizeof(hdr) + sizeof(rs) + count; + + local_irq_save(flags); + __raw_spin_lock(&dev->lock); + + logdev_copy_to_dev(dev, &hdr, sizeof(hdr)); + logdev_copy_to_dev(dev, &rs, sizeof(rs)); + count = logdev_copy_to_dev(dev, user_buffer, count); + __raw_spin_unlock(&dev->lock); + local_irq_restore(flags); + +out: + preempt_enable(); + mutex_unlock(&user_mutex); + return count; +} + +ssize_t logdev_read(struct file *filp, char *buf, size_t count, loff_t *f_pos) +{ + struct logdev_dev *ldev = filp->private_data; + struct logdev_dev_priv *dev; + unsigned long flags; + + if (mutex_lock_interruptible(&user_mutex)) + return -EINTR; + + if (!dev_running(ldev)) + goto out_up; + + dev = ldev->priv; + + local_irq_save(flags); + __raw_spin_lock(&ldev->lock); + + if (!dev->size) { + + /* TBD - FIXME */ +#if 1 + + count = 0; + goto out; +#endif + + if (filp->f_flags & O_NONBLOCK) { + count = -EAGAIN; + goto out; + } + + do { + DECLARE_WAITQUEUE(wait,current); + current->state = TASK_INTERRUPTIBLE; + add_wait_queue(&dev->wait,&wait); + local_irq_save(flags); + __raw_spin_unlock(&ldev->lock); + schedule(); + __raw_spin_lock(&ldev->lock); + local_irq_restore(flags); + remove_wait_queue(&dev->wait,&wait); + if (dev->size) + break; + if (signal_pending(current)) { + count = -ERESTARTSYS; + goto out; + } + } while(1); + } + + if (count > PAGE_SIZE) + count = 
PAGE_SIZE; + + + count = logdev_copy_from_dev(ldev,user_buffer,count); + + out: + /* We can't be corrupted if we have no data */ + if (!dev->size) + dev->corrupted = 0; + __raw_spin_unlock(&ldev->lock); + local_irq_restore(flags); + + if (count > 0) { + /* Well if we fail here, we just lost the data read :-( */ + if (copy_to_user(buf,user_buffer,count)) + count = -EFAULT; + } + out_up: + mutex_unlock(&user_mutex); + + return count; +} + +static int logdev_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg) +{ + return -ENOTTY; +} + +static int logdev_close(struct inode *inode, struct file *filp) +{ +#if 0 + int cpu = (int)filp->private_data; +#endif + return 0; +} + + +static int logdev_open (struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +/** + * logdev_poll - poll file op for logdev + * @filp: the file + * @wait: poll table + * + * Poll implemention. + */ +static unsigned int +logdev_poll(struct file *filp, struct poll_table_struct *wait) +{ + struct logdev_dev *dev = (struct logdev_dev*)filp->private_data; + unsigned int mask = 0; + + (void)dev; +#if 0 + if (filp->f_mode & FMODE_READ) { + poll_wait(filp, &app->read_wait, wait); + if (!empty_channel(dev)) + mask |= POLLIN | POLLRDNORM; + } +#endif + + return mask; +} + +static struct file_operations logdev_raw_fops = { + .read = logdev_read, + .write = logdev_raw_write, + .ioctl = logdev_ioctl, + .open = logdev_open, + .release = logdev_close, + .poll = logdev_poll, + .llseek = no_llseek, +}; + +static struct file_operations logdev_fops = { + .write = logdev_write, + .ioctl = logdev_ioctl, + .open = logdev_open, + .release = logdev_close, + .llseek = no_llseek, +}; + +/************************ End User Land ******************************/ + + +/******************* entry debugging interface *****************/ + +static void __kprobes *s_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct logdev_dev *ldev = m->private; + 
struct logdev_dev_priv *dev = ldev->priv; + int i = (int)*pos; + + (*pos)++; + + if (i >= dev->len) + return NULL; + + return &dev->entry[i]; +} + +static void __kprobes *s_start(struct seq_file *m, loff_t *pos) + __acquires(logdev_dev.lock) +{ + struct logdev_dev *dev = m->private; + void *p = NULL; + loff_t l = 0; + + /* + * A little strong? Perhaps, but we know that this is bad right + * from the start. Anyway this is for debugging purposes only, + * so it's OK, as well as the big latency we get by turning off + * intrerrupts. But we also never know who will be locking + * this. + */ + if (irqs_disabled()) + BUG(); + + local_irq_disable(); + __raw_spin_lock(&dev->lock); + for (p = (void *)1; p && l < *pos; p = s_next(m,p,&l)) + ; + + return p; +} + +static void __kprobes s_stop(struct seq_file *m, void *p) + __releases(logdev_dev.lock) +{ + struct logdev_dev *dev = m->private; + __raw_spin_unlock(&dev->lock); + local_irq_enable(); +} + +static int __kprobes s_show(struct seq_file *m, void *v) +{ + int i = (long)(v); + struct logdev_entry *entry = v; + struct logdev_dev *ldev; + struct logdev_dev_priv *dev; + + ldev = m->private; + dev = ldev->priv; + + if (i == 1) { + seq_printf(m,"Logdev:\n"); + seq_printf(m,"\tlen:\t%d\n",dev->len); + seq_printf(m,"\tsize:\t%d\n",dev->size); + seq_printf(m,"\tstart:\t%d\n",dev->start); + seq_printf(m,"\tend:\t%d\n",dev->end); + seq_printf(m,"\tcorrupted:%d\n",dev->corrupted); + seq_printf(m,"\n\tEntries:\n"); + + } else { + i = (int)((char*)entry - (char*)dev->entry) / + sizeof(struct logdev_entry); + + seq_printf(m,"\t %d:\t%8u : %8u\tsize: %lu\n", i, + entry->head, entry->tail, + LOGDEV_ENTRY_SIZE(entry)); + } + + + return 0; +} + +static struct seq_operations logdev_proc_op = { + .start = s_start, + .next = s_next, + .stop = s_stop, + .show = s_show, +}; + +static int logdev_entries_open (struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &logdev_proc_op); + if (!ret) { + struct seq_file *m = 
file->private_data; + m->private = inode->i_private; + } + + return ret; +} + +static struct file_operations logdev_entries_fops = { + .open = logdev_entries_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/******************* end entry debugging interface *****************/ + +int __init initialize_logdev(void) +{ + int cpu; + + for_each_present_cpu(cpu) { + struct logdev_dev *ldev; + struct logdev_dev_priv *dev; + struct logdev_entry *entry; + int i; + + ldev = get_logdev(cpu); + dev = get_logdev_priv(cpu); + dev->cpu = cpu; + ldev->priv = dev; + + dev->len = pages; + + dev->entry = + logdev_malloc(sizeof(struct logdev_entry)*dev->len); + if (!dev->entry) + return -ENOMEM; + + memset(dev->entry,0,sizeof(*dev->entry)*dev->len); + + for (i=0,entry=dev->entry; ilen; i++,entry++) { + entry->dat = logdev_get_free_pages(0); + if (!entry->dat) + return -ENOMEM; + } + init_waitqueue_head(&dev->wait); + + ldev->init = LOGDEV_DEV_RUNNING; + } + + return 0; +} + +void __init logdev_cleanup_priv(struct logdev_dev *ldev) +{ + struct logdev_dev_priv *dev = ldev->priv; + + if (!ldev->init) + return; + + if (dev->entry) { + struct logdev_entry *entry; + int i; + + for (i=0, entry=dev->entry; ilen; i++,entry++) + if (entry->dat) { + logdev_free_page((unsigned long)entry->dat); + entry->dat = NULL; + } + dev->len = 0; + logdev_kfree(dev->entry); + dev->entry = NULL; + ldev->init = 0; + } +} + +int __init logdev_misc_init(void) +{ + struct logdev_dev *ldev; + struct logdev_dev_priv *dev; + struct dentry *entries; + int cpu; + char buf[16]; + + if (!logdev_d) + return 0; + + entries = debugfs_create_dir("entry", logdev_d); + + for_each_present_cpu(cpu) { + + if (cpu >= LOGDEV_CPUS) + break; + + ldev = get_logdev(cpu); + if (!ldev->init) + continue; + + dev = ldev->priv; + + /* + * Setup the debugfs. 
+ */ + sprintf(buf, "dev%d", cpu); + debugfs_create_file(buf, 0444, logdev_d, + ldev, &logdev_raw_fops); + + if (entries) { + sprintf(buf,"%d",cpu); + debugfs_create_file(buf, 0444, entries, + ldev, &logdev_entries_fops); + } + } + debugfs_create_file("write", 0222, logdev_d, + NULL, &logdev_fops); + + + return 0; +} + +/* just needs to be after postcore init */ +arch_initcall(logdev_misc_init); Index: linux-trace.git/kernel/logdev/logdev_ringbuf.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-trace.git/kernel/logdev/logdev_ringbuf.h 2009-04-28 12:02:07.000000000 -0400 @@ -0,0 +1,158 @@ +#ifndef __LINUX_LOGDEV_RINGBUF_H +#define __LINUX_LOGDEV_RINGBUF_H + +/* + * Ring buffer structures + */ +struct logdev_entry { + unsigned int head; + unsigned int tail; + char *dat; +}; + +struct logdev_dev_priv { + struct logdev_entry *entry; + int cpu; + int size; + int len; + int start; + int end; + int corrupted; + wait_queue_head_t wait; +}; + +DECLARE_PER_CPU(struct logdev_dev_priv, logdev_dev_priv); +#define get_logdev_priv(cpu) &per_cpu(logdev_dev_priv, cpu) + +#define LOGDEV_ENTRY_SIZE(e) ((((e)->tail - (e)->head)) & (PAGE_SIZE-1)) +#define LOGDEV_ENTRY_FREE(e) ((PAGE_SIZE-1) - LOGDEV_ENTRY_SIZE(e)) +#define LOGDEV_ENTRY_ADD(e,x) ((e) = ((e) + x) & (PAGE_SIZE-1)) +#define LOGDEV_ENTRY_INC(e) LOGDEV_ENTRY_ADD(e,1) +#define LOGDEV_ENTRY_MAX (PAGE_SIZE-1) + +static inline int logdev_copy_from_dev(struct logdev_dev *ldev, void *buf, + int size) +{ + struct logdev_dev_priv *dev = ldev->priv; + struct logdev_entry *entry = &dev->entry[dev->start]; + int ret = 0; + + if (size < 0) { + printk("logdev_copy_from_dev: size < 0 ???\n"); + return -1; + } + if (size > dev->size) + size = dev->size; + + while (size && dev->size) { + int copy = size; + int used; + + if (!LOGDEV_ENTRY_SIZE(entry)) { + dev->start = (dev->start + 1) % dev->len; + entry = &dev->entry[dev->start]; + } + + if (copy > 
(used=LOGDEV_ENTRY_SIZE(entry))) + copy = used; + if (entry->head+copy > PAGE_SIZE) + copy = PAGE_SIZE - entry->head; + memcpy(buf,entry->dat+entry->head,copy); + LOGDEV_ENTRY_ADD(entry->head,copy); + buf += copy; + dev->size -= copy; + size -= copy; + ret += copy; + + } + + return ret; +} + +static inline void move_start_to_next_entry(struct logdev_dev *ldev) +{ + struct logdev_dev_priv *dev = ldev->priv; + struct logdev_entry *entry = &dev->entry[dev->start]; + int start = dev->start; + int size; + struct logdev_header hdr; + + if (unlikely(dev->corrupted)) + goto corrupted; + + while (start == dev->start) { + logdev_copy_from_dev(ldev,(char*)&hdr,sizeof(hdr)); + entry = &dev->entry[dev->start]; + + if (!logdev_valid(hdr.id)) { + dev->corrupted = 1; + if (start == dev->start) + goto corrupted; + return; + } + + size = sizeof(hdr); + while (size < hdr.size) { + int count = hdr.size - size; + if (count > LOGDEV_ENTRY_SIZE(entry)) + count = LOGDEV_ENTRY_SIZE(entry); + dev->size -= count; + size += count; + LOGDEV_ENTRY_ADD(entry->head,count); + if (LOGDEV_ENTRY_SIZE(entry) == 0) { + dev->start = (dev->start + 1) % dev->len; + entry->head = entry->tail = 0; + entry = &dev->entry[dev->start]; + } + } + } + return; + + corrupted: + /* Don't trust headers, just skip to the next entry */ + dev->size -= LOGDEV_ENTRY_SIZE(entry); + entry->head = entry->tail = 0; + dev->start = (dev->start+1) % dev->len; + return; +} + +static inline int logdev_copy_to_dev(struct logdev_dev *ldev, const void *dat, + int size) +{ + struct logdev_dev_priv *dev = ldev->priv; + struct logdev_entry *entry = &dev->entry[dev->end]; + const char *buf = dat; + int ret = 0; + + while (size) { + int copy; + int free; + if (LOGDEV_ENTRY_SIZE(entry) == LOGDEV_ENTRY_MAX) { + dev->end = (dev->end+1) % dev->len; + entry = &dev->entry[dev->end]; + /* if we wrapped, then clear out this entire + * buffer. 
+ */ + if (dev->end == dev->start) { + move_start_to_next_entry(ldev); + } + } + copy = size; + if (copy > (free=LOGDEV_ENTRY_FREE(entry))) + copy = free; + if (entry->tail+copy > PAGE_SIZE) + copy = PAGE_SIZE - entry->tail; + memcpy(entry->dat+entry->tail,buf,copy); + LOGDEV_ENTRY_ADD(entry->tail,copy); + buf += copy; + size -= copy; + ret += copy; + dev->size += copy; + + } + + return ret; + +} + +#endif /* __LINUX_LOGDEV_RINGBUF_H */ Index: linux-trace.git/arch/x86/kernel/dumpstack.c =================================================================== --- linux-trace.git.orig/arch/x86/kernel/dumpstack.c 2009-04-06 09:52:03.000000000 -0400 +++ linux-trace.git/arch/x86/kernel/dumpstack.c 2009-04-28 12:02:50.000000000 -0400 @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -250,6 +251,9 @@ int __kprobes __die(const char *str, str unsigned short ss; unsigned long sp; #endif + lfcnprint("BUG! NMI DIE!\n"); + logdev_print_off(); + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); #ifdef CONFIG_PREEMPT printk("PREEMPT "); @@ -278,6 +282,7 @@ int __kprobes __die(const char *str, str print_symbol("%s", regs->ip); printk(" SS:ESP %04x:%08lx\n", ss, sp); #else + logdev_dump(); /* Executive summary in case the oops scrolled away */ printk(KERN_ALERT "RIP "); printk_address(regs->ip, 1); @@ -311,6 +316,9 @@ die_nmi(char *str, struct pt_regs *regs, if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; + lfcnprint("BUG! NMI DIE!\n"); + logdev_print_off(); + /* * We are in trouble anyway, lets at least try * to get a message out. 
@@ -319,6 +327,7 @@ die_nmi(char *str, struct pt_regs *regs, printk(KERN_EMERG "%s", str); printk(" on CPU%d, ip %08lx, registers:\n", smp_processor_id(), regs->ip); + logdev_dump(); show_registers(regs); oops_end(flags, regs, 0); if (do_panic || panic_on_oops) Index: linux-trace.git/kernel/Makefile =================================================================== --- linux-trace.git.orig/kernel/Makefile 2009-04-27 12:10:05.000000000 -0400 +++ linux-trace.git/kernel/Makefile 2009-04-28 12:16:01.000000000 -0400 @@ -83,6 +83,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_PREEMPT_RCU) += rcupreempt.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o obj-$(CONFIG_PREEMPT_RCU_TRACE) += rcupreempt_trace.o +obj-$(CONFIG_LOGDEV) += logdev/ obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o