3 * Copyright (C) International Business Machines Corp., 2003
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 * Please send e-mail to kenistoj@users.sourceforge.net if you have
20 * questions or comments.
22 * Project Website: http://evlog.sourceforge.net/
25 #include <linux/config.h>
26 #include <linux/kernel.h>
27 #include <linux/stddef.h>
28 #include <linux/spinlock.h>
29 #include <linux/time.h>
30 #include <linux/smp.h>
31 #include <linux/string.h>
32 #include <linux/interrupt.h>
33 #include <asm/uaccess.h>
34 #include <asm/semaphore.h>
35 #include <linux/module.h>
36 #include <linux/ctype.h>
37 #include <linux/evlog.h>
/* Size of the fixed header that precedes every event record in the buffer. */
39 #define REC_HDR_SIZE sizeof(struct kern_log_entry)
/* Kernel's timezone; consulted in evl_mk_rechdr() on i386. */
41 extern struct timezone sys_tz;
43 /* Use same buffer size as printk's, but at least 2x the max rec length. */
44 #define EVL_BUF_SIZE (1 << CONFIG_LOG_BUF_SHIFT)
45 #if (EVL_BUF_SIZE < 2*EVL_ENTRY_MAXLEN)
/* Floor the buffer size so at least two maximum-length records fit. */
47 #define EVL_BUF_SIZE (2*EVL_ENTRY_MAXLEN)
51 * After buffer overflows, require at most this much free space before
52 * logging events again.
/* Low-water mark used by evl_check_buf(); capped at bf_len/2 there. */
54 #define EVL_BUF_DRAINAGE (16*1024U)
57 * This data structure describes the circular buffer that is written into
58 * by evl_writeh() and drained by evl_kbufread().
60 * bf_buf, bf_len, and bf_end are the start, length, and end of the buffer,
61 * and in the current implementation these remain constant.
63 * bf_tail advances as event records are logged to the buffer, and bf_head
64 * advances as records are drained from the buffer. bf_dropped maintains
65 * a count of records that have been dropped due to buffer overrun.
67 * - (bf_head == bf_tail) indicates an empty buffer.
68 * - bf_head can take any value from bf_buf through bf_end.
69 * - bf_tail starts out equal to bf_buf, but once the first record is written
70 * to the buffer, bf_tail never equals bf_buf. It can equal bf_end.
72 * It is possible for one task to be draining the buffer while another
73 * is writing to it. Only evl_kbufread() advances bf_head, and only
74 * copy_rec_to_cbuf() advances bf_tail. Each advances its respective
75 * pointer only after completing its operation.
/*
 * Locking: writers serialize on ebuf_lock; readers serialize on
 * evl_read_sem (see the declarations below).
 */
78 char *bf_buf; /* base buffer address */
79 unsigned int bf_len; /* buffer length */
80 unsigned int bf_dropped; /* (internal) dropped count */
81 char *bf_head; /* head-pointer for circ. buf */
82 char *bf_tail; /* tail-pointer for circ. buf */
83 char *bf_end; /* end buffer address */
/*
 * Backing storage for the circular buffer.  The extra sizeof(long) bytes
 * are slop beyond EVL_BUF_SIZE -- presumably alignment/overrun padding;
 * TODO(review): confirm against the elided initializer lines.
 */
86 static char evl_buffer[EVL_BUF_SIZE + sizeof(long)];
88 static struct cbuf ebuf = {
/* Final initializer is bf_end: one past the last usable buffer byte. */
94 evl_buffer + EVL_BUF_SIZE
98 * evl_read_sem serializes reads of the evlog buffer into user space (although
99 * only the logging daemon should be running evl_kbufread()).
101 * readq allows the reader to sleep until there's at least one record in
102 * the buffer to read.
104 * ebuf_lock serializes writes to the evlog buffer.
/*
 * ebuf_lock is always taken with spin_lock_irqsave(): events may be
 * logged from interrupt context (records can carry EVL_INTERRUPT).
 */
106 static DECLARE_MUTEX(evl_read_sem);
107 static DECLARE_WAIT_QUEUE_HEAD(readq);
108 static spinlock_t ebuf_lock = SPIN_LOCK_UNLOCKED;
111 * A region of the evlog circular buffer, possibly split into 2 chunks
122 * set_region() - Establish region to be written to or read from.
123 * Caller wants to write to or read from an nbytes-byte region (of the
124 * evlog circular buffer) starting at head. Figure out whether the
125 * region needs to be 1 chunk (typical) or 2 (due to wraparound),
126 * and populate the region struct accordingly.
128 * @rg: region struct to be populated
129 * @head: beginning of region to be read/written. If this is beyond the
130 * end of the buffer, wrap it around to the beginning.
131 * @nbytes: size of region
134 set_region(struct cbregion *rg, char *head, size_t nbytes)
/* A head at or past bf_end wraps back to the start of the buffer. */
136 if (head >= ebuf.bf_end) {
140 rg->rg_tail = head + nbytes;
/* Region runs off the end: split it into two chunks. */
141 if (rg->rg_tail > ebuf.bf_end) {
142 rg->rg_chunk1 = ebuf.bf_end - head;
143 rg->rg_chunk2 = nbytes - rg->rg_chunk1;
/* Tail resumes this many bytes past the buffer start. */
144 rg->rg_tail = ebuf.bf_buf + rg->rg_chunk2;
/* Single contiguous chunk: no wraparound needed. */
146 rg->rg_chunk1 = nbytes;
/*
 * copy_from_cbuf() - Copy a (possibly wrapped) buffer region into dest.
 * @rg: region previously populated by set_region()
 * @dest: kernel-space destination, at least chunk1+chunk2 bytes
 */
152 copy_from_cbuf(const struct cbregion *rg, char *dest)
154 memcpy(dest, rg->rg_head, rg->rg_chunk1);
/* Second chunk, if any, continues from the start of the buffer. */
155 if (rg->rg_chunk2 != 0) {
156 memcpy(dest + rg->rg_chunk1, ebuf.bf_buf, rg->rg_chunk2);
/*
 * copy_cbuf_to_user() - Copy a (possibly wrapped) buffer region to user space.
 * @rg: region previously populated by set_region()
 * @ubuf: user-space destination
 * Presumably returns the copy_to_user() status (nonzero on fault) --
 * the return statement is not visible here; confirm against full source.
 */
161 copy_cbuf_to_user(const struct cbregion *rg, char *ubuf)
164 status = copy_to_user(ubuf, rg->rg_head, rg->rg_chunk1);
/* Copy the wrapped second chunk only if the first copy succeeded. */
165 if (rg->rg_chunk2 != 0 && status == 0) {
166 status = copy_to_user(ubuf + rg->rg_chunk1, ebuf.bf_buf,
/*
 * copy_to_cbuf() - Copy src into a (possibly wrapped) buffer region.
 * @rg: region previously populated by set_region()
 * @src: kernel-space source, at least chunk1+chunk2 bytes
 */
173 copy_to_cbuf(const struct cbregion *rg, const char *src)
175 memcpy(rg->rg_head, src, rg->rg_chunk1);
/* Second chunk, if any, wraps to the start of the buffer. */
176 if (rg->rg_chunk2 != 0) {
177 memcpy(ebuf.bf_buf, src + rg->rg_chunk1, rg->rg_chunk2);
182 * copy_rec_to_cbuf() - Log event (hdr + vardata) to buffer.
183 * Caller has verified that there's enough room.
/* Caller must hold ebuf_lock; only this function advances bf_tail. */
186 copy_rec_to_cbuf(const struct kern_log_entry *hdr, const char *vardata)
189 char *tail = ebuf.bf_tail;
/* Header first... */
191 set_region(&rg, tail, REC_HDR_SIZE);
192 copy_to_cbuf(&rg, (const char*) hdr);
/* ...then the variable-length payload, if the record has one. */
194 if (hdr->log_size != 0) {
195 set_region(&rg, tail + REC_HDR_SIZE, hdr->log_size);
196 copy_to_cbuf(&rg, vardata);
/* Publish bf_tail only after the whole record is in place. */
199 ebuf.bf_tail = rg.rg_tail;
203 * evl_mk_rechdr() - Populate evlog record header.
204 * @facility: facility name (e.g., "kern", driver name)
205 * @event_type: event type (event ID assigned by programmer; may also be
206 * computed by recipient -- e.g., CRC of format string)
207 * @severity: severity level (e.g., LOG_INFO)
208 * @size: length, in bytes, of variable data
209 * @flags: event flags (e.g., EVL_TRUNCATE, EVL_EVTYCRC)
210 * @format: format of variable data (e.g., EVL_STRING)
213 evl_mk_rechdr(struct kern_log_entry *rec_hdr,
214 const char *facility,
/* Every record produced here is marked as originating in the kernel. */
223 flags |= EVL_KERNEL_EVENT;
/* Note whether we are logging from interrupt context. */
224 if (in_interrupt()) {
225 flags |= EVL_INTERRUPT;
228 rec_hdr->log_kmagic = LOGREC_KMAGIC;
229 rec_hdr->log_kversion = LOGREC_KVERSION;
230 rec_hdr->log_size = (__u16) size;
231 rec_hdr->log_format = (__s8) format;
232 rec_hdr->log_event_type = (__s32) event_type;
233 rec_hdr->log_severity = (__s8) severity;
234 rec_hdr->log_uid = current->uid;
235 rec_hdr->log_gid = current->gid;
236 rec_hdr->log_pid = current->pid;
237 /* current->signal->xxx pointers may be bad. */
238 if (unlikely(current->flags & PF_EXITING))
239 rec_hdr->log_pgrp = 0;
241 rec_hdr->log_pgrp = process_group(current);
242 rec_hdr->log_flags = (__u32) flags;
243 rec_hdr->log_processor = (__s32) smp_processor_id();
/* strlcpy guarantees NUL termination within FACILITY_MAXLEN. */
245 strlcpy(rec_hdr->log_facility, facility, FACILITY_MAXLEN);
/* Wall clock still zero => event logged before the clock was set. */
247 if (get_seconds() == 0) {
248 rec_hdr->log_flags |= EVL_INITIAL_BOOT_EVENT;
250 #if defined(__i386__)
/*
 * NOTE(review): tz_minuteswest == 0 is taken to mean the i386 RTC
 * holds local time rather than UTC -- confirm this heuristic.
 */
251 if (sys_tz.tz_minuteswest == 0) {
253 rec_hdr->log_flags |= EVL_KERNTIME_LOCAL;
/* ts is presumably the current timespec; its assignment is elided here. */
258 rec_hdr->log_time_sec = (time_t) ts.tv_sec;
259 rec_hdr->log_time_nsec = (__s32) ts.tv_nsec;
263 * normalize_header() - Fix up rec header, handling overflow, null vardata, etc.
264 * In case of sloppiness on the part of the caller, we clean it up rather
265 * than failing, since the caller is unlikely to handle failure.
268 normalize_header(struct kern_log_entry *hdr, const void *vardata)
/* Clamp out-of-range severities to a sane default. */
270 if (hdr->log_severity < 0 || hdr->log_severity > LOG_DEBUG) {
271 hdr->log_severity = LOG_WARNING;
/* No variable data (first condition elided -- presumably !vardata)... */
274 || hdr->log_size == 0
275 || hdr->log_format == EVL_NODATA) {
/* ...so record it as a data-less event. */
277 hdr->log_format = EVL_NODATA;
/* Oversized payloads are truncated and flagged, not rejected. */
279 if (hdr->log_size > EVL_ENTRY_MAXLEN) {
280 hdr->log_size = EVL_ENTRY_MAXLEN;
281 hdr->log_flags |= EVL_TRUNCATE;
286 * log_dropped_recs_event() - Log message about previously dropped records.
287 * The evlog circular buffer had been full and caused later records to be
288 * dropped. Now the buffer has some free space again. Log an event reporting
289 * the number of records dropped. Caller has verified that there's at least
290 * enough room for this event record.
/* Caller must hold ebuf_lock (see evl_check_buf() and evl_kbufread()). */
293 log_dropped_recs_event(void)
295 #define DROP_MSG_SIZE 80
296 char sbuf[DROP_MSG_SIZE];
297 struct kern_log_entry drechdr;
/* The %d argument (presumably ebuf.bf_dropped) is on an elided line. */
299 snprintf(sbuf, DROP_MSG_SIZE,
300 "%d event records dropped due to EVL buffer overflow.",
/* +1 so the record payload includes the terminating NUL. */
303 evl_mk_rechdr(&drechdr, "kern", EVL_BUFFER_OVERRUN, LOG_INFO,
304 strlen(sbuf) + 1, 0, EVL_STRING);
305 copy_rec_to_cbuf(&drechdr, sbuf);
309 * evl_check_buf() - Check for space in evlog buffer.
310 * If buffer free space is sufficient to log the indicated record,
311 * return 0. If not, return -1.
313 * Once the buffer becomes full and one or more messages are discarded,
314 * a significant portion of the buffer must be drained before we permit
315 * messages to be buffered again. We count the number of discards
316 * in the meantime and report them when we resume logging events.
317 * If we resumed logging with a nearly full buffer, then there could
318 * be a thrashing of stops and starts, making the discarded-message
319 * reporting annoying.
321 * @hdr: The header of the record caller intends to log.
/* Caller must hold ebuf_lock. */
324 evl_check_buf(const struct kern_log_entry *hdr)
327 size_t water_mark, avail, recsize;
329 recsize = REC_HDR_SIZE + hdr->log_size;
/* head/tail locals are snapshots of bf_head/bf_tail (lines elided). */
332 avail = (head <= tail) ?
333 (ebuf.bf_len - (tail - head)) :
336 if (ebuf.bf_dropped != 0) {
338 * Still recovering from buffer overflow.
339 * Apply the low water mark.
341 water_mark = min(EVL_BUF_DRAINAGE, ebuf.bf_len / 2);
343 * Just in case recsize is huge and/or somebody cranks the
344 * buffer size and/or EVL_BUF_DRAINAGE way down, make
345 * sure we have room for this record AND the "records dropped"
348 water_mark = max(water_mark,
349 recsize + REC_HDR_SIZE + DROP_MSG_SIZE);
351 /* +1 because bf_tail must never catch up with bf_head. */
352 water_mark = recsize + 1;
/* Not enough room: count the drop and fail (return path elided). */
355 if (avail < water_mark) {
359 /* There's enough free buffer space. Return success. */
/* First record after an overflow: report the drops, then resume. */
360 if (ebuf.bf_dropped != 0) {
361 log_dropped_recs_event();
367 * evl_kbufread() - Copy records from evlog circular buffer into user space.
368 * If successful, returns the number of bytes copied; else returns a
369 * negative error code.
371 * @retbuf: pointer to the buffer to be filled with the event records
372 * @bufsize: length, in bytes, of retbuf
375 evl_kbufread(char *retbuf, size_t bufsize)
381 char *tail, *buf = retbuf;
/* Caller's buffer can't even hold one header: error out (path elided). */
383 if (bufsize < REC_HDR_SIZE) {
/* Buffer empty but drops are pending: synthesize the drops record now. */
387 if (ebuf.bf_head == ebuf.bf_tail && ebuf.bf_dropped != 0) {
390 * 1. Somebody logged a huge burst of events and overflowed
391 * the buffer. At this point, there was no room for the
392 * "records dropped" message.
393 * 2. evlogd drained the buffer, and is now back for more.
395 unsigned long iflags;
396 spin_lock_irqsave(&ebuf_lock, iflags);
397 log_dropped_recs_event();
398 spin_unlock_irqrestore(&ebuf_lock, iflags);
402 * We expect that only the logging daemon will be running here,
403 * but serialize access just in case.
405 error = down_interruptible(&evl_read_sem);
/* Interrupted while waiting for the semaphore: bail (return elided). */
406 if (error == -EINTR) {
409 /* Go to sleep if the buffer is empty. */
410 error = wait_event_interruptible(readq,
411 (ebuf.bf_head != ebuf.bf_tail));
417 * Assemble message(s) into the user buffer, as many as will
418 * fit. On running out of space in the buffer, try to copy
419 * the header for the overflowing message. This means that
420 * there will always be at least a header returned. The caller
421 * must compare the numbers of bytes returned (remaining) with
422 * the length of the message to see if the entire message is
423 * present. A subsequent read will get the entire message,
424 * including the header (again).
426 * For simplicity, take a snapshot of bf_tail, and don't read
427 * past that even if evl_writeh() pours in more records while
428 * we're draining. We'll get those new records next time around.
433 /* Should not happen. Buffer must have at least one record. */
/* rec (current record pointer) is declared/initialized on elided lines. */
440 __u16 vardata_size; /* type must match rec.log_size */
/* No room left for even a header: stop draining. */
442 if (bufsize < REC_HDR_SIZE) {
447 * Extract log_size from header, which could be split due to
448 * wraparound, or misaligned.
450 set_region(&rg, rec+offsetof(struct kern_log_entry, log_size),
451 sizeof(vardata_size));
452 copy_from_cbuf(&rg, (char*) &vardata_size);
453 rec_size = REC_HDR_SIZE + vardata_size;
455 if (bufsize < rec_size) {
457 * Copyout only the header 'cause user buffer can't
/* (hold the full record; caller re-reads it next time.) */
460 set_region(&rg, rec, REC_HDR_SIZE);
461 error = copy_cbuf_to_user(&rg, buf);
466 bufsize -= REC_HDR_SIZE;
467 retbuflen += REC_HDR_SIZE;
/* Whole record fits: copy header + payload in one region. */
470 set_region(&rg, rec, rec_size);
471 error = copy_cbuf_to_user(&rg, buf);
/* buf/bufsize/rec advance on elided lines; rec presumably = rg.rg_tail. */
479 retbuflen += rec_size;
480 } while (rec != tail);
493 * evl_writeh() - Log event, given a pre-constructed header.
494 * Returns 0 on success, or a negative error code otherwise.
495 * For caller's convenience, we normalize the header as needed.
498 evl_writeh(struct kern_log_entry *hdr, const char *vardata)
/* Remember the old tail so we can detect the empty->nonempty transition. */
500 char *oldtail = ebuf.bf_tail;
501 unsigned long iflags; /* for spin_lock_irqsave() */
/* Sanitize severity/size/format before taking the lock. */
503 normalize_header(hdr, vardata);
505 spin_lock_irqsave(&ebuf_lock, iflags);
/* No room (overflow recovery in progress): drop the record. */
506 if (evl_check_buf(hdr) < 0) {
508 spin_unlock_irqrestore(&ebuf_lock, iflags);
512 copy_rec_to_cbuf(hdr, vardata);
514 * If the variable portion is a truncated string, make sure it
515 * ends with a null character.
517 if ((hdr->log_flags & EVL_TRUNCATE) && hdr->log_format == EVL_STRING) {
518 *(ebuf.bf_tail - 1) = '\0';
/* Buffer was empty and now isn't: wake the sleeping reader. */
521 if ((ebuf.bf_head == oldtail) &&
522 (ebuf.bf_head != ebuf.bf_tail)) {
523 wake_up_interruptible(&readq);
525 spin_unlock_irqrestore(&ebuf_lock, iflags);