Skip to content

Commit ea82416

Browse files
author
Andi Kleen
committed
Initial import of mcelog-0.8pre + some old patches
0 parents  commit ea82416

25 files changed

+3607
-0
lines changed

CHANGES

+41
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
<newer changes first>
2+
3+
Support AMD Fam10h/11h CPUs (Joachim Deguara)
4+
Add switch to use LOG_ERR for syslog messages
5+
Various misc. cleanups
6+
mcelog now logs summaries of some serious events into syslog by default
7+
Fix syslog multiline logging
8+
Separate decoding output and error messages
9+
Automatic dependency generation in Makefile
10+
Many cleanups in DMI decoding
11+
Add DIMM database and error triggers
12+
Automatic sanity check for DMI information and enable by default
13+
Add support for decoding Intel Core2 machine checks
14+
Simple decoding of the TSC value into uptime for Intel CPUs
15+
Add the MCE design paper.
16+
Decode Intel thermal events properly
17+
Add some "RAMs" to K8 ECC strings in futile hope that users will get
18+
the hint
19+
Allow modifier command line options after --ascii
20+
Don't print decoded address twice for --ascii
21+
Fix SMBIOS anchor scan to work on more machines and don't crash when
22+
no anchor found.
23+
Fix --ascii reparsing of mcelog output.
24+
Add --filter and filter out known broken K8 GART errors
25+
Add --ignorenodev argument and use in cron script (avoids cron errors
26+
in Xen guest kernels)
27+
Add new --dmi argument to look up machine check addresses in SMBIOS
28+
(warning: unreliable due to widespread BIOS bugs)
29+
Fix argument decoding (support --, allow arguments in any order)
30+
Clarify --ascii in the manpage
31+
Support for AMD K8 Revision F machine check DRAM error thresholding
32+
from Jacob Shin <[email protected]>
33+
Add P4 decoder contributed by "Guo, Racing" <[email protected]> for
34+
Intel P4 and Xeon.
35+
Add K8 decoder from 2.4 kernel code to decode Opteron/Athlon64 logs.
36+
(code mostly from Eric Morton and Andi Kleen)
37+
Add --ascii function to decode fatal kernel output.
38+
Improve manpage
39+
Fix 32bit bugs
40+
Fix uninitialized variable in check_cpu
41+
Minor cleanups

Makefile

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
CFLAGS := -g -Wall
2+
prefix := /usr
3+
# Define appropriately for your distribution
4+
# DOCDIR := /usr/share/doc/packages/mcelog
5+
6+
all: mcelog dbquery
7+
8+
.PHONY: install clean depend
9+
10+
OBJ := p4.o k8.o mcelog.o dmi.o db.o dimm.o tsc.o core2.o
11+
SRC := $(OBJ:.o=.c)
12+
CLEAN := mcelog dmi tsc dbquery .depend .depend.X dbquery.o
13+
DOC := mce.pdf smbios.spec
14+
15+
mcelog: ${OBJ}
16+
17+
# dbquery intentionally not installed by default
18+
install: mcelog
19+
cp mcelog ${prefix}/sbin/mcelog
20+
cp mcelog.8 ${prefix}/share/man/man8
21+
ifdef DOCDIR
22+
cp ${DOC} ${DOCDIR}
23+
else
24+
echo
25+
echo "Consider defining DOCDIR to install additional documentation"
26+
endif
27+
echo
28+
echo "call mcelog regularly from your crontab"
29+
30+
clean:
31+
rm -f ${CLEAN} ${OBJ}
32+
33+
tsc: tsc.c
34+
gcc -o tsc ${CFLAGS} -DSTANDALONE tsc.c ${LDFLAGS}
35+
36+
dbquery: db.o dbquery.o
37+
38+
depend: .depend
39+
40+
.depend:
41+
${CC} -MM -I. ${SRC} > .depend.X && mv .depend.X .depend
42+
43+
include .depend
44+
45+
Makefile: .depend

README

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
2+
The latest x86-64 2.6 kernel won't log machine check errors to the
3+
kernel log anymore. You need this tool to decode them.
4+
5+
Create the device first.
6+
7+
mknod /dev/mcelog c 10 227
8+
9+
This program is licensed under the terms of the GNU General Public
10+
License, v.2
11+

TODO

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
2+
3+
- unified error output for memory errors
4+
- support replacement DIMM table
5+
- decode syndromes on K8? (from EDAC)
6+

core2.c

+135
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
#include <string.h>
2+
#include <stdio.h>
3+
#include <assert.h>
4+
#include "mcelog.h"
5+
#include "core2.h"
6+
7+
/* Decode P6 family (Core2) model specific errors.
8+
The generic errors are decoded in p4.c */
9+
10+
/* [19..24] */
11+
/*
 * Names for the bus queue request type, indexed by the raw field value.
 *
 * The field occupies status bits [19..24], i.e. six bits, so the table is
 * sized for all 64 encodings.  The decoder in this file derives the field
 * mask from the table's element count; with fewer than 33 entries bit 24
 * would be masked off and an unknown encoding could be reported under the
 * name of a known one.  Encodings without a name stay NULL.
 */
static char *bus_queue_req_type[64] = {
	[0] = "BQ_DCU_READ_TYPE",
	[2] = "BQ_IFU_DEMAND_TYPE",
	[3] = "BQ_IFU_DEMAND_NC_TYPE",
	[4] = "BQ_DCU_RFO_TYPE",
	[5] = "BQ_DCU_RFO_LOCK_TYPE",
	[6] = "BQ_DCU_ITOM_TYPE",
	[8] = "BQ_DCU_WB_TYPE",
	[10] = "BQ_DCU_WCEVICT_TYPE",	/* was misspelled with a "BC_" prefix */
	[11] = "BQ_DCU_WCLINE_TYPE",
	[12] = "BQ_DCU_BTM_TYPE",
	[13] = "BQ_DCU_INTACK_TYPE",
	[14] = "BQ_DCU_INVALL2_TYPE",
	[15] = "BQ_DCU_FLUSHL2_TYPE",
	[16] = "BQ_DCU_PART_RD_TYPE",
	[18] = "BQ_DCU_PART_WR_TYPE",
	[20] = "BQ_DCU_SPEC_CYC_TYPE",
	[24] = "BQ_DCU_IO_RD_TYPE",
	[25] = "BQ_DCU_IO_WR_TYPE",
	[28] = "BQ_DCU_LOCK_RD_TYPE",
	[29] = "BQ_DCU_LOCK_WR_TYPE",
	[30] = "BQ_DCU_SPLOCK_RD_TYPE",
};
34+
35+
/* [25..27] */
36+
/*
 * Names for the bus queue error type, listed in order of the raw field
 * value.  Value 3 has no name, so its slot is an explicit NULL.
 */
static char *bus_queue_error_type[] = {
	"BQ_ERR_HARD_TYPE",	/* 0 */
	"BQ_ERR_DOUBLE_TYPE",	/* 1 */
	"BQ_ERR_AERR2_TYPE",	/* 2 */
	NULL,			/* 3 */
	"BQ_ERR_SINGLE_TYPE",	/* 4 */
	"BQ_ERR_AERR1_TYPE",	/* 5 */
};
43+
44+
/*
 * String tables for reserved fields.  They contain only NULL entries, so
 * no value of such a field has a name; the element count records the
 * width of the field (8 entries for three bits, 4 for two, 2 for one).
 */
static char *reserved_3bits[8];
static char *reserved_1bit[2];
static char *reserved_2bits[4];

/*
 * Define the string table for a one-bit flag: entry 0 (flag clear) is
 * NULL and entry 1 (flag set) is the description d.
 *
 * The expansion carries no trailing semicolon; each use below supplies
 * its own.  With a semicolon inside the macro as well, every use would
 * leave a stray empty declaration at file scope, which ISO C does not
 * permit.
 */
#define SINGLEBIT(n, d) static char *n[2] = { [1] = d }

SINGLEBIT(frc, "FRC error");
SINGLEBIT(berr, "BERR");
SINGLEBIT(int_binit, "internal BINIT");
SINGLEBIT(ext_binit, "external BINIT");
SINGLEBIT(response_parity, "response parity error");
SINGLEBIT(bus_binit, "bus BINIT");
SINGLEBIT(timeout_binit, "timeout BINIT (ROB timeout)");
SINGLEBIT(hard_err, "hard error");
SINGLEBIT(ierr, "IERR");
SINGLEBIT(aerr, "parity error");
SINGLEBIT(uecc, "uncorrectable ECC");
SINGLEBIT(cecc, "correctable ECC");
62+
63+
/* Describes one bit field of the status word and how to name its values. */
struct field {
64+
/* Position of the field's lowest bit; the status word is shifted right
   by this amount to extract the field. */
int start_bit;
65+
/* Table of names indexed by the field's value; a NULL entry means that
   value has no name. */
char **str;
66+
/* Number of entries in str.  The decoder also derives the field's bit
   mask from this count. */
int stringlen;
67+
};
68+
69+
/* Build a struct field initialiser from a start bit and a string table.
   NELE is not defined in this file; presumably it yields the element
   count of the array (confirm against mcelog.h), so name must be a real
   array, not a pointer. */
#define FIELD(start_bit, name) { start_bit, name, NELE(name) }
70+
71+
/*
 * Layout of the model specific part of the status word, in ascending bit
 * order.  Each field is listed exactly once so that it is reported exactly
 * once.  The list ends with an entry whose str pointer is NULL, which is
 * what the decoding loop tests for.
 */
struct field fields[] = {
	FIELD(16, reserved_3bits),
	FIELD(19, bus_queue_req_type),
	FIELD(25, bus_queue_error_type),
	FIELD(28, frc),
	FIELD(29, berr),
	FIELD(30, int_binit),
	FIELD(31, reserved_1bit),
	FIELD(32, reserved_3bits),
	FIELD(35, ext_binit),
	FIELD(36, response_parity),
	FIELD(37, bus_binit),
	FIELD(38, timeout_binit),
	FIELD(39, reserved_3bits),
	FIELD(42, hard_err),
	FIELD(43, ierr),
	FIELD(44, aerr),
	FIELD(45, uecc),
	FIELD(46, cecc),
	/* [47..54]: ECC syndrome */
	FIELD(55, reserved_2bits),
	{ 0, NULL, 0 },	/* terminator: str == NULL */
};
95+
96+
/*
 * Return the smallest value of the form 1, 3, 7, 15, ... (all low bits
 * set) that is not below i.  The result is never 0: bitmask(0) and
 * bitmask(1) both yield 1.
 */
static u64 bitmask(u64 i)
{
	u64 ones;

	for (ones = 1; ones < i; ones = (ones << 1) | 1)
		continue;
	return ones;
}
103+
104+
/*
 * Decode the model specific bits of a machine check status word.
 *
 * Walks the fields[] table, extracts each field from status and emits its
 * name through Wprintf.  Values that have no name are emitted as
 * "<start_bit:value>" (value in hex), except that an unnamed value of 0 is
 * skipped.  Items are separated by spaces and a new line is started once
 * the current one would exceed 75 characters.  Finally the 8-bit ECC
 * syndrome at bits [47..54] is reported if it is non-zero.
 */
void core2_decode_model(u64 status)
{
	struct field *f;
	int linelen = 0;
	char *delim = "";
	/* Scratch space for "<bit:value>".  It is declared at function scope
	   because s may still point into it when the text is measured and
	   printed further down in the loop body. */
	char buf[60];

	for (f = &fields[0]; f->str; f++) {
		u64 v = (status >> f->start_bit) & bitmask(f->stringlen - 1);
		char *s = NULL;

		/* The mask can be wider than the table (e.g. 6 entries under a
		   3-bit mask), so the index still needs a bounds check. */
		if (v < (u64)f->stringlen)
			s = f->str[v];
		if (!s) {
			if (v == 0)
				continue;
			snprintf(buf, sizeof buf, "<%d:%llx>", f->start_bit,
				 (unsigned long long)v);
			s = buf;
		}

		int len = strlen(s);

		if (linelen + len > 75) {
			delim = "\n";
			linelen = 0;
		}
		Wprintf("%s%s", delim, s);
		delim = " ";
		linelen += len + 1;
	}
	if (linelen > 0)
		Wprintf("\n");

	u64 syndrome = (status >> 47) & 0xff;

	if (syndrome)
		Wprintf("ECC syndrome: %llx\n", (unsigned long long)syndrome);
}

core2.h

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/* Decode the model specific fields of a P6 family (Core2) machine check
   status word; the implementation lives in core2.c. */
void core2_decode_model(u64 status);
2+

0 commit comments

Comments
 (0)