Skip to content

Commit b66ffe4

Browse files
committed
ESP8266: Add esf_buf monitor to detect a WiFi TX stall
It detects a WiFi TX stall that sometimes happens and that cannot be recovered from other than by rebooting. Enable by defining ESP_ESF_BUF_MONITOR_INTERVAL_MS to something reasonable, e.g. 5000. It takes at most 2 intervals to detect the stall. `void esp_esf_buf_monitor_failure(void)` by default reboots the device but can be overridden if extra cleanup is required.
1 parent d0110e1 commit b66ffe4

File tree

4 files changed

+148
-6
lines changed

4 files changed

+148
-6
lines changed

platforms/esp8266/Makefile.build

+6-6
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ MGOS_SRCS += cs_file.c cs_hex.c cs_rbuf.c \
104104
MGOS_SRCS += esp_config.c \
105105
esp_coredump.c \
106106
esp_debug.c \
107+
esp_esf_buf_monitor.c \
107108
esp_exc.c \
108109
esp_flash_writer.c \
109110
esp_gpio.c \
@@ -208,15 +209,12 @@ BOOT_LOADER_BIN = $(RBOOT_BUILD_DIR)/rboot.bin
208209
APP_BIN = $(BUILD_DIR)/$(APP).bin
209210
APP_ELF = $(BUILD_DIR)/$(APP).elf
210211
LD_SCRIPT = $(GEN_DIR)/$(APP0_ADDR).ld
211-
LD_WRAPPERS =
212+
LD_WRAPPERS = esf_buf_alloc esf_buf_recycle lmacProcessTXStartData
212213
HEAP_LOG_FLAGS =
213214

214215
ifneq "${MGOS_ENABLE_HEAP_LOG}${MGOS_ENABLE_CALL_TRACE}" "00"
215216
HEAP_LOG_FLAGS += -DMGOS_ENABLE_HEAP_LOG
216-
LD_WRAPPERS += -Wl,--wrap=umm_calloc \
217-
-Wl,--wrap=umm_malloc \
218-
-Wl,--wrap=umm_realloc \
219-
-Wl,--wrap=umm_free
217+
LD_WRAPPERS += umm_calloc umm_malloc umm_realloc umm_free
220218
endif
221219
ifeq "${MGOS_ENABLE_CALL_TRACE}" "1"
222220
MGOS_SRCS += cs_heap_trace.c
@@ -351,13 +349,15 @@ $(APP_BIN): $(APP_ELF) $(BUILD_DIR)/esptool2
351349

352350
# -- Linking.
353351

352+
WLWRAP := -Wl,--wrap=
353+
354354
# Link the main ELF output file.
355355
BIN_PARTS = $(BUILD_DIR)/esp_cache.c.o $(APP_OBJS) $(FFI_EXPORTS_O) $(BUILD_INFO_O) $(MG_BUILD_INFO_O) $(MGOS_LIB) $(APP_BIN_LIB_FILES)
356356
$(APP_ELF): $(BIN_PARTS) $(LD_SCRIPT)
357357
ifeq "$(MGOS_ESP8266_RTOS)" "1"
358358
$(OBJCOPY) --weaken-symbol printf --weaken-symbol puts /opt/Espressif/esp-open-sdk/xtensa-lx106-elf/xtensa-lx106-elf/lib/libc.a
359359
endif
360-
$(call link,$(BIN_PARTS) $(LIBS) $(LD_WRAPPERS))
360+
$(call link,$(BIN_PARTS) $(LIBS) $(addprefix $(WLWRAP),$(LD_WRAPPERS)))
361361

362362
$(BUILD_DIR)/dummy_main.c: $(APP_OBJS) $(MGOS_ESP8266_PATH)/Makefile.build
363363
$(vecho) "GEN $@"
+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* Copyright (c) 2021 Deomid "rojer" Ryabkov
3+
* All rights reserved
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
#include "esp_esf_buf_monitor.h"
19+
20+
#include <stdlib.h>
21+
22+
#include "mgos.h"
23+
24+
struct pbuf;
25+
26+
// Incremented by __wrap_esf_buf_recycle for every tracked buffer it releases.
// The monitor timer compares it between ticks to tell whether TX is moving.
uint32_t g_esf_buf_canary_ctr = 0;

// Not referenced anywhere in this file.
// NOTE(review): presumably kept for external inspection/debugging - confirm.
uint8_t g_esf_buf_canary_strikes = 0;

// Fixed-size table of buffers handed out by esf_buf_alloc and not yet passed
// to esf_buf_recycle. A NULL entry is a free slot.
struct esf_buf *g_pending_bufs[8] = {NULL};
29+
30+
extern struct esf_buf *__real_esf_buf_alloc(struct pbuf *p, uint32_t type,
31+
uint32_t a4);
32+
struct esf_buf *__wrap_esf_buf_alloc(struct pbuf *p, uint32_t type,
33+
uint32_t a4) {
34+
struct esf_buf *eb = __real_esf_buf_alloc(p, type, a4);
35+
if (eb != NULL) {
36+
for (int i = 0; i < 8; i++) {
37+
if (g_pending_bufs[i] == NULL) {
38+
g_pending_bufs[i] = eb;
39+
break;
40+
}
41+
}
42+
}
43+
return eb;
44+
}
45+
46+
extern void __real_esf_buf_recycle(struct esf_buf *eb, uint32_t type);
47+
void __wrap_esf_buf_recycle(struct esf_buf *eb, uint32_t type) {
48+
for (int i = 0; i < 8; i++) {
49+
if (g_pending_bufs[i] == eb) {
50+
g_pending_bufs[i] = NULL;
51+
g_esf_buf_canary_ctr++;
52+
}
53+
}
54+
__real_esf_buf_recycle(eb, type);
55+
}
56+
57+
static uint32_t s_esf_buf_last_canary_ctr = 0;
58+
59+
static void esp_esf_buf_monitor_timer_cb(void *arg) {
60+
int num_pending = 0;
61+
for (int i = 0; i < 8; i++) {
62+
if (g_pending_bufs[i] != NULL) num_pending++;
63+
}
64+
LOG(LL_DEBUG, ("np %d ctr %lu %lu", num_pending, s_esf_buf_last_canary_ctr,
65+
g_esf_buf_canary_ctr));
66+
if (num_pending == 0) {
67+
// All clear, no problem.
68+
return;
69+
}
70+
if (g_esf_buf_canary_ctr != s_esf_buf_last_canary_ctr) {
71+
// Things are moving along, all good.
72+
s_esf_buf_last_canary_ctr = g_esf_buf_canary_ctr;
73+
return;
74+
}
75+
LOG(LL_ERROR, ("TX is stuck!"));
76+
static bool cb_invoked = false;
77+
if (!cb_invoked) {
78+
esp_esf_buf_monitor_failure();
79+
cb_invoked = true;
80+
}
81+
(void) arg;
82+
}
83+
84+
void esp_esf_buf_monitor_failure(void) WEAK;
85+
void esp_esf_buf_monitor_failure(void) {
86+
mgos_system_restart_after(1000);
87+
}
88+
89+
extern void __real_lmacProcessTXStartData(uint8_t id);
90+
IRAM void __wrap_lmacProcessTXStartData(uint8_t id) {
91+
#ifdef ESP_ESF_BUF_TRIGGER_BUG
92+
// Introducing a delay here triggers the condition reasonably quickly
93+
// under moderate traffic load (3 x curl; sleep 1 in parallel).
94+
if (id == 0) {
95+
ets_delay_us(500);
96+
}
97+
#endif
98+
__real_lmacProcessTXStartData(id);
99+
}
100+
101+
/*
 * Starts the monitor: registers esp_esf_buf_monitor_timer_cb as a
 * repeating timer with a period of ESP_ESF_BUF_MONITOR_INTERVAL_MS.
 * The callback receives no argument (NULL).
 */
void esp_esf_buf_monitor_init(void) {
  const int interval_ms = ESP_ESF_BUF_MONITOR_INTERVAL_MS;
  mgos_set_timer(interval_ms, MGOS_TIMER_REPEAT, esp_esf_buf_monitor_timer_cb,
                 NULL);
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
/*
2+
* Copyright (c) 2021 Deomid "rojer" Ryabkov
3+
* All rights reserved
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License");
6+
* you may not use this file except in compliance with the License.
7+
* You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
#pragma once

#include <stdint.h>

// C linkage so that the functions below can be called from, and
// esp_esf_buf_monitor_failure can be overridden in, C++ translation units.
#ifdef __cplusplus
extern "C" {
#endif

// esf_buf monitor detects a WiFi TX stall that sometimes happens
// and that cannot be recovered from other than by rebooting.

// Period of the monitor timer, in milliseconds.
// The platform init code starts the monitor only when this is > 0.
#ifndef ESP_ESF_BUF_MONITOR_INTERVAL_MS
#define ESP_ESF_BUF_MONITOR_INTERVAL_MS 0  // Disabled by default
#endif

// This callback will be invoked once stuck TX is detected.
// Default (weak) implementation is to reboot the device.
// Provide a non-weak definition to override it, e.g. to perform extra
// cleanup first.
void esp_esf_buf_monitor_failure(void);

// Starts the periodic monitor timer with a period of
// ESP_ESF_BUF_MONITOR_INTERVAL_MS.
void esp_esf_buf_monitor_init(void);

#ifdef __cplusplus
}
#endif

platforms/esp8266/src/esp_main.c

+5
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#include "esp_adc.h"
3939
#endif
4040
#include "esp_coredump.h"
41+
#include "esp_esf_buf_monitor.h"
4142
#include "esp_exc.h"
4243
#include "esp_features.h"
4344
#include "esp_fs.h"
@@ -173,6 +174,10 @@ enum mgos_init_result esp_mgos_init2(void) {
173174
return ir;
174175
}
175176

177+
#if ESP_ESF_BUF_MONITOR_INTERVAL_MS > 0
178+
esp_esf_buf_monitor_init();
179+
#endif
180+
176181
return MGOS_INIT_OK;
177182
}
178183

0 commit comments

Comments
 (0)