From a8aed2953b2abcce4321960031656d3c6bdd5156 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Fri, 20 Aug 2021 15:52:53 -0700 Subject: [PATCH 1/6] Update mmu_get... and mmu_set... to comply with strict-aliasing rules. Added 32-bit dependency injections as needed to guard against compiler optimizing 32-bit loads from IRAM to 8-bit or 16-bit loads. --- cores/esp8266/mmu_iram.h | 89 ++++++-- .../esp8266/examples/irammem/irammem.ino | 205 +++++++++++++++++- 2 files changed, 268 insertions(+), 26 deletions(-) diff --git a/cores/esp8266/mmu_iram.h b/cores/esp8266/mmu_iram.h index 7ba06be9bb..9eb1247646 100644 --- a/cores/esp8266/mmu_iram.h +++ b/cores/esp8266/mmu_iram.h @@ -127,8 +127,26 @@ bool mmu_is_icache(const void *addr) { static inline __attribute__((always_inline)) uint8_t mmu_get_uint8(const void *p8) { ASSERT_RANGE_TEST_READ(p8); - uint32_t val = (*(uint32_t *)((uintptr_t)p8 & ~0x3)); - uint32_t pos = ((uintptr_t)p8 & 0x3) * 8; + // https://gist.github.com/shafik/848ae25ee209f698763cffee272a58f8#how-do-we-type-pun-correctly + // Comply with strict-aliasing rules. Using memcpy is a Standards suggested + // method for type punning. The compiler optimizer will replace the memcpy + // with an `l32i` instruction. Using __builtin_memcpy to ensure we get the + // effects of the compiler optimization and not some #define version of + // memcpy. + void *v32 = (void *)((uintptr_t)p8 & ~(uintptr_t)3u); + uint32_t val; + __builtin_memcpy(&val, v32, sizeof(uint32_t)); + // Use an empty ASM to reference the 32-bit value. This will block the + // compiler from immediately optimizing to an 8-bit or 16-bit load instruction + // against IRAM memory. (This approach was inspired by + // https://github.com/esp8266/Arduino/pull/7780#discussion_r548303374) + // This issue was seen when using a constant address with the GCC 10.3 + // compiler. + // As a general practice, I think referencing by way of Extended ASM R/W + // output register will stop the the compiler from reloading the value later + // as 8-bit load from IRAM. 
+ asm volatile ("" :"+r"(val)); // inject 32-bit dependency + uint32_t pos = ((uint32_t)p8 & 3u) * 8u; val >>= pos; return (uint8_t)val; } @@ -136,8 +154,11 @@ uint8_t mmu_get_uint8(const void *p8) { static inline __attribute__((always_inline)) uint16_t mmu_get_uint16(const uint16_t *p16) { ASSERT_RANGE_TEST_READ(p16); - uint32_t val = (*(uint32_t *)((uintptr_t)p16 & ~0x3)); - uint32_t pos = ((uintptr_t)p16 & 0x3) * 8; + void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)0x3u); + uint32_t val; + __builtin_memcpy(&val, v32, sizeof(uint32_t)); + asm volatile ("" :"+r"(val)); + uint32_t pos = ((uint32_t)p16 & 3u) * 8u; val >>= pos; return (uint16_t)val; } @@ -145,8 +166,11 @@ uint16_t mmu_get_uint16(const uint16_t *p16) { static inline __attribute__((always_inline)) int16_t mmu_get_int16(const int16_t *p16) { ASSERT_RANGE_TEST_READ(p16); - uint32_t val = (*(uint32_t *)((uintptr_t)p16 & ~0x3)); - uint32_t pos = ((uintptr_t)p16 & 0x3) * 8; + void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)3u); + uint32_t val; + __builtin_memcpy(&val, v32, sizeof(uint32_t)); + asm volatile ("" :"+r"(val)); + uint32_t pos = ((uint32_t)p16 & 3u) * 8u; val >>= pos; return (int16_t)val; } @@ -154,30 +178,43 @@ int16_t mmu_get_int16(const int16_t *p16) { static inline __attribute__((always_inline)) uint8_t mmu_set_uint8(void *p8, const uint8_t val) { ASSERT_RANGE_TEST_WRITE(p8); - uint32_t pos = ((uintptr_t)p8 & 0x3) * 8; + uint32_t pos = ((uint32_t)p8 & 3u) * 8u; uint32_t sval = val << pos; - uint32_t valmask = 0x0FF << pos; + uint32_t valmask = 0x0FFu << pos; + + void *v32 = (void *)((uintptr_t)p8 & ~(uintptr_t)3u); + uint32_t ival; + __builtin_memcpy(&ival, v32, sizeof(uint32_t)); + asm volatile ("" :"+r"(ival)); - uint32_t *p32 = (uint32_t *)((uintptr_t)p8 & ~0x3); - uint32_t ival = *p32; ival &= (~valmask); ival |= sval; - *p32 = ival; + /* + This 32-bit dependency injection does not appear to be needed with the + current GCC 10.3; however, that could change in the future versions. Or, I + may not have the right test for it to fail. 
+ */ + asm volatile ("" :"+r"(ival)); + __builtin_memcpy(v32, &ival, sizeof(uint32_t)); return val; } static inline __attribute__((always_inline)) uint16_t mmu_set_uint16(uint16_t *p16, const uint16_t val) { ASSERT_RANGE_TEST_WRITE(p16); - uint32_t pos = ((uintptr_t)p16 & 0x3) * 8; + uint32_t pos = ((uint32_t)p16 & 3u) * 8u; uint32_t sval = val << pos; - uint32_t valmask = 0x0FFFF << pos; + uint32_t valmask = 0x0FFFFu << pos; + + void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)3u); + uint32_t ival; + __builtin_memcpy(&ival, v32, sizeof(uint32_t)); + asm volatile ("" :"+r"(ival)); - uint32_t *p32 = (uint32_t *)((uintptr_t)p16 & ~0x3); - uint32_t ival = *p32; ival &= (~valmask); ival |= sval; - *p32 = ival; + asm volatile ("" :"+r"(ival)); + __builtin_memcpy(v32, &ival, sizeof(uint32_t)); return val; } @@ -185,15 +222,19 @@ static inline __attribute__((always_inline)) int16_t mmu_set_int16(int16_t *p16, const int16_t val) { ASSERT_RANGE_TEST_WRITE(p16); uint32_t sval = (uint16_t)val; - uint32_t pos = ((uintptr_t)p16 & 0x3) * 8; + uint32_t pos = ((uint32_t)p16 & 3u) * 8u; sval <<= pos; - uint32_t valmask = 0x0FFFF << pos; + uint32_t valmask = 0x0FFFFu << pos; + + void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)3u); + uint32_t ival; + __builtin_memcpy(&ival, v32, sizeof(uint32_t)); + asm volatile ("" :"+r"(ival)); - uint32_t *p32 = (uint32_t *)((uintptr_t)p16 & ~0x3); - uint32_t ival = *p32; ival &= (~valmask); ival |= sval; - *p32 = ival; + asm volatile ("" :"+r"(ival)); + __builtin_memcpy(v32, &ival, sizeof(uint32_t)); return val; } @@ -204,13 +245,13 @@ extern void _text_end(void); static inline __attribute__((always_inline)) void *mmu_sec_heap(void) { - uint32_t sec_heap = (uint32_t)_text_end + 32; - return (void *)(sec_heap &= ~7); + uintptr_t sec_heap = (uintptr_t)_text_end + (uintptr_t)32u; + return (void *)(sec_heap &= ~(uintptr_t)7u); } static inline __attribute__((always_inline)) size_t mmu_sec_heap_size(void) { - return (size_t)0xC000UL - ((size_t)mmu_sec_heap() - 0x40100000UL); + return (size_t)0xC000ul - ((uintptr_t)mmu_sec_heap() - (uintptr_t)0x40100000ul); } #endif diff --git a/libraries/esp8266/examples/irammem/irammem.ino b/libraries/esp8266/examples/irammem/irammem.ino index 169d53e05e..5bdf4d938a 100644 --- a/libraries/esp8266/examples/irammem/irammem.ino +++ b/libraries/esp8266/examples/irammem/irammem.ino @@ -14,6 +14,204 @@ #define ETS_PRINTF ets_uart_printf #endif +/* + Verify mmu_get_uint16()'s compliance with strict-aliasing rules under + different optimizations. 
+*/ + +#pragma GCC push_options +// reference +#pragma GCC optimize("O0") // We expect -O0 to generate the correct results +__attribute__((noinline)) +void aliasTestReference(uint16_t *x) { + // Without adhearance to strict-aliasing, this sequence of code would fail + // when optimized by GCC Version 10.3 + size_t len = 3; + for (size_t u = 0; u < len; u++) { + uint16_t x1 = mmu_get_uint16(&x[0]); + for (size_t v = 0; v < len; v++) { + x[v] = mmu_get_uint16(&x[v]) + x1; + } + } +} +// Tests +#pragma GCC optimize("Os") +__attribute__((noinline)) +void aliasTestOs(uint16_t *x) { + size_t len = 3; + for (size_t u = 0; u < len; u++) { + uint16_t x1 = mmu_get_uint16(&x[0]); + for (size_t v = 0; v < len; v++) { + x[v] = mmu_get_uint16(&x[v]) + x1; + } + } +} +#pragma GCC optimize("O2") +__attribute__((noinline)) +void aliasTestO2(uint16_t *x) { + size_t len = 3; + for (size_t u = 0; u < len; u++) { + uint16_t x1 = mmu_get_uint16(&x[0]); + for (size_t v = 0; v < len; v++) { + x[v] = mmu_get_uint16(&x[v]) + x1; + } + } +} +#pragma GCC optimize("O3") +__attribute__((noinline)) +void aliasTestO3(uint16_t *x) { + size_t len = 3; + for (size_t u = 0; u < len; u++) { + uint16_t x1 = mmu_get_uint16(&x[0]); + for (size_t v = 0; v < len; v++) { + x[v] = mmu_get_uint16(&x[v]) + x1; + } + } +} + +// Evaluate if optomizer may have changed 32-bit access to 8-bit. +// 8-bit access will take longer as it will be processed thought +// the exception handler. For this case the -O0 version will appear faster. +#pragma GCC optimize("O0") +__attribute__((noinline)) IRAM_ATTR +uint32_t timedRead_Reference(uint8_t *res) { + // This test case was verified with GCC 10.3 + // There is a code case that can result in 32-bit wide IRAM load from memory + // being optimized down to an 8-bit memory access. In this test case we need + // to supply a constant IRAM address that is not 0 when anded with 3u. + // This section verifies that the workaround implimented by the inline + // function mmu_get_uint8() is preventing this. See comments for function + // mmu_get_uint8(() in mmu_iram.h for more details. + const uint8_t *x = (const uint8_t *)0x40100003ul; + uint32_t b = ESP.getCycleCount(); + *res = mmu_get_uint8(x); + return ESP.getCycleCount() - b; +} +#pragma GCC optimize("Os") +__attribute__((noinline)) IRAM_ATTR +uint32_t timedRead_Os(uint8_t *res) { + const uint8_t *x = (const uint8_t *)0x40100003ul; + uint32_t b = ESP.getCycleCount(); + *res = mmu_get_uint8(x); + return ESP.getCycleCount() - b; +} +#pragma GCC optimize("O2") +__attribute__((noinline)) IRAM_ATTR +uint32_t timedRead_O2(uint8_t *res) { + const uint8_t *x = (const uint8_t *)0x40100003ul; + uint32_t b = ESP.getCycleCount(); + *res = mmu_get_uint8(x); + return ESP.getCycleCount() - b; +} +#pragma GCC optimize("O3") +__attribute__((noinline)) IRAM_ATTR +uint32_t timedRead_O3(uint8_t *res) { + const uint8_t *x = (const uint8_t *)0x40100003ul; + uint32_t b = ESP.getCycleCount(); + *res = mmu_get_uint8(x); + return ESP.getCycleCount() - b; +} +#pragma GCC pop_options + +bool test4_32bit_loads() { + bool result = true; + uint8_t res; + uint32_t cycle_count_ref, cycle_count; + Serial.printf("\r\nFor mmu_get_uint8, verify that 32-bit wide IRAM access is preserved across different optimizations:\r\n"); + cycle_count_ref = timedRead_Reference(&res); + /* + If the optimizer (for options -Os, -O2, and -O3) replaces the 32-bit wide + IRAM access with an 8-bit, the exception handler will get invoked on memory + reads. 
The total execution time will show a significant increase when + compared to the reference (option -O0). + */ + Serial.printf(" Option -O0, cycle count %5u - reference\r\n", cycle_count_ref); + cycle_count = timedRead_Os(&res); + Serial.printf(" Option -Os, cycle count %5u ", cycle_count); + if (cycle_count_ref > cycle_count) { + Serial.printf("- passed\r\n"); + } else { + result = false; + Serial.printf("- failed\r\n"); + } + cycle_count = timedRead_O2(&res); + Serial.printf(" Option -O2, cycle count %5u ", cycle_count); + if (cycle_count_ref > cycle_count) { + Serial.printf("- passed\r\n"); + } else { + result = false; + Serial.printf("- failed\r\n"); + } + cycle_count = timedRead_O3(&res); + Serial.printf(" Option -O3, cycle count %5u ", cycle_count); + if (cycle_count_ref > cycle_count) { + Serial.printf("- passed\r\n"); + } else { + result = false; + Serial.printf("- failed\r\n"); + } + return result; +} + +void printPunFail(uint16_t *ref, uint16_t *x, size_t sz) { + Serial.printf(" Expected:"); + for (size_t i = 0; i < sz; i++) { + Serial.printf(" %3u", ref[i]); + } + Serial.printf("\r\n Got: "); + for (size_t i = 0; i < sz; i++) { + Serial.printf(" %3u", x[i]); + } + Serial.printf("\r\n"); +} + +bool testPunning() { + bool result = true; + // Get reference result for verifing test + alignas(alignof(uint32_t)) uint16_t x_ref[] = {1, 2, 3, 0}; + aliasTestReference(x_ref); // -O0 + Serial.printf("mmu_get_uint16() strict-aliasing tests with different optimizations:\r\n"); + + { + alignas(alignof(uint32_t)) uint16_t x[] = {1, 2, 3, 0}; + aliasTestOs(x); + Serial.printf(" Option -Os "); + if (0 == memcmp(x_ref, x, sizeof(x_ref))) { + Serial.printf("- passed\r\n"); + } else { + result = false; + Serial.printf("- failed\r\n"); + printPunFail(x_ref, x, sizeof(x_ref)/sizeof(uint16_t)); + } + } + { + alignas(alignof(uint32_t)) uint16_t x[] = {1, 2, 3, 0}; + aliasTestO2(x); + Serial.printf(" Option -O2 "); + if (0 == memcmp(x_ref, x, sizeof(x_ref))) { + Serial.printf("- passed\r\n"); + } else { + result = false; + Serial.printf("- failed\r\n"); + printPunFail(x_ref, x, sizeof(x_ref)/sizeof(uint16_t)); + } + } + { + alignas(alignof(uint32_t)) uint16_t x[] = {1, 2, 3, 0}; + aliasTestO3(x); + Serial.printf(" Option -O3 "); + if (0 == memcmp(x_ref, x, sizeof(x_ref))) { + Serial.printf("- passed\r\n"); + } else { + result = false; + Serial.printf("- failed\r\n"); + printPunFail(x_ref, x, sizeof(x_ref)/sizeof(uint16_t)); + } + } + return result; +} + + uint32_t cyclesToRead_nKx32(int n, unsigned int *x, uint32_t *res) { uint32_t b = ESP.getCycleCount(); uint32_t sum = 0; @@ -94,8 +292,10 @@ uint32_t cyclesToWrite_nKx8(int n, unsigned char*x) { return ESP.getCycleCount() - b; } +/* + Option "no-strict-aliasing" is required when using mmu_get... or mmu_set_... 
+ */
 // Compare with Inline
-
 uint32_t cyclesToRead_nKx16_viaInline(int n, unsigned short *x, uint32_t *res) {
   uint32_t b = ESP.getCycleCount();
   uint32_t sum = 0;
@@ -159,6 +359,7 @@ uint32_t cyclesToWrite_nKx8_viaInline(int n, unsigned char*x) {
   return ESP.getCycleCount() - b;
 }
 
+
 bool perfTest_nK(int nK, uint32_t *mem, uint32_t *imem) {
   uint32_t res, verify_res;
   uint32_t t;
@@ -317,7 +518,7 @@ void setup() {
 
   Serial.println();
 
-  if (perfTest_nK(1, mem, imem)) {
+  if (perfTest_nK(1, mem, imem) && testPunning() && test4_32bit_loads()) {
     Serial.println();
   } else {
     Serial.println("\r\n*******************************");
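
For reference, the read-side pattern that [PATCH 1/6] builds the mmu_get_...() accessors around can be reduced to a short standalone sketch. The helper name below is hypothetical and the range asserts are omitted; it assumes a GCC-style toolchain where __builtin_memcpy of four aligned bytes folds into a single 32-bit load.

#include <stdint.h>

// Illustrative only: memcpy type pun plus an empty asm that pins the loaded
// word in a register so the optimizer cannot narrow the access back down to
// an 8-bit load from IRAM.
static inline uint8_t iram_read_uint8(const void *p8) {
  const void *v32 = (const void *)((uintptr_t)p8 & ~(uintptr_t)3u);
  uint32_t val;
  __builtin_memcpy(&val, v32, sizeof(uint32_t));   // expected to become l32i
  asm volatile ("" : "+r"(val));                   // inject 32-bit dependency
  return (uint8_t)(val >> (((uintptr_t)p8 & 3u) * 8u));
}
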
From 0cc26d625aa8c321a465c3e34b568f856afb9b3d Mon Sep 17 00:00:00 2001
From: M Hightower <27247790+mhightower83@users.noreply.github.com>
Date: Mon, 23 Aug 2021 21:08:36 -0700
Subject: [PATCH 3/6] style

---
 cores/esp8266/mmu_iram.h                      | 12 +++---
 .../esp8266/examples/irammem/irammem.ino      | 40 +++++++++----------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/cores/esp8266/mmu_iram.h b/cores/esp8266/mmu_iram.h
index 9eb1247646..1aa734e326 100644
--- a/cores/esp8266/mmu_iram.h
+++ b/cores/esp8266/mmu_iram.h
@@ -146,7 +146,7 @@ uint8_t mmu_get_uint8(const void *p8) {
   // output register will stop the the compiler from reloading the value later
   // as 8-bit load from IRAM.
   asm volatile ("" :"+r"(val)); // inject 32-bit dependency
-  uint32_t pos = ((uint32_t)p8 & 3u) * 8u;
+  uint32_t pos = ((uintptr_t)p8 & 3u) * 8u;
   val >>= pos;
   return (uint8_t)val;
 }
@@ -158,7 +158,7 @@ uint16_t mmu_get_uint16(const uint16_t *p16) {
   uint32_t val;
   __builtin_memcpy(&val, v32, sizeof(uint32_t));
   asm volatile ("" :"+r"(val));
-  uint32_t pos = ((uint32_t)p16 & 3u) * 8u;
+  uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
   val >>= pos;
   return (uint16_t)val;
 }
@@ -170,7 +170,7 @@ int16_t mmu_get_int16(const int16_t *p16) {
   uint32_t val;
   __builtin_memcpy(&val, v32, sizeof(uint32_t));
   asm volatile ("" :"+r"(val));
-  uint32_t pos = ((uint32_t)p16 & 3u) * 8u;
+  uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
   val >>= pos;
   return (int16_t)val;
 }
@@ -178,7 +178,7 @@ int16_t mmu_get_int16(const int16_t *p16) {
 static inline __attribute__((always_inline))
 uint8_t mmu_set_uint8(void *p8, const uint8_t val) {
   ASSERT_RANGE_TEST_WRITE(p8);
-  uint32_t pos = ((uint32_t)p8 & 3u) * 8u;
+  uint32_t pos = ((uintptr_t)p8 & 3u) * 8u;
   uint32_t sval = val << pos;
   uint32_t valmask = 0x0FFu << pos;
 
@@ -202,7 +202,7 @@ uint8_t mmu_set_uint8(void *p8, const uint8_t val) {
 static inline __attribute__((always_inline))
 uint16_t mmu_set_uint16(uint16_t *p16, const uint16_t val) {
   ASSERT_RANGE_TEST_WRITE(p16);
-  uint32_t pos = ((uint32_t)p16 & 3u) * 8u;
+  uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
   uint32_t sval = val << pos;
   uint32_t valmask = 0x0FFFFu << pos;
 
@@ -222,7 +222,7 @@ static inline __attribute__((always_inline))
 int16_t mmu_set_int16(int16_t *p16, const int16_t val) {
   ASSERT_RANGE_TEST_WRITE(p16);
   uint32_t sval = (uint16_t)val;
-  uint32_t pos = ((uint32_t)p16 & 3u) * 8u;
+  uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
   sval <<= pos;
   uint32_t valmask = 0x0FFFFu << pos;
 
diff --git a/libraries/esp8266/examples/irammem/irammem.ino b/libraries/esp8266/examples/irammem/irammem.ino
index 5bdf4d938a..abee02888b 100644
--- a/libraries/esp8266/examples/irammem/irammem.ino
+++ b/libraries/esp8266/examples/irammem/irammem.ino
@@ -27,46 +27,46 @@ void aliasTestReference(uint16_t *x) {
   // Without adhearance to strict-aliasing, this sequence of code would fail
   // when optimized by GCC Version 10.3
   size_t len = 3;
-  for (size_t u = 0; u < len;
u++) { + for (size_t u = 0; u < len; u++) { uint16_t x1 = mmu_get_uint16(&x[0]); - for (size_t v = 0; v < len; v++) { + for (size_t v = 0; v < len; v++) { x[v] = mmu_get_uint16(&x[v]) + x1; - } - } + } + } } // Tests #pragma GCC optimize("Os") __attribute__((noinline)) void aliasTestOs(uint16_t *x) { size_t len = 3; - for (size_t u = 0; u < len; u++) { + for (size_t u = 0; u < len; u++) { uint16_t x1 = mmu_get_uint16(&x[0]); - for (size_t v = 0; v < len; v++) { + for (size_t v = 0; v < len; v++) { x[v] = mmu_get_uint16(&x[v]) + x1; - } - } + } + } } #pragma GCC optimize("O2") __attribute__((noinline)) void aliasTestO2(uint16_t *x) { size_t len = 3; - for (size_t u = 0; u < len; u++) { + for (size_t u = 0; u < len; u++) { uint16_t x1 = mmu_get_uint16(&x[0]); - for (size_t v = 0; v < len; v++) { + for (size_t v = 0; v < len; v++) { x[v] = mmu_get_uint16(&x[v]) + x1; - } - } + } + } } #pragma GCC optimize("O3") __attribute__((noinline)) void aliasTestO3(uint16_t *x) { size_t len = 3; - for (size_t u = 0; u < len; u++) { + for (size_t u = 0; u < len; u++) { uint16_t x1 = mmu_get_uint16(&x[0]); - for (size_t v = 0; v < len; v++) { + for (size_t v = 0; v < len; v++) { x[v] = mmu_get_uint16(&x[v]) + x1; - } - } + } + } } // Evaluate if optomizer may have changed 32-bit access to 8-bit. @@ -181,7 +181,7 @@ bool testPunning() { } else { result = false; Serial.printf("- failed\r\n"); - printPunFail(x_ref, x, sizeof(x_ref)/sizeof(uint16_t)); + printPunFail(x_ref, x, sizeof(x_ref) / sizeof(uint16_t)); } } { @@ -193,7 +193,7 @@ bool testPunning() { } else { result = false; Serial.printf("- failed\r\n"); - printPunFail(x_ref, x, sizeof(x_ref)/sizeof(uint16_t)); + printPunFail(x_ref, x, sizeof(x_ref) / sizeof(uint16_t)); } } { @@ -205,7 +205,7 @@ bool testPunning() { } else { result = false; Serial.printf("- failed\r\n"); - printPunFail(x_ref, x, sizeof(x_ref)/sizeof(uint16_t)); + printPunFail(x_ref, x, sizeof(x_ref) / sizeof(uint16_t)); } } return result; @@ -294,7 +294,7 @@ uint32_t cyclesToWrite_nKx8(int n, unsigned char*x) { /* Option "no-strict-aliasing" is required when using mmu_get... or mmu_set_... - */ +*/ // Compare with Inline uint32_t cyclesToRead_nKx16_viaInline(int n, unsigned short *x, uint32_t *res) { uint32_t b = ESP.getCycleCount(); From 5df867f6f7c5d961f0df180fc56641dd4cef38d3 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Thu, 2 Sep 2021 07:16:59 -0700 Subject: [PATCH 4/6] Removed stale comment --- libraries/esp8266/examples/irammem/irammem.ino | 3 --- 1 file changed, 3 deletions(-) diff --git a/libraries/esp8266/examples/irammem/irammem.ino b/libraries/esp8266/examples/irammem/irammem.ino index abee02888b..6dbf26c608 100644 --- a/libraries/esp8266/examples/irammem/irammem.ino +++ b/libraries/esp8266/examples/irammem/irammem.ino @@ -292,9 +292,6 @@ uint32_t cyclesToWrite_nKx8(int n, unsigned char*x) { return ESP.getCycleCount() - b; } -/* - Option "no-strict-aliasing" is required when using mmu_get... or mmu_set_... -*/ // Compare with Inline uint32_t cyclesToRead_nKx16_viaInline(int n, unsigned short *x, uint32_t *res) { uint32_t b = ESP.getCycleCount(); From ebbac7bfbfc51f8cd317c34c8cb7911dd414385b Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Fri, 24 Sep 2021 11:40:26 -0700 Subject: [PATCH 5/6] Simplified use of aligas Corrected start of DRAM constant in mmu_is_dram(). Replaced #define(s) with const to properly limit scope. Compiler appears to optomize it down to the same size. 
In some places used ld variables and core-isa.h defines to set range checking values. --- cores/esp8266/mmu_iram.h | 35 ++++++++++++------- .../esp8266/examples/irammem/irammem.ino | 2 +- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/cores/esp8266/mmu_iram.h b/cores/esp8266/mmu_iram.h index 1aa734e326..4acaf78acd 100644 --- a/cores/esp8266/mmu_iram.h +++ b/cores/esp8266/mmu_iram.h @@ -26,7 +26,7 @@ extern "C" { #endif -//C This turns on range checking. Is this the value you want to trigger it? +// This turns on range checking. #ifdef DEBUG_ESP_CORE #define DEBUG_ESP_MMU #endif @@ -47,6 +47,13 @@ extern "C" { #if defined(DEV_DEBUG_PRINT) || defined(DEBUG_ESP_MMU) #include +#include // For config/core-isa.h +/* + Cautiously use XCHAL_..._VADDR values where possible. + While XCHAL_..._VADDR values in core-isa.h may define the Xtensa processor + CONFIG options, they are not always an indication of DRAM, IRAM, or ROM + size or position in the address space. +*/ #define DBG_MMU_FLUSH(a) while((USS(a) >> USTXC) & 0xff) {} @@ -71,32 +78,34 @@ DBG_MMU_FLUSH(0) static inline __attribute__((always_inline)) bool mmu_is_iram(const void *addr) { - #define IRAM_START 0x40100000UL + const uintptr_t iram_start = (uintptr_t)XCHAL_INSTRAM1_VADDR; #ifndef MMU_IRAM_SIZE #if defined(__GNUC__) && !defined(CORE_MOCK) #warning "MMU_IRAM_SIZE was undefined, setting to 0x8000UL!" #endif - #define MMU_IRAM_SIZE 0x8000UL + #define MMU_IRAM_SIZE 0x8000ul #endif - #define IRAM_END (IRAM_START + MMU_IRAM_SIZE) + const uintptr_t iram_end = iram_start + MMU_IRAM_SIZE; - return (IRAM_START <= (uintptr_t)addr && IRAM_END > (uintptr_t)addr); + return (iram_start <= (uintptr_t)addr && iram_end > (uintptr_t)addr); } static inline __attribute__((always_inline)) bool mmu_is_dram(const void *addr) { - #define DRAM_START 0x3FF80000UL - #define DRAM_END 0x40000000UL + const uintptr_t dram_start = 0x3FFE8000ul; + // The start of the Boot ROM sits at the end of DRAM. 
0x40000000ul; + const uintptr_t dram_end = (uintptr_t)XCHAL_INSTRAM0_VADDR; - return (DRAM_START <= (uintptr_t)addr && DRAM_END > (uintptr_t)addr); + return (dram_start <= (uintptr_t)addr && dram_end > (uintptr_t)addr); } static inline __attribute__((always_inline)) bool mmu_is_icache(const void *addr) { - #define ICACHE_START 0x40200000UL - #define ICACHE_END (ICACHE_START + 0x100000UL) + extern void _irom0_text_end(void); + const uintptr_t icache_start = (uintptr_t)XCHAL_INSTROM0_VADDR; + const uintptr_t icache_end = (uintptr_t)_irom0_text_end; - return (ICACHE_START <= (uintptr_t)addr && ICACHE_END > (uintptr_t)addr); + return (icache_start <= (uintptr_t)addr && icache_end > (uintptr_t)addr); } #ifdef DEBUG_ESP_MMU @@ -239,19 +248,19 @@ int16_t mmu_set_int16(int16_t *p16, const int16_t val) { } #if (MMU_IRAM_SIZE > 32*1024) && !defined(MMU_SEC_HEAP) -extern void _text_end(void); #define MMU_SEC_HEAP mmu_sec_heap() #define MMU_SEC_HEAP_SIZE mmu_sec_heap_size() static inline __attribute__((always_inline)) void *mmu_sec_heap(void) { + extern void _text_end(void); uintptr_t sec_heap = (uintptr_t)_text_end + (uintptr_t)32u; return (void *)(sec_heap &= ~(uintptr_t)7u); } static inline __attribute__((always_inline)) size_t mmu_sec_heap_size(void) { - return (size_t)0xC000ul - ((uintptr_t)mmu_sec_heap() - (uintptr_t)0x40100000ul); + return (size_t)0xC000ul - ((uintptr_t)mmu_sec_heap() - (uintptr_t)XCHAL_INSTRAM1_VADDR); } #endif diff --git a/libraries/esp8266/examples/irammem/irammem.ino b/libraries/esp8266/examples/irammem/irammem.ino index 6dbf26c608..dccc34a6a4 100644 --- a/libraries/esp8266/examples/irammem/irammem.ino +++ b/libraries/esp8266/examples/irammem/irammem.ino @@ -168,7 +168,7 @@ void printPunFail(uint16_t *ref, uint16_t *x, size_t sz) { bool testPunning() { bool result = true; // Get reference result for verifing test - alignas(alignof(uint32_t)) uint16_t x_ref[] = {1, 2, 3, 0}; + alignas(uint32_t) uint16_t x_ref[] = {1, 2, 3, 0}; aliasTestReference(x_ref); // -O0 Serial.printf("mmu_get_uint16() strict-aliasing tests with different optimizations:\r\n"); From 9814f70ea0c3126248fd3247b10b0c8ff25063f3 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Fri, 24 Sep 2021 18:41:02 -0700 Subject: [PATCH 6/6] Added CORE_MOCK #if. Updated related examples. --- cores/esp8266/mmu_iram.h | 18 +++++++++++------- .../examples/IramReserve/IramReserve.ino | 12 +++++++++--- libraries/esp8266/examples/MMU48K/MMU48K.ino | 8 +++++++- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/cores/esp8266/mmu_iram.h b/cores/esp8266/mmu_iram.h index 4acaf78acd..f7b62ba8c2 100644 --- a/cores/esp8266/mmu_iram.h +++ b/cores/esp8266/mmu_iram.h @@ -33,6 +33,17 @@ extern "C" { #if defined(CORE_MOCK) #define ets_uart_printf(...) do {} while(false) +#define XCHAL_INSTRAM0_VADDR 0x40000000 +#define XCHAL_INSTRAM1_VADDR 0x40100000 +#define XCHAL_INSTROM0_VADDR 0x40200000 +#else +#include // For config/core-isa.h +/* + Cautiously use XCHAL_..._VADDR values where possible. + While XCHAL_..._VADDR values in core-isa.h may define the Xtensa processor + CONFIG options, they are not always an indication of DRAM, IRAM, or ROM + size or position in the address space. +*/ #endif /* @@ -47,13 +58,6 @@ extern "C" { #if defined(DEV_DEBUG_PRINT) || defined(DEBUG_ESP_MMU) #include -#include // For config/core-isa.h -/* - Cautiously use XCHAL_..._VADDR values where possible. 
- While XCHAL_..._VADDR values in core-isa.h may define the Xtensa processor - CONFIG options, they are not always an indication of DRAM, IRAM, or ROM - size or position in the address space. -*/ #define DBG_MMU_FLUSH(a) while((USS(a) >> USTXC) & 0xff) {} diff --git a/libraries/esp8266/examples/IramReserve/IramReserve.ino b/libraries/esp8266/examples/IramReserve/IramReserve.ino index 344622f958..96d7479da4 100644 --- a/libraries/esp8266/examples/IramReserve/IramReserve.ino +++ b/libraries/esp8266/examples/IramReserve/IramReserve.ino @@ -17,6 +17,12 @@ #include #if defined(UMM_HEAP_IRAM) +#if defined(CORE_MOCK) +#define XCHAL_INSTRAM1_VADDR 0x40100000 +#else +#include // For config/core-isa.h +#endif + // durable - as in long life, persisting across reboots. struct durable { uint32_t bootCounter; @@ -30,7 +36,7 @@ struct durable { #define IRAM_RESERVE_SZ ((sizeof(struct durable) + 7UL) & ~7UL) // Position its address just above the reduced 2nd Heap. -#define IRAM_RESERVE (0x40100000UL + 0xC000UL - IRAM_RESERVE_SZ) +#define IRAM_RESERVE ((uintptr_t)XCHAL_INSTRAM1_VADDR + 0xC000UL - IRAM_RESERVE_SZ) // Define a reference with the right properties to make access easier. #define DURABLE ((struct durable *)IRAM_RESERVE) @@ -100,9 +106,9 @@ extern "C" void umm_init_iram(void) { adjustments and checksums. These can affect the persistence of data across reboots. */ - uint32_t sec_heap = (uint32_t)_text_end + 32; + uintptr_t sec_heap = (uintptr_t)_text_end + 32; sec_heap &= ~7; - size_t sec_heap_sz = 0xC000UL - (sec_heap - 0x40100000UL); + size_t sec_heap_sz = 0xC000UL - (sec_heap - (uintptr_t)XCHAL_INSTRAM1_VADDR); sec_heap_sz -= IRAM_RESERVE_SZ; // Shrink IRAM heap if (0xC000UL > sec_heap_sz) { diff --git a/libraries/esp8266/examples/MMU48K/MMU48K.ino b/libraries/esp8266/examples/MMU48K/MMU48K.ino index 6acb3840f9..d75d91232b 100644 --- a/libraries/esp8266/examples/MMU48K/MMU48K.ino +++ b/libraries/esp8266/examples/MMU48K/MMU48K.ino @@ -3,6 +3,12 @@ #include #include +#if defined(CORE_MOCK) +#define XCHAL_INSTRAM1_VADDR 0x40100000 +#else +#include // For config/core-isa.h +#endif + uint32_t timed_byte_read(char *pc, uint32_t * o); uint32_t timed_byte_read2(char *pc, uint32_t * o); int divideA_B(int a, int b); @@ -102,7 +108,7 @@ void print_mmu_status(Print& oStream) { #ifdef MMU_IRAM_SIZE oStream.printf_P(PSTR(" IRAM Size: %u"), MMU_IRAM_SIZE); oStream.println(); - const uint32_t iram_free = MMU_IRAM_SIZE - (uint32_t)((uintptr_t)_text_end - 0x40100000UL); + const uint32_t iram_free = MMU_IRAM_SIZE - (uint32_t)((uintptr_t)_text_end - (uintptr_t)XCHAL_INSTRAM1_VADDR); oStream.printf_P(PSTR(" IRAM free: %u"), iram_free); oStream.println(); #endif pFad - Phonifier reborn
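
As a companion to the XCHAL_INSTRAM1_VADDR changes in the last two patches, the second-heap arithmetic can be exercised in isolation with a small host-side sketch. The fallback define matches the CORE_MOCK branch above; the sample end-of-.text offset is an assumption made up for the illustration.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#ifndef XCHAL_INSTRAM1_VADDR
#define XCHAL_INSTRAM1_VADDR 0x40100000  // fallback, as in the CORE_MOCK branch
#endif

// Mirrors mmu_sec_heap(): skip 32 bytes past the end of .text, then round
// down to an 8-byte boundary.
static uintptr_t sec_heap_start(uintptr_t text_end) {
  return (text_end + 32u) & ~(uintptr_t)7u;
}

// Mirrors mmu_sec_heap_size(): whatever remains of the first 0xC000 bytes
// of IRAM after the code and the 32-byte gap.
static size_t sec_heap_size(uintptr_t text_end) {
  return (size_t)0xC000u - (sec_heap_start(text_end) - (uintptr_t)XCHAL_INSTRAM1_VADDR);
}

int main(void) {
  // Hypothetical end-of-.text address, 0x6000 bytes into IRAM.
  uintptr_t text_end = (uintptr_t)XCHAL_INSTRAM1_VADDR + 0x6000u;
  printf("2nd heap at 0x%08lx, %u bytes\n",
         (unsigned long)sec_heap_start(text_end), (unsigned)sec_heap_size(text_end));
  return 0;
}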

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy