From a8aed2953b2abcce4321960031656d3c6bdd5156 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Fri, 20 Aug 2021 15:52:53 -0700 Subject: [PATCH 1/6] Update mmu_get... and mmu_set... to comply with strict-aliasing rules. Added 32-bit dependency injections as needed to guard against compiler optimizing 32-bit loads from IRAM to 8-bit or 16-bit loads. --- cores/esp8266/mmu_iram.h | 89 ++++++-- .../esp8266/examples/irammem/irammem.ino | 205 +++++++++++++++++- 2 files changed, 268 insertions(+), 26 deletions(-) diff --git a/cores/esp8266/mmu_iram.h b/cores/esp8266/mmu_iram.h index 7ba06be9bb..9eb1247646 100644 --- a/cores/esp8266/mmu_iram.h +++ b/cores/esp8266/mmu_iram.h @@ -127,8 +127,26 @@ bool mmu_is_icache(const void *addr) { static inline __attribute__((always_inline)) uint8_t mmu_get_uint8(const void *p8) { ASSERT_RANGE_TEST_READ(p8); - uint32_t val = (*(uint32_t *)((uintptr_t)p8 & ~0x3)); - uint32_t pos = ((uintptr_t)p8 & 0x3) * 8; + // https://gist.github.com/shafik/848ae25ee209f698763cffee272a58f8#how-do-we-type-pun-correctly + // Comply with strict-aliasing rules. Using memcpy is a Standards suggested + // method for type punning. The compiler optimizer will replace the memcpy + // with an `l32i` instruction. Using __builtin_memcpy to ensure we get the + // effects of the compiler optimization and not some #define version of + // memcpy. + void *v32 = (void *)((uintptr_t)p8 & ~(uintptr_t)3u); + uint32_t val; + __builtin_memcpy(&val, v32, sizeof(uint32_t)); + // Use an empty ASM to reference the 32-bit value. This will block the + // compiler from immediately optimizing to an 8-bit or 16-bit load instruction + // against IRAM memory. (This approach was inspired by + // https://github.com/esp8266/Arduino/pull/7780#discussion_r548303374) + // This issue was seen when using a constant address with the GCC 10.3 + // compiler. + // As a general practice, I think referencing by way of Extended ASM R/W + // output register will stop the the compiler from reloading the value later + // as 8-bit load from IRAM. 
+ asm volatile ("" :"+r"(val)); // inject 32-bit dependency + uint32_t pos = ((uint32_t)p8 & 3u) * 8u; val >>= pos; return (uint8_t)val; } @@ -136,8 +154,11 @@ uint8_t mmu_get_uint8(const void *p8) { static inline __attribute__((always_inline)) uint16_t mmu_get_uint16(const uint16_t *p16) { ASSERT_RANGE_TEST_READ(p16); - uint32_t val = (*(uint32_t *)((uintptr_t)p16 & ~0x3)); - uint32_t pos = ((uintptr_t)p16 & 0x3) * 8; + void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)0x3u); + uint32_t val; + __builtin_memcpy(&val, v32, sizeof(uint32_t)); + asm volatile ("" :"+r"(val)); + uint32_t pos = ((uint32_t)p16 & 3u) * 8u; val >>= pos; return (uint16_t)val; } @@ -145,8 +166,11 @@ uint16_t mmu_get_uint16(const uint16_t *p16) { static inline __attribute__((always_inline)) int16_t mmu_get_int16(const int16_t *p16) { ASSERT_RANGE_TEST_READ(p16); - uint32_t val = (*(uint32_t *)((uintptr_t)p16 & ~0x3)); - uint32_t pos = ((uintptr_t)p16 & 0x3) * 8; + void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)3u); + uint32_t val; + __builtin_memcpy(&val, v32, sizeof(uint32_t)); + asm volatile ("" :"+r"(val)); + uint32_t pos = ((uint32_t)p16 & 3u) * 8u; val >>= pos; return (int16_t)val; } @@ -154,30 +178,43 @@ int16_t mmu_get_int16(const int16_t *p16) { static inline __attribute__((always_inline)) uint8_t mmu_set_uint8(void *p8, const uint8_t val) { ASSERT_RANGE_TEST_WRITE(p8); - uint32_t pos = ((uintptr_t)p8 & 0x3) * 8; + uint32_t pos = ((uint32_t)p8 & 3u) * 8u; uint32_t sval = val << pos; - uint32_t valmask = 0x0FF << pos; + uint32_t valmask = 0x0FFu << pos; + + void *v32 = (void *)((uintptr_t)p8 & ~(uintptr_t)3u); + uint32_t ival; + __builtin_memcpy(&ival, v32, sizeof(uint32_t)); + asm volatile ("" :"+r"(ival)); - uint32_t *p32 = (uint32_t *)((uintptr_t)p8 & ~0x3); - uint32_t ival = *p32; ival &= (~valmask); ival |= sval; - *p32 = ival; + /* + This 32-bit dependency injection does not appear to be needed with the + current GCC 10.3; however, that could change in the future versions. Or, I + may not have the right test for it to fail. 
+ */ + asm volatile ("" :"+r"(ival)); + __builtin_memcpy(v32, &ival, sizeof(uint32_t)); return val; } static inline __attribute__((always_inline)) uint16_t mmu_set_uint16(uint16_t *p16, const uint16_t val) { ASSERT_RANGE_TEST_WRITE(p16); - uint32_t pos = ((uintptr_t)p16 & 0x3) * 8; + uint32_t pos = ((uint32_t)p16 & 3u) * 8u; uint32_t sval = val << pos; - uint32_t valmask = 0x0FFFF << pos; + uint32_t valmask = 0x0FFFFu << pos; + + void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)3u); + uint32_t ival; + __builtin_memcpy(&ival, v32, sizeof(uint32_t)); + asm volatile ("" :"+r"(ival)); - uint32_t *p32 = (uint32_t *)((uintptr_t)p16 & ~0x3); - uint32_t ival = *p32; ival &= (~valmask); ival |= sval; - *p32 = ival; + asm volatile ("" :"+r"(ival)); + __builtin_memcpy(v32, &ival, sizeof(uint32_t)); return val; } @@ -185,15 +222,19 @@ static inline __attribute__((always_inline)) int16_t mmu_set_int16(int16_t *p16, const int16_t val) { ASSERT_RANGE_TEST_WRITE(p16); uint32_t sval = (uint16_t)val; - uint32_t pos = ((uintptr_t)p16 & 0x3) * 8; + uint32_t pos = ((uint32_t)p16 & 3u) * 8u; sval <<= pos; - uint32_t valmask = 0x0FFFF << pos; + uint32_t valmask = 0x0FFFFu << pos; + + void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)3u); + uint32_t ival; + __builtin_memcpy(&ival, v32, sizeof(uint32_t)); + asm volatile ("" :"+r"(ival)); - uint32_t *p32 = (uint32_t *)((uintptr_t)p16 & ~0x3); - uint32_t ival = *p32; ival &= (~valmask); ival |= sval; - *p32 = ival; + asm volatile ("" :"+r"(ival)); + __builtin_memcpy(v32, &ival, sizeof(uint32_t)); return val; } @@ -204,13 +245,13 @@ extern void _text_end(void); static inline __attribute__((always_inline)) void *mmu_sec_heap(void) { - uint32_t sec_heap = (uint32_t)_text_end + 32; - return (void *)(sec_heap &= ~7); + uintptr_t sec_heap = (uintptr_t)_text_end + (uintptr_t)32u; + return (void *)(sec_heap &= ~(uintptr_t)7u); } static inline __attribute__((always_inline)) size_t mmu_sec_heap_size(void) { - return (size_t)0xC000UL - ((size_t)mmu_sec_heap() - 0x40100000UL); + return (size_t)0xC000ul - ((uintptr_t)mmu_sec_heap() - (uintptr_t)0x40100000ul); } #endif diff --git a/libraries/esp8266/examples/irammem/irammem.ino b/libraries/esp8266/examples/irammem/irammem.ino index 169d53e05e..5bdf4d938a 100644 --- a/libraries/esp8266/examples/irammem/irammem.ino +++ b/libraries/esp8266/examples/irammem/irammem.ino @@ -14,6 +14,204 @@ #define ETS_PRINTF ets_uart_printf #endif +/* + Verify mmu_get_uint16()'s compliance with strict-aliasing rules under + different optimizations. 
+*/ + +#pragma GCC push_options +// reference +#pragma GCC optimize("O0") // We expect -O0 to generate the correct results +__attribute__((noinline)) +void aliasTestReference(uint16_t *x) { + // Without adhearance to strict-aliasing, this sequence of code would fail + // when optimized by GCC Version 10.3 + size_t len = 3; + for (size_t u = 0; u < len; u++) { + uint16_t x1 = mmu_get_uint16(&x[0]); + for (size_t v = 0; v < len; v++) { + x[v] = mmu_get_uint16(&x[v]) + x1; + } + } +} +// Tests +#pragma GCC optimize("Os") +__attribute__((noinline)) +void aliasTestOs(uint16_t *x) { + size_t len = 3; + for (size_t u = 0; u < len; u++) { + uint16_t x1 = mmu_get_uint16(&x[0]); + for (size_t v = 0; v < len; v++) { + x[v] = mmu_get_uint16(&x[v]) + x1; + } + } +} +#pragma GCC optimize("O2") +__attribute__((noinline)) +void aliasTestO2(uint16_t *x) { + size_t len = 3; + for (size_t u = 0; u < len; u++) { + uint16_t x1 = mmu_get_uint16(&x[0]); + for (size_t v = 0; v < len; v++) { + x[v] = mmu_get_uint16(&x[v]) + x1; + } + } +} +#pragma GCC optimize("O3") +__attribute__((noinline)) +void aliasTestO3(uint16_t *x) { + size_t len = 3; + for (size_t u = 0; u < len; u++) { + uint16_t x1 = mmu_get_uint16(&x[0]); + for (size_t v = 0; v < len; v++) { + x[v] = mmu_get_uint16(&x[v]) + x1; + } + } +} + +// Evaluate if optomizer may have changed 32-bit access to 8-bit. +// 8-bit access will take longer as it will be processed thought +// the exception handler. For this case the -O0 version will appear faster. +#pragma GCC optimize("O0") +__attribute__((noinline)) IRAM_ATTR +uint32_t timedRead_Reference(uint8_t *res) { + // This test case was verified with GCC 10.3 + // There is a code case that can result in 32-bit wide IRAM load from memory + // being optimized down to an 8-bit memory access. In this test case we need + // to supply a constant IRAM address that is not 0 when anded with 3u. + // This section verifies that the workaround implimented by the inline + // function mmu_get_uint8() is preventing this. See comments for function + // mmu_get_uint8(() in mmu_iram.h for more details. + const uint8_t *x = (const uint8_t *)0x40100003ul; + uint32_t b = ESP.getCycleCount(); + *res = mmu_get_uint8(x); + return ESP.getCycleCount() - b; +} +#pragma GCC optimize("Os") +__attribute__((noinline)) IRAM_ATTR +uint32_t timedRead_Os(uint8_t *res) { + const uint8_t *x = (const uint8_t *)0x40100003ul; + uint32_t b = ESP.getCycleCount(); + *res = mmu_get_uint8(x); + return ESP.getCycleCount() - b; +} +#pragma GCC optimize("O2") +__attribute__((noinline)) IRAM_ATTR +uint32_t timedRead_O2(uint8_t *res) { + const uint8_t *x = (const uint8_t *)0x40100003ul; + uint32_t b = ESP.getCycleCount(); + *res = mmu_get_uint8(x); + return ESP.getCycleCount() - b; +} +#pragma GCC optimize("O3") +__attribute__((noinline)) IRAM_ATTR +uint32_t timedRead_O3(uint8_t *res) { + const uint8_t *x = (const uint8_t *)0x40100003ul; + uint32_t b = ESP.getCycleCount(); + *res = mmu_get_uint8(x); + return ESP.getCycleCount() - b; +} +#pragma GCC pop_options + +bool test4_32bit_loads() { + bool result = true; + uint8_t res; + uint32_t cycle_count_ref, cycle_count; + Serial.printf("\r\nFor mmu_get_uint8, verify that 32-bit wide IRAM access is preserved across different optimizations:\r\n"); + cycle_count_ref = timedRead_Reference(&res); + /* + If the optimizer (for options -Os, -O2, and -O3) replaces the 32-bit wide + IRAM access with an 8-bit, the exception handler will get invoked on memory + reads. 
The total execution time will show a significant increase when + compared to the reference (option -O0). + */ + Serial.printf(" Option -O0, cycle count %5u - reference\r\n", cycle_count_ref); + cycle_count = timedRead_Os(&res); + Serial.printf(" Option -Os, cycle count %5u ", cycle_count); + if (cycle_count_ref > cycle_count) { + Serial.printf("- passed\r\n"); + } else { + result = false; + Serial.printf("- failed\r\n"); + } + cycle_count = timedRead_O2(&res); + Serial.printf(" Option -O2, cycle count %5u ", cycle_count); + if (cycle_count_ref > cycle_count) { + Serial.printf("- passed\r\n"); + } else { + result = false; + Serial.printf("- failed\r\n"); + } + cycle_count = timedRead_O3(&res); + Serial.printf(" Option -O3, cycle count %5u ", cycle_count); + if (cycle_count_ref > cycle_count) { + Serial.printf("- passed\r\n"); + } else { + result = false; + Serial.printf("- failed\r\n"); + } + return result; +} + +void printPunFail(uint16_t *ref, uint16_t *x, size_t sz) { + Serial.printf(" Expected:"); + for (size_t i = 0; i < sz; i++) { + Serial.printf(" %3u", ref[i]); + } + Serial.printf("\r\n Got: "); + for (size_t i = 0; i < sz; i++) { + Serial.printf(" %3u", x[i]); + } + Serial.printf("\r\n"); +} + +bool testPunning() { + bool result = true; + // Get reference result for verifing test + alignas(alignof(uint32_t)) uint16_t x_ref[] = {1, 2, 3, 0}; + aliasTestReference(x_ref); // -O0 + Serial.printf("mmu_get_uint16() strict-aliasing tests with different optimizations:\r\n"); + + { + alignas(alignof(uint32_t)) uint16_t x[] = {1, 2, 3, 0}; + aliasTestOs(x); + Serial.printf(" Option -Os "); + if (0 == memcmp(x_ref, x, sizeof(x_ref))) { + Serial.printf("- passed\r\n"); + } else { + result = false; + Serial.printf("- failed\r\n"); + printPunFail(x_ref, x, sizeof(x_ref)/sizeof(uint16_t)); + } + } + { + alignas(alignof(uint32_t)) uint16_t x[] = {1, 2, 3, 0}; + aliasTestO2(x); + Serial.printf(" Option -O2 "); + if (0 == memcmp(x_ref, x, sizeof(x_ref))) { + Serial.printf("- passed\r\n"); + } else { + result = false; + Serial.printf("- failed\r\n"); + printPunFail(x_ref, x, sizeof(x_ref)/sizeof(uint16_t)); + } + } + { + alignas(alignof(uint32_t)) uint16_t x[] = {1, 2, 3, 0}; + aliasTestO3(x); + Serial.printf(" Option -O3 "); + if (0 == memcmp(x_ref, x, sizeof(x_ref))) { + Serial.printf("- passed\r\n"); + } else { + result = false; + Serial.printf("- failed\r\n"); + printPunFail(x_ref, x, sizeof(x_ref)/sizeof(uint16_t)); + } + } + return result; +} + + uint32_t cyclesToRead_nKx32(int n, unsigned int *x, uint32_t *res) { uint32_t b = ESP.getCycleCount(); uint32_t sum = 0; @@ -94,8 +292,10 @@ uint32_t cyclesToWrite_nKx8(int n, unsigned char*x) { return ESP.getCycleCount() - b; } +/* + Option "no-strict-aliasing" is required when using mmu_get... or mmu_set_... 
+ */
 // Compare with Inline
-
 uint32_t cyclesToRead_nKx16_viaInline(int n, unsigned short *x, uint32_t *res) {
   uint32_t b = ESP.getCycleCount();
   uint32_t sum = 0;
@@ -159,6 +359,7 @@ uint32_t cyclesToWrite_nKx8_viaInline(int n, unsigned char*x) {
   return ESP.getCycleCount() - b;
 }
 
+
 bool perfTest_nK(int nK, uint32_t *mem, uint32_t *imem) {
   uint32_t res, verify_res;
   uint32_t t;
@@ -317,7 +518,7 @@ void setup() {
 
   Serial.println();
 
-  if (perfTest_nK(1, mem, imem)) {
+  if (perfTest_nK(1, mem, imem) && testPunning() && test4_32bit_loads()) {
     Serial.println();
   } else {
     Serial.println("\r\n*******************************");
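
For reference, the read-side pattern that [PATCH 1/6] builds the mmu_get_...() accessors around can be reduced to a short standalone sketch. The helper name below is hypothetical and the range asserts are omitted; it assumes a GCC-style toolchain where __builtin_memcpy of four aligned bytes folds into a single 32-bit load.

#include <stdint.h>

// Illustrative only: memcpy type pun plus an empty asm that pins the loaded
// word in a register so the optimizer cannot narrow the access back down to
// an 8-bit load from IRAM.
static inline uint8_t iram_read_uint8(const void *p8) {
  const void *v32 = (const void *)((uintptr_t)p8 & ~(uintptr_t)3u);
  uint32_t val;
  __builtin_memcpy(&val, v32, sizeof(uint32_t));   // expected to become l32i
  asm volatile ("" : "+r"(val));                   // inject 32-bit dependency
  return (uint8_t)(val >> (((uintptr_t)p8 & 3u) * 8u));
}
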
From 0cc26d625aa8c321a465c3e34b568f856afb9b3d Mon Sep 17 00:00:00 2001
From: M Hightower <27247790+mhightower83@users.noreply.github.com>
Date: Mon, 23 Aug 2021 21:08:36 -0700
Subject: [PATCH 3/6] style

---
 cores/esp8266/mmu_iram.h                      | 12 +++---
 .../esp8266/examples/irammem/irammem.ino      | 40 +++++++++----------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/cores/esp8266/mmu_iram.h b/cores/esp8266/mmu_iram.h
index 9eb1247646..1aa734e326 100644
--- a/cores/esp8266/mmu_iram.h
+++ b/cores/esp8266/mmu_iram.h
@@ -146,7 +146,7 @@ uint8_t mmu_get_uint8(const void *p8) {
   // output register will stop the the compiler from reloading the value later
   // as 8-bit load from IRAM.
   asm volatile ("" :"+r"(val)); // inject 32-bit dependency
-  uint32_t pos = ((uint32_t)p8 & 3u) * 8u;
+  uint32_t pos = ((uintptr_t)p8 & 3u) * 8u;
   val >>= pos;
   return (uint8_t)val;
 }
@@ -158,7 +158,7 @@ uint16_t mmu_get_uint16(const uint16_t *p16) {
   uint32_t val;
   __builtin_memcpy(&val, v32, sizeof(uint32_t));
   asm volatile ("" :"+r"(val));
-  uint32_t pos = ((uint32_t)p16 & 3u) * 8u;
+  uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
   val >>= pos;
   return (uint16_t)val;
 }
@@ -170,7 +170,7 @@ int16_t mmu_get_int16(const int16_t *p16) {
   uint32_t val;
   __builtin_memcpy(&val, v32, sizeof(uint32_t));
   asm volatile ("" :"+r"(val));
-  uint32_t pos = ((uint32_t)p16 & 3u) * 8u;
+  uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
   val >>= pos;
   return (int16_t)val;
 }
@@ -178,7 +178,7 @@ int16_t mmu_get_int16(const int16_t *p16) {
 static inline __attribute__((always_inline))
 uint8_t mmu_set_uint8(void *p8, const uint8_t val) {
   ASSERT_RANGE_TEST_WRITE(p8);
-  uint32_t pos = ((uint32_t)p8 & 3u) * 8u;
+  uint32_t pos = ((uintptr_t)p8 & 3u) * 8u;
   uint32_t sval = val << pos;
   uint32_t valmask = 0x0FFu << pos;
 
@@ -202,7 +202,7 @@ uint8_t mmu_set_uint8(void *p8, const uint8_t val) {
 static inline __attribute__((always_inline))
 uint16_t mmu_set_uint16(uint16_t *p16, const uint16_t val) {
   ASSERT_RANGE_TEST_WRITE(p16);
-  uint32_t pos = ((uint32_t)p16 & 3u) * 8u;
+  uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
   uint32_t sval = val << pos;
   uint32_t valmask = 0x0FFFFu << pos;
 
@@ -222,7 +222,7 @@ static inline __attribute__((always_inline))
 int16_t mmu_set_int16(int16_t *p16, const int16_t val) {
   ASSERT_RANGE_TEST_WRITE(p16);
   uint32_t sval = (uint16_t)val;
-  uint32_t pos = ((uint32_t)p16 & 3u) * 8u;
+  uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
   sval <<= pos;
   uint32_t valmask = 0x0FFFFu << pos;
 
diff --git a/libraries/esp8266/examples/irammem/irammem.ino b/libraries/esp8266/examples/irammem/irammem.ino
index 5bdf4d938a..abee02888b 100644
--- a/libraries/esp8266/examples/irammem/irammem.ino
+++ b/libraries/esp8266/examples/irammem/irammem.ino
@@ -27,46 +27,46 @@ void aliasTestReference(uint16_t *x) {
   // Without adhearance to strict-aliasing, this sequence of code would fail
   // when optimized by GCC Version 10.3
   size_t len = 3;
-  for (size_t u = 0; u < len;
u++) { + for (size_t u = 0; u < len; u++) { uint16_t x1 = mmu_get_uint16(&x[0]); - for (size_t v = 0; v < len; v++) { + for (size_t v = 0; v < len; v++) { x[v] = mmu_get_uint16(&x[v]) + x1; - } - } + } + } } // Tests #pragma GCC optimize("Os") __attribute__((noinline)) void aliasTestOs(uint16_t *x) { size_t len = 3; - for (size_t u = 0; u < len; u++) { + for (size_t u = 0; u < len; u++) { uint16_t x1 = mmu_get_uint16(&x[0]); - for (size_t v = 0; v < len; v++) { + for (size_t v = 0; v < len; v++) { x[v] = mmu_get_uint16(&x[v]) + x1; - } - } + } + } } #pragma GCC optimize("O2") __attribute__((noinline)) void aliasTestO2(uint16_t *x) { size_t len = 3; - for (size_t u = 0; u < len; u++) { + for (size_t u = 0; u < len; u++) { uint16_t x1 = mmu_get_uint16(&x[0]); - for (size_t v = 0; v < len; v++) { + for (size_t v = 0; v < len; v++) { x[v] = mmu_get_uint16(&x[v]) + x1; - } - } + } + } } #pragma GCC optimize("O3") __attribute__((noinline)) void aliasTestO3(uint16_t *x) { size_t len = 3; - for (size_t u = 0; u < len; u++) { + for (size_t u = 0; u < len; u++) { uint16_t x1 = mmu_get_uint16(&x[0]); - for (size_t v = 0; v < len; v++) { + for (size_t v = 0; v < len; v++) { x[v] = mmu_get_uint16(&x[v]) + x1; - } - } + } + } } // Evaluate if optomizer may have changed 32-bit access to 8-bit. @@ -181,7 +181,7 @@ bool testPunning() { } else { result = false; Serial.printf("- failed\r\n"); - printPunFail(x_ref, x, sizeof(x_ref)/sizeof(uint16_t)); + printPunFail(x_ref, x, sizeof(x_ref) / sizeof(uint16_t)); } } { @@ -193,7 +193,7 @@ bool testPunning() { } else { result = false; Serial.printf("- failed\r\n"); - printPunFail(x_ref, x, sizeof(x_ref)/sizeof(uint16_t)); + printPunFail(x_ref, x, sizeof(x_ref) / sizeof(uint16_t)); } } { @@ -205,7 +205,7 @@ bool testPunning() { } else { result = false; Serial.printf("- failed\r\n"); - printPunFail(x_ref, x, sizeof(x_ref)/sizeof(uint16_t)); + printPunFail(x_ref, x, sizeof(x_ref) / sizeof(uint16_t)); } } return result; @@ -294,7 +294,7 @@ uint32_t cyclesToWrite_nKx8(int n, unsigned char*x) { /* Option "no-strict-aliasing" is required when using mmu_get... or mmu_set_... - */ +*/ // Compare with Inline uint32_t cyclesToRead_nKx16_viaInline(int n, unsigned short *x, uint32_t *res) { uint32_t b = ESP.getCycleCount(); From 5df867f6f7c5d961f0df180fc56641dd4cef38d3 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Thu, 2 Sep 2021 07:16:59 -0700 Subject: [PATCH 4/6] Removed stale comment --- libraries/esp8266/examples/irammem/irammem.ino | 3 --- 1 file changed, 3 deletions(-) diff --git a/libraries/esp8266/examples/irammem/irammem.ino b/libraries/esp8266/examples/irammem/irammem.ino index abee02888b..6dbf26c608 100644 --- a/libraries/esp8266/examples/irammem/irammem.ino +++ b/libraries/esp8266/examples/irammem/irammem.ino @@ -292,9 +292,6 @@ uint32_t cyclesToWrite_nKx8(int n, unsigned char*x) { return ESP.getCycleCount() - b; } -/* - Option "no-strict-aliasing" is required when using mmu_get... or mmu_set_... -*/ // Compare with Inline uint32_t cyclesToRead_nKx16_viaInline(int n, unsigned short *x, uint32_t *res) { uint32_t b = ESP.getCycleCount(); From ebbac7bfbfc51f8cd317c34c8cb7911dd414385b Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Fri, 24 Sep 2021 11:40:26 -0700 Subject: [PATCH 5/6] Simplified use of aligas Corrected start of DRAM constant in mmu_is_dram(). Replaced #define(s) with const to properly limit scope. Compiler appears to optomize it down to the same size. 
In some places used ld variables and core-isa.h defines to set range checking values. --- cores/esp8266/mmu_iram.h | 35 ++++++++++++------- .../esp8266/examples/irammem/irammem.ino | 2 +- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/cores/esp8266/mmu_iram.h b/cores/esp8266/mmu_iram.h index 1aa734e326..4acaf78acd 100644 --- a/cores/esp8266/mmu_iram.h +++ b/cores/esp8266/mmu_iram.h @@ -26,7 +26,7 @@ extern "C" { #endif -//C This turns on range checking. Is this the value you want to trigger it? +// This turns on range checking. #ifdef DEBUG_ESP_CORE #define DEBUG_ESP_MMU #endif @@ -47,6 +47,13 @@ extern "C" { #if defined(DEV_DEBUG_PRINT) || defined(DEBUG_ESP_MMU) #include +#include // For config/core-isa.h +/* + Cautiously use XCHAL_..._VADDR values where possible. + While XCHAL_..._VADDR values in core-isa.h may define the Xtensa processor + CONFIG options, they are not always an indication of DRAM, IRAM, or ROM + size or position in the address space. +*/ #define DBG_MMU_FLUSH(a) while((USS(a) >> USTXC) & 0xff) {} @@ -71,32 +78,34 @@ DBG_MMU_FLUSH(0) static inline __attribute__((always_inline)) bool mmu_is_iram(const void *addr) { - #define IRAM_START 0x40100000UL + const uintptr_t iram_start = (uintptr_t)XCHAL_INSTRAM1_VADDR; #ifndef MMU_IRAM_SIZE #if defined(__GNUC__) && !defined(CORE_MOCK) #warning "MMU_IRAM_SIZE was undefined, setting to 0x8000UL!" #endif - #define MMU_IRAM_SIZE 0x8000UL + #define MMU_IRAM_SIZE 0x8000ul #endif - #define IRAM_END (IRAM_START + MMU_IRAM_SIZE) + const uintptr_t iram_end = iram_start + MMU_IRAM_SIZE; - return (IRAM_START <= (uintptr_t)addr && IRAM_END > (uintptr_t)addr); + return (iram_start <= (uintptr_t)addr && iram_end > (uintptr_t)addr); } static inline __attribute__((always_inline)) bool mmu_is_dram(const void *addr) { - #define DRAM_START 0x3FF80000UL - #define DRAM_END 0x40000000UL + const uintptr_t dram_start = 0x3FFE8000ul; + // The start of the Boot ROM sits at the end of DRAM. 
0x40000000ul; + const uintptr_t dram_end = (uintptr_t)XCHAL_INSTRAM0_VADDR; - return (DRAM_START <= (uintptr_t)addr && DRAM_END > (uintptr_t)addr); + return (dram_start <= (uintptr_t)addr && dram_end > (uintptr_t)addr); } static inline __attribute__((always_inline)) bool mmu_is_icache(const void *addr) { - #define ICACHE_START 0x40200000UL - #define ICACHE_END (ICACHE_START + 0x100000UL) + extern void _irom0_text_end(void); + const uintptr_t icache_start = (uintptr_t)XCHAL_INSTROM0_VADDR; + const uintptr_t icache_end = (uintptr_t)_irom0_text_end; - return (ICACHE_START <= (uintptr_t)addr && ICACHE_END > (uintptr_t)addr); + return (icache_start <= (uintptr_t)addr && icache_end > (uintptr_t)addr); } #ifdef DEBUG_ESP_MMU @@ -239,19 +248,19 @@ int16_t mmu_set_int16(int16_t *p16, const int16_t val) { } #if (MMU_IRAM_SIZE > 32*1024) && !defined(MMU_SEC_HEAP) -extern void _text_end(void); #define MMU_SEC_HEAP mmu_sec_heap() #define MMU_SEC_HEAP_SIZE mmu_sec_heap_size() static inline __attribute__((always_inline)) void *mmu_sec_heap(void) { + extern void _text_end(void); uintptr_t sec_heap = (uintptr_t)_text_end + (uintptr_t)32u; return (void *)(sec_heap &= ~(uintptr_t)7u); } static inline __attribute__((always_inline)) size_t mmu_sec_heap_size(void) { - return (size_t)0xC000ul - ((uintptr_t)mmu_sec_heap() - (uintptr_t)0x40100000ul); + return (size_t)0xC000ul - ((uintptr_t)mmu_sec_heap() - (uintptr_t)XCHAL_INSTRAM1_VADDR); } #endif diff --git a/libraries/esp8266/examples/irammem/irammem.ino b/libraries/esp8266/examples/irammem/irammem.ino index 6dbf26c608..dccc34a6a4 100644 --- a/libraries/esp8266/examples/irammem/irammem.ino +++ b/libraries/esp8266/examples/irammem/irammem.ino @@ -168,7 +168,7 @@ void printPunFail(uint16_t *ref, uint16_t *x, size_t sz) { bool testPunning() { bool result = true; // Get reference result for verifing test - alignas(alignof(uint32_t)) uint16_t x_ref[] = {1, 2, 3, 0}; + alignas(uint32_t) uint16_t x_ref[] = {1, 2, 3, 0}; aliasTestReference(x_ref); // -O0 Serial.printf("mmu_get_uint16() strict-aliasing tests with different optimizations:\r\n"); From 9814f70ea0c3126248fd3247b10b0c8ff25063f3 Mon Sep 17 00:00:00 2001 From: M Hightower <27247790+mhightower83@users.noreply.github.com> Date: Fri, 24 Sep 2021 18:41:02 -0700 Subject: [PATCH 6/6] Added CORE_MOCK #if. Updated related examples. --- cores/esp8266/mmu_iram.h | 18 +++++++++++------- .../examples/IramReserve/IramReserve.ino | 12 +++++++++--- libraries/esp8266/examples/MMU48K/MMU48K.ino | 8 +++++++- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/cores/esp8266/mmu_iram.h b/cores/esp8266/mmu_iram.h index 4acaf78acd..f7b62ba8c2 100644 --- a/cores/esp8266/mmu_iram.h +++ b/cores/esp8266/mmu_iram.h @@ -33,6 +33,17 @@ extern "C" { #if defined(CORE_MOCK) #define ets_uart_printf(...) do {} while(false) +#define XCHAL_INSTRAM0_VADDR 0x40000000 +#define XCHAL_INSTRAM1_VADDR 0x40100000 +#define XCHAL_INSTROM0_VADDR 0x40200000 +#else +#include // For config/core-isa.h +/* + Cautiously use XCHAL_..._VADDR values where possible. + While XCHAL_..._VADDR values in core-isa.h may define the Xtensa processor + CONFIG options, they are not always an indication of DRAM, IRAM, or ROM + size or position in the address space. +*/ #endif /* @@ -47,13 +58,6 @@ extern "C" { #if defined(DEV_DEBUG_PRINT) || defined(DEBUG_ESP_MMU) #include -#include // For config/core-isa.h -/* - Cautiously use XCHAL_..._VADDR values where possible. 
- While XCHAL_..._VADDR values in core-isa.h may define the Xtensa processor - CONFIG options, they are not always an indication of DRAM, IRAM, or ROM - size or position in the address space. -*/ #define DBG_MMU_FLUSH(a) while((USS(a) >> USTXC) & 0xff) {} diff --git a/libraries/esp8266/examples/IramReserve/IramReserve.ino b/libraries/esp8266/examples/IramReserve/IramReserve.ino index 344622f958..96d7479da4 100644 --- a/libraries/esp8266/examples/IramReserve/IramReserve.ino +++ b/libraries/esp8266/examples/IramReserve/IramReserve.ino @@ -17,6 +17,12 @@ #include #if defined(UMM_HEAP_IRAM) +#if defined(CORE_MOCK) +#define XCHAL_INSTRAM1_VADDR 0x40100000 +#else +#include // For config/core-isa.h +#endif + // durable - as in long life, persisting across reboots. struct durable { uint32_t bootCounter; @@ -30,7 +36,7 @@ struct durable { #define IRAM_RESERVE_SZ ((sizeof(struct durable) + 7UL) & ~7UL) // Position its address just above the reduced 2nd Heap. -#define IRAM_RESERVE (0x40100000UL + 0xC000UL - IRAM_RESERVE_SZ) +#define IRAM_RESERVE ((uintptr_t)XCHAL_INSTRAM1_VADDR + 0xC000UL - IRAM_RESERVE_SZ) // Define a reference with the right properties to make access easier. #define DURABLE ((struct durable *)IRAM_RESERVE) @@ -100,9 +106,9 @@ extern "C" void umm_init_iram(void) { adjustments and checksums. These can affect the persistence of data across reboots. */ - uint32_t sec_heap = (uint32_t)_text_end + 32; + uintptr_t sec_heap = (uintptr_t)_text_end + 32; sec_heap &= ~7; - size_t sec_heap_sz = 0xC000UL - (sec_heap - 0x40100000UL); + size_t sec_heap_sz = 0xC000UL - (sec_heap - (uintptr_t)XCHAL_INSTRAM1_VADDR); sec_heap_sz -= IRAM_RESERVE_SZ; // Shrink IRAM heap if (0xC000UL > sec_heap_sz) { diff --git a/libraries/esp8266/examples/MMU48K/MMU48K.ino b/libraries/esp8266/examples/MMU48K/MMU48K.ino index 6acb3840f9..d75d91232b 100644 --- a/libraries/esp8266/examples/MMU48K/MMU48K.ino +++ b/libraries/esp8266/examples/MMU48K/MMU48K.ino @@ -3,6 +3,12 @@ #include #include +#if defined(CORE_MOCK) +#define XCHAL_INSTRAM1_VADDR 0x40100000 +#else +#include // For config/core-isa.h +#endif + uint32_t timed_byte_read(char *pc, uint32_t * o); uint32_t timed_byte_read2(char *pc, uint32_t * o); int divideA_B(int a, int b); @@ -102,7 +108,7 @@ void print_mmu_status(Print& oStream) { #ifdef MMU_IRAM_SIZE oStream.printf_P(PSTR(" IRAM Size: %u"), MMU_IRAM_SIZE); oStream.println(); - const uint32_t iram_free = MMU_IRAM_SIZE - (uint32_t)((uintptr_t)_text_end - 0x40100000UL); + const uint32_t iram_free = MMU_IRAM_SIZE - (uint32_t)((uintptr_t)_text_end - (uintptr_t)XCHAL_INSTRAM1_VADDR); oStream.printf_P(PSTR(" IRAM free: %u"), iram_free); oStream.println(); #endif pFad - Phonifier reborn
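
As a companion to the XCHAL_INSTRAM1_VADDR changes in the last two patches, the second-heap arithmetic can be exercised in isolation with a small host-side sketch. The fallback define matches the CORE_MOCK branch above; the sample end-of-.text offset is an assumption made up for the illustration.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#ifndef XCHAL_INSTRAM1_VADDR
#define XCHAL_INSTRAM1_VADDR 0x40100000  // fallback, as in the CORE_MOCK branch
#endif

// Mirrors mmu_sec_heap(): skip 32 bytes past the end of .text, then round
// down to an 8-byte boundary.
static uintptr_t sec_heap_start(uintptr_t text_end) {
  return (text_end + 32u) & ~(uintptr_t)7u;
}

// Mirrors mmu_sec_heap_size(): whatever remains of the first 0xC000 bytes
// of IRAM after the code and the 32-byte gap.
static size_t sec_heap_size(uintptr_t text_end) {
  return (size_t)0xC000u - (sec_heap_start(text_end) - (uintptr_t)XCHAL_INSTRAM1_VADDR);
}

int main(void) {
  // Hypothetical end-of-.text address, 0x6000 bytes into IRAM.
  uintptr_t text_end = (uintptr_t)XCHAL_INSTRAM1_VADDR + 0x6000u;
  printf("2nd heap at 0x%08lx, %u bytes\n",
         (unsigned long)sec_heap_start(text_end), (unsigned)sec_heap_size(text_end));
  return 0;
}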

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy