From b8d31a59eec6b27b900a6d51d602a6d6f47ac4d4 Mon Sep 17 00:00:00 2001 From: Roman Leonov Date: Tue, 19 Nov 2024 11:28:04 +0100 Subject: [PATCH 1/5] feature(dcd_dwc2): Added cache synchronization --- src/portable/synopsys/dwc2/dcd_dwc2.c | 27 ++++++++++++++++++++++++- src/portable/synopsys/dwc2/dwc2_esp32.h | 27 +++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) diff --git a/src/portable/synopsys/dwc2/dcd_dwc2.c b/src/portable/synopsys/dwc2/dcd_dwc2.c index 2b3ef096f9..1addd28f9f 100644 --- a/src/portable/synopsys/dwc2/dcd_dwc2.c +++ b/src/portable/synopsys/dwc2/dcd_dwc2.c @@ -47,7 +47,20 @@ // MACRO TYPEDEF CONSTANT ENUM //--------------------------------------------------------------------+ +#ifdef DWC2_MEM_CACHE_LINE_SIZE +CFG_TUD_MEM_SECTION struct { + union { + uint32_t data[2]; + uint8_t buffer[DWC2_MEM_CACHE_LINE_SIZE]; + }; +} _cache_aligned_setup_packet; + +#define _setup_packet _cache_aligned_setup_packet.data +#define _sizeof_setup_packet() DWC2_MEM_CACHE_LINE_SIZE +#else static CFG_TUD_MEM_SECTION TU_ATTR_ALIGNED(4) uint32_t _setup_packet[2]; +#define _sizeof_setup_packet() sizeof(_setup_packet) +#endif // DWC2_MEM_CACHE_LINE_SIZE typedef struct { uint8_t* buffer; @@ -348,6 +361,11 @@ static void edpt_schedule_packets(uint8_t rhport, const uint8_t epnum, const uin const bool is_dma = dma_device_enabled(dwc2); if(is_dma) { + if (dir == TUSB_DIR_IN && total_bytes != 0) { + // CACHE HINT + // The xfer->buffer has new data for Host, move it to memory for DMA to transfer it + dcd_dcache_clean(xfer->buffer, total_bytes); + } dep->diepdma = (uintptr_t) xfer->buffer; } @@ -847,6 +865,11 @@ static void handle_epout_dma(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepi if (doepint_bm.setup_phase_done) { dma_setup_prepare(rhport); + // CACHE HINT + // When cache is enabled, _setup_packet must have cache line size alignment + // and there should be no valuable data in memory after. + // Thus, specific struct is used as a buffer for setup packet data + dcd_dcache_invalidate((uint8_t*) _setup_packet, _sizeof_setup_packet()); dcd_event_setup_received(rhport, (uint8_t*) _setup_packet, true); return; } @@ -872,7 +895,9 @@ static void handle_epout_dma(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepi if(epnum == 0 && xfer->total_len == 0) { dma_setup_prepare(rhport); } - + // CACHE HINT + // Some data has been received by DMA, fetch the data from memory to cache + dcd_dcache_invalidate(xfer->buffer, xfer->total_len); dcd_event_xfer_complete(rhport, epnum, xfer->total_len, XFER_RESULT_SUCCESS, true); } } diff --git a/src/portable/synopsys/dwc2/dwc2_esp32.h b/src/portable/synopsys/dwc2/dwc2_esp32.h index 42ab4b80f3..c7f1f78e95 100644 --- a/src/portable/synopsys/dwc2/dwc2_esp32.h +++ b/src/portable/synopsys/dwc2/dwc2_esp32.h @@ -39,6 +39,14 @@ #include "soc/periph_defs.h" #include "soc/usb_wrap_struct.h" +#if (CFG_TUD_DWC2_DMA_ENABLE && SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE) +#include "sdkconfig.h" +#include "esp_cache.h" +#include "esp_log.h" + +#define DWC2_MEM_CACHE_LINE_SIZE CONFIG_CACHE_L1_CACHE_LINE_SIZE +#endif // SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE + #if TU_CHECK_MCU(OPT_MCU_ESP32S2, OPT_MCU_ESP32S3) #define DWC2_FS_REG_BASE 0x60080000UL #define DWC2_EP_MAX 7 @@ -111,6 +119,25 @@ TU_ATTR_ALWAYS_INLINE static inline void dwc2_phy_update(dwc2_regs_t* dwc2, uint // maybe usb_utmi_hal_disable() } +#if (CFG_TUD_DWC2_DMA_ENABLE && SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE) +void dcd_dcache_clean(void const* addr, uint32_t data_size) { + int flags = ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED; + if (addr != NULL && data_size) { + esp_err_t ret = esp_cache_msync((void *) addr, data_size, flags); + assert(ret == ESP_OK); + } +} + +void dcd_dcache_invalidate(void const* addr, uint32_t data_size) { + int flags = ESP_CACHE_MSYNC_FLAG_DIR_M2C; + if (addr != NULL && data_size) { + data_size = (data_size < DWC2_MEM_CACHE_LINE_SIZE)? DWC2_MEM_CACHE_LINE_SIZE : data_size; + esp_err_t ret = esp_cache_msync((void *) addr, data_size, flags); + assert(ret == ESP_OK); + } +} +#endif // CFG_TUD_DWC2_DMA_ENABLE && SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE + #ifdef __cplusplus } #endif From 43a45f29cd14bad681be3bb3928ea7e0453cc886 Mon Sep 17 00:00:00 2001 From: Roman Leonov Date: Tue, 19 Nov 2024 11:28:04 +0100 Subject: [PATCH 2/5] feature(dcd_dwc2): Added cache synchronization --- src/portable/synopsys/dwc2/dcd_dwc2.c | 27 +------------------------ src/portable/synopsys/dwc2/dwc2_esp32.h | 27 ------------------------- 2 files changed, 1 insertion(+), 53 deletions(-) diff --git a/src/portable/synopsys/dwc2/dcd_dwc2.c b/src/portable/synopsys/dwc2/dcd_dwc2.c index 1addd28f9f..2b3ef096f9 100644 --- a/src/portable/synopsys/dwc2/dcd_dwc2.c +++ b/src/portable/synopsys/dwc2/dcd_dwc2.c @@ -47,20 +47,7 @@ // MACRO TYPEDEF CONSTANT ENUM //--------------------------------------------------------------------+ -#ifdef DWC2_MEM_CACHE_LINE_SIZE -CFG_TUD_MEM_SECTION struct { - union { - uint32_t data[2]; - uint8_t buffer[DWC2_MEM_CACHE_LINE_SIZE]; - }; -} _cache_aligned_setup_packet; - -#define _setup_packet _cache_aligned_setup_packet.data -#define _sizeof_setup_packet() DWC2_MEM_CACHE_LINE_SIZE -#else static CFG_TUD_MEM_SECTION TU_ATTR_ALIGNED(4) uint32_t _setup_packet[2]; -#define _sizeof_setup_packet() sizeof(_setup_packet) -#endif // DWC2_MEM_CACHE_LINE_SIZE typedef struct { uint8_t* buffer; @@ -361,11 +348,6 @@ static void edpt_schedule_packets(uint8_t rhport, const uint8_t epnum, const uin const bool is_dma = dma_device_enabled(dwc2); if(is_dma) { - if (dir == TUSB_DIR_IN && total_bytes != 0) { - // CACHE HINT - // The xfer->buffer has new data for Host, move it to memory for DMA to transfer it - dcd_dcache_clean(xfer->buffer, total_bytes); - } dep->diepdma = (uintptr_t) xfer->buffer; } @@ -865,11 +847,6 @@ static void handle_epout_dma(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepi if (doepint_bm.setup_phase_done) { dma_setup_prepare(rhport); - // CACHE HINT - // When cache is enabled, _setup_packet must have cache line size alignment - // and there should be no valuable data in memory after. - // Thus, specific struct is used as a buffer for setup packet data - dcd_dcache_invalidate((uint8_t*) _setup_packet, _sizeof_setup_packet()); dcd_event_setup_received(rhport, (uint8_t*) _setup_packet, true); return; } @@ -895,9 +872,7 @@ static void handle_epout_dma(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepi if(epnum == 0 && xfer->total_len == 0) { dma_setup_prepare(rhport); } - // CACHE HINT - // Some data has been received by DMA, fetch the data from memory to cache - dcd_dcache_invalidate(xfer->buffer, xfer->total_len); + dcd_event_xfer_complete(rhport, epnum, xfer->total_len, XFER_RESULT_SUCCESS, true); } } diff --git a/src/portable/synopsys/dwc2/dwc2_esp32.h b/src/portable/synopsys/dwc2/dwc2_esp32.h index c7f1f78e95..42ab4b80f3 100644 --- a/src/portable/synopsys/dwc2/dwc2_esp32.h +++ b/src/portable/synopsys/dwc2/dwc2_esp32.h @@ -39,14 +39,6 @@ #include "soc/periph_defs.h" #include "soc/usb_wrap_struct.h" -#if (CFG_TUD_DWC2_DMA_ENABLE && SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE) -#include "sdkconfig.h" -#include "esp_cache.h" -#include "esp_log.h" - -#define DWC2_MEM_CACHE_LINE_SIZE CONFIG_CACHE_L1_CACHE_LINE_SIZE -#endif // SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE - #if TU_CHECK_MCU(OPT_MCU_ESP32S2, OPT_MCU_ESP32S3) #define DWC2_FS_REG_BASE 0x60080000UL #define DWC2_EP_MAX 7 @@ -119,25 +111,6 @@ TU_ATTR_ALWAYS_INLINE static inline void dwc2_phy_update(dwc2_regs_t* dwc2, uint // maybe usb_utmi_hal_disable() } -#if (CFG_TUD_DWC2_DMA_ENABLE && SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE) -void dcd_dcache_clean(void const* addr, uint32_t data_size) { - int flags = ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_UNALIGNED; - if (addr != NULL && data_size) { - esp_err_t ret = esp_cache_msync((void *) addr, data_size, flags); - assert(ret == ESP_OK); - } -} - -void dcd_dcache_invalidate(void const* addr, uint32_t data_size) { - int flags = ESP_CACHE_MSYNC_FLAG_DIR_M2C; - if (addr != NULL && data_size) { - data_size = (data_size < DWC2_MEM_CACHE_LINE_SIZE)? DWC2_MEM_CACHE_LINE_SIZE : data_size; - esp_err_t ret = esp_cache_msync((void *) addr, data_size, flags); - assert(ret == ESP_OK); - } -} -#endif // CFG_TUD_DWC2_DMA_ENABLE && SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE - #ifdef __cplusplus } #endif From 4da5de707b53389c8fe35f6b448f3d1bfe7c91e8 Mon Sep 17 00:00:00 2001 From: hathach Date: Wed, 20 Nov 2024 18:03:42 +0700 Subject: [PATCH 3/5] have p4 dma somewhat working but having issue with buffer that does not occupy the whole cache line --- .../components/tinyusb_src/CMakeLists.txt | 8 ++++ src/device/dcd.h | 6 +-- src/device/usbd.c | 20 ++++++--- src/portable/synopsys/dwc2/dcd_dwc2.c | 42 ++++++++++++++---- src/portable/synopsys/dwc2/dwc2_common.c | 2 + src/portable/synopsys/dwc2/dwc2_esp32.h | 44 +++++++++++++++++++ 6 files changed, 104 insertions(+), 18 deletions(-) diff --git a/hw/bsp/espressif/components/tinyusb_src/CMakeLists.txt b/hw/bsp/espressif/components/tinyusb_src/CMakeLists.txt index 900f620fd8..9f55c8d5a7 100644 --- a/hw/bsp/espressif/components/tinyusb_src/CMakeLists.txt +++ b/hw/bsp/espressif/components/tinyusb_src/CMakeLists.txt @@ -15,6 +15,13 @@ list(APPEND compile_definitions BOARD_TUH_MAX_SPEED=${RHPORT_HOST_SPEED} ) +if (target STREQUAL esp32p4) + # P4 change alignment to 64 (DCache line size) for possible DMA configuration + list(APPEND compile_definitions + CFG_TUSB_MEM_ALIGN=__attribute__\(\(aligned\(64\)\)\) + ) +endif () + list(APPEND srcs # common ${tusb_src}/tusb.c @@ -68,6 +75,7 @@ endif() idf_component_register(SRCS ${srcs} INCLUDE_DIRS ${tusb_src} REQUIRES src + PRIV_REQUIRES esp_mm ) target_compile_definitions(${COMPONENT_LIB} PUBLIC ${compile_definitions}) diff --git a/src/device/dcd.h b/src/device/dcd.h index d01f82e01c..0ecdec4ed5 100644 --- a/src/device/dcd.h +++ b/src/device/dcd.h @@ -93,15 +93,15 @@ typedef struct TU_ATTR_ALIGNED(4) { // clean/flush data cache: write cache -> memory. // Required before an DMA TX transfer to make sure data is in memory -void dcd_dcache_clean(void const* addr, uint32_t data_size) TU_ATTR_WEAK; +void dcd_dcache_clean(const void* addr, uint32_t data_size); // invalidate data cache: mark cache as invalid, next read will read from memory // Required BOTH before and after an DMA RX transfer -void dcd_dcache_invalidate(void const* addr, uint32_t data_size) TU_ATTR_WEAK; +void dcd_dcache_invalidate(const void* addr, uint32_t data_size); // clean and invalidate data cache // Required before an DMA transfer where memory is both read/write by DMA -void dcd_dcache_clean_invalidate(void const* addr, uint32_t data_size) TU_ATTR_WEAK; +void dcd_dcache_clean_invalidate(const void* addr, uint32_t data_size); //--------------------------------------------------------------------+ // Controller API diff --git a/src/device/usbd.c b/src/device/usbd.c index a730b745b8..f485b6872e 100644 --- a/src/device/usbd.c +++ b/src/device/usbd.c @@ -46,9 +46,7 @@ // Weak stubs: invoked if no strong implementation is available //--------------------------------------------------------------------+ TU_ATTR_WEAK void tud_event_hook_cb(uint8_t rhport, uint32_t eventid, bool in_isr) { - (void) rhport; - (void) eventid; - (void) in_isr; + (void) rhport; (void) eventid; (void) in_isr; } TU_ATTR_WEAK void tud_sof_cb(uint32_t frame_count) { @@ -82,9 +80,7 @@ TU_ATTR_WEAK void tud_resume_cb(void) { } TU_ATTR_WEAK bool tud_vendor_control_xfer_cb(uint8_t rhport, uint8_t stage, tusb_control_request_t const* request) { - (void) rhport; - (void) stage; - (void) request; + (void) rhport; (void) stage; (void) request; return false; } @@ -101,6 +97,18 @@ TU_ATTR_WEAK void dcd_disconnect(uint8_t rhport) { (void) rhport; } +TU_ATTR_WEAK void dcd_dcache_clean(const void* addr, uint32_t data_size) { + (void) addr; (void) data_size; +} + +TU_ATTR_WEAK void dcd_dcache_invalidate(const void* addr, uint32_t data_size) { + (void) addr; (void) data_size; +} + +TU_ATTR_WEAK void dcd_dcache_clean_invalidate(const void* addr, uint32_t data_size) { + (void) addr; (void) data_size; +} + //--------------------------------------------------------------------+ // Device Data //--------------------------------------------------------------------+ diff --git a/src/portable/synopsys/dwc2/dcd_dwc2.c b/src/portable/synopsys/dwc2/dcd_dwc2.c index 2b3ef096f9..a4ebacfcb2 100644 --- a/src/portable/synopsys/dwc2/dcd_dwc2.c +++ b/src/portable/synopsys/dwc2/dcd_dwc2.c @@ -46,8 +46,7 @@ //--------------------------------------------------------------------+ // MACRO TYPEDEF CONSTANT ENUM //--------------------------------------------------------------------+ - -static CFG_TUD_MEM_SECTION TU_ATTR_ALIGNED(4) uint32_t _setup_packet[2]; +static CFG_TUD_MEM_SECTION CFG_TUD_MEM_ALIGN uint32_t _setup_packet[2]; typedef struct { uint8_t* buffer; @@ -73,6 +72,25 @@ static bool _sof_en; //-------------------------------------------------------------------- // DMA //-------------------------------------------------------------------- +#if DWC2_ENABLE_MEM_CACHE +void dcd_dcache_clean(const void* addr, uint32_t data_size) { + if (addr && data_size) { + dwc2_dcache_clean(addr, data_size); + } +} + +void dcd_dcache_invalidate(const void* addr, uint32_t data_size) { + if (addr && data_size) { + dwc2_dcache_invalidate(addr, data_size); + } +} + +void dcd_dcache_clean_invalidate(const void* addr, uint32_t data_size) { + if (addr && data_size) { + dwc2_dcache_clean_invalidate(addr, data_size); + } +} +#endif TU_ATTR_ALWAYS_INLINE static inline bool dma_device_enabled(const dwc2_regs_t* dwc2) { (void) dwc2; @@ -180,7 +198,7 @@ static bool dfifo_alloc(uint8_t rhport, uint8_t ep_addr, uint16_t packet_size) { // Check if free space is available TU_ASSERT(_dfifo_top >= fifo_size + dwc2->grxfsiz); _dfifo_top -= fifo_size; - TU_LOG(DWC2_DEBUG, " TX FIFO %u: allocated %u words at offset %u\r\n", epnum, fifo_size, _dfifo_top); + // TU_LOG(DWC2_DEBUG, " TX FIFO %u: allocated %u words at offset %u\r\n", epnum, fifo_size, _dfifo_top); // Both TXFD and TXSA are in unit of 32-bit words. if (epnum == 0) { @@ -348,14 +366,18 @@ static void edpt_schedule_packets(uint8_t rhport, const uint8_t epnum, const uin const bool is_dma = dma_device_enabled(dwc2); if(is_dma) { + if (dir == TUSB_DIR_IN && total_bytes != 0) { + dcd_dcache_clean(xfer->buffer, total_bytes); + } dep->diepdma = (uintptr_t) xfer->buffer; - } - - dep->diepctl = depctl.value; // enable endpoint + dep->diepctl = depctl.value; // enable endpoint + } else { + dep->diepctl = depctl.value; // enable endpoint - // Slave: enable tx fifo empty interrupt only if there is data. Note must after depctl enable - if (!is_dma && dir == TUSB_DIR_IN && total_bytes != 0) { - dwc2->diepempmsk |= (1 << epnum); + // Enable tx fifo empty interrupt only if there is data. Note must after depctl enable + if (dir == TUSB_DIR_IN && total_bytes != 0) { + dwc2->diepempmsk |= (1 << epnum); + } } } @@ -847,6 +869,7 @@ static void handle_epout_dma(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepi if (doepint_bm.setup_phase_done) { dma_setup_prepare(rhport); + dcd_dcache_invalidate(_setup_packet, 8); dcd_event_setup_received(rhport, (uint8_t*) _setup_packet, true); return; } @@ -873,6 +896,7 @@ static void handle_epout_dma(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepi dma_setup_prepare(rhport); } + dcd_dcache_invalidate(xfer->buffer, xfer->total_len); dcd_event_xfer_complete(rhport, epnum, xfer->total_len, XFER_RESULT_SUCCESS, true); } } diff --git a/src/portable/synopsys/dwc2/dwc2_common.c b/src/portable/synopsys/dwc2/dwc2_common.c index ef155c8f78..f80ae9acbc 100644 --- a/src/portable/synopsys/dwc2/dwc2_common.c +++ b/src/portable/synopsys/dwc2/dwc2_common.c @@ -229,6 +229,8 @@ bool dwc2_core_init(uint8_t rhport, bool is_highspeed, bool is_dma) { dwc2->gotgint = 0xFFFFFFFFU; dwc2->gintmsk = 0; + TU_LOG(DWC2_COMMON_DEBUG, "DMA = %u\r\n", is_dma); + if (is_dma) { // DMA seems to be only settable after a core reset, and not possible to switch on-the-fly dwc2->gahbcfg |= GAHBCFG_DMAEN | GAHBCFG_HBSTLEN_2; diff --git a/src/portable/synopsys/dwc2/dwc2_esp32.h b/src/portable/synopsys/dwc2/dwc2_esp32.h index 42ab4b80f3..24ff80bce8 100644 --- a/src/portable/synopsys/dwc2/dwc2_esp32.h +++ b/src/portable/synopsys/dwc2/dwc2_esp32.h @@ -111,6 +111,50 @@ TU_ATTR_ALWAYS_INLINE static inline void dwc2_phy_update(dwc2_regs_t* dwc2, uint // maybe usb_utmi_hal_disable() } +//--------------------------------------------------------------------+ +// Data Cache +//--------------------------------------------------------------------+ +#if defined(SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE) && SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE +#include "sdkconfig.h" +#include "hal/cache_hal.h" +#include "esp_cache.h" +#include "esp_log.h" + +#define DWC2_MEM_CACHE_LINE_SIZE CONFIG_CACHE_L1_CACHE_LINE_SIZE +#define DWC2_ENABLE_MEM_CACHE 1 + +TU_ATTR_ALWAYS_INLINE static inline uint32_t round_up_to_cache_line_size(uint32_t size) { + if (size & (CONFIG_CACHE_L1_CACHE_LINE_SIZE-1)) { + size = (size & ~(CONFIG_CACHE_L1_CACHE_LINE_SIZE-1)) + CONFIG_CACHE_L1_CACHE_LINE_SIZE; + } + return size; +} + +TU_ATTR_ALWAYS_INLINE static inline void dwc2_dcache_clean(const void* addr, uint32_t data_size) { + // round up to cache line size + const int flag = ESP_CACHE_MSYNC_FLAG_TYPE_DATA | ESP_CACHE_MSYNC_FLAG_DIR_C2M; + data_size = round_up_to_cache_line_size(data_size); + //ESP_EARLY_LOGI("ESP32_DWC", "dcache clean, addr 0x%"PRIx32", size %d (%s)", (uintptr_t)addr, data_size); + assert(ESP_OK == esp_cache_msync((void*)addr, data_size, flag)); +} + +TU_ATTR_ALWAYS_INLINE static inline void dwc2_dcache_invalidate(const void* addr, uint32_t data_size) { + const int flag = ESP_CACHE_MSYNC_FLAG_TYPE_DATA | ESP_CACHE_MSYNC_FLAG_DIR_M2C; + data_size = round_up_to_cache_line_size(data_size); + //ESP_EARLY_LOGI("ESP32_DWC", "dcache invalidate, addr 0x%"PRIx32", size %d (%s)", (uintptr_t)addr, data_size); + assert(ESP_OK == esp_cache_msync((void*)addr, data_size, flag)); +} + +TU_ATTR_ALWAYS_INLINE static inline void dwc2_dcache_clean_invalidate(const void* addr, uint32_t data_size) { + const int flag = ESP_CACHE_MSYNC_FLAG_TYPE_DATA | ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_DIR_M2C; + data_size = round_up_to_cache_line_size(data_size); + //ESP_EARLY_LOGI("ESP32_DWC", "dcache clean_invalidate, addr 0x%"PRIx32", size %d (%s)", (uintptr_t)addr, data_size); + assert(ESP_OK == esp_cache_msync((void*)addr, data_size, flag)); +} + +#endif // SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE + + #ifdef __cplusplus } #endif From b3b8bd88cb7e40063de6b54e7f7741c19550d0ab Mon Sep 17 00:00:00 2001 From: hathach Date: Wed, 20 Nov 2024 21:30:29 +0700 Subject: [PATCH 4/5] add CFG_TUD_MEM_DCACHE_ENABLE, CFG_TUD_MEM_DCACHE_LINE_SIZE option --- src/common/tusb_mcu.h | 12 ++++++++++++ src/portable/synopsys/dwc2/dcd_dwc2.c | 2 +- src/portable/synopsys/dwc2/dwc2_esp32.h | 17 +++++++---------- src/tusb_option.h | 14 ++++++++++++++ 4 files changed, 34 insertions(+), 11 deletions(-) diff --git a/src/common/tusb_mcu.h b/src/common/tusb_mcu.h index 3ee04b6087..dfa6c7151b 100644 --- a/src/common/tusb_mcu.h +++ b/src/common/tusb_mcu.h @@ -361,6 +361,18 @@ #define TUP_USBIP_DWC2_ESP32 #define TUP_RHPORT_HIGHSPEED 1 // port0 FS, port1 HS #define TUP_DCD_ENDPOINT_MAX 16 // FS 7 ep, HS 16 ep + + #if defined(CFG_TUD_DWC2_DMA_ENABLE) && CFG_TUD_DWC2_DMA_ENABLE == 1 + #define CFG_TUD_MEM_DCACHE_ENABLE_DEFAULT 1 + #endif + + #if defined(CFG_TUH_DWC2_DMA_ENABLE) && CFG_TUH_DWC2_DMA_ENABLE == 1 + #define CFG_TUH_MEM_DCACHE_ENABLE_DEFAULT 1 + #endif + + #define CFG_TUD_MEM_DCACHE_LINE_SIZE 64 + #define CFG_TUH_MEM_DCACHE_LINE_SIZE 64 + #define CFG_TUH_DWC2_DMA_ENABLE_DEFAULT 0 // TODO currently have issue with buffer DMA with espressif #elif TU_CHECK_MCU(OPT_MCU_ESP32, OPT_MCU_ESP32C2, OPT_MCU_ESP32C3, OPT_MCU_ESP32C6, OPT_MCU_ESP32H2) diff --git a/src/portable/synopsys/dwc2/dcd_dwc2.c b/src/portable/synopsys/dwc2/dcd_dwc2.c index a4ebacfcb2..18b7fd024a 100644 --- a/src/portable/synopsys/dwc2/dcd_dwc2.c +++ b/src/portable/synopsys/dwc2/dcd_dwc2.c @@ -72,7 +72,7 @@ static bool _sof_en; //-------------------------------------------------------------------- // DMA //-------------------------------------------------------------------- -#if DWC2_ENABLE_MEM_CACHE +#if CFG_TUD_MEM_DCACHE_ENABLE void dcd_dcache_clean(const void* addr, uint32_t data_size) { if (addr && data_size) { dwc2_dcache_clean(addr, data_size); diff --git a/src/portable/synopsys/dwc2/dwc2_esp32.h b/src/portable/synopsys/dwc2/dwc2_esp32.h index 24ff80bce8..84a8360c5f 100644 --- a/src/portable/synopsys/dwc2/dwc2_esp32.h +++ b/src/portable/synopsys/dwc2/dwc2_esp32.h @@ -114,14 +114,15 @@ TU_ATTR_ALWAYS_INLINE static inline void dwc2_phy_update(dwc2_regs_t* dwc2, uint //--------------------------------------------------------------------+ // Data Cache //--------------------------------------------------------------------+ +#if CFG_TUD_DWC2_DMA_ENABLE || CFG_TUH_DWC2_DMA_ENABLE #if defined(SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE) && SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE -#include "sdkconfig.h" #include "hal/cache_hal.h" #include "esp_cache.h" -#include "esp_log.h" -#define DWC2_MEM_CACHE_LINE_SIZE CONFIG_CACHE_L1_CACHE_LINE_SIZE -#define DWC2_ENABLE_MEM_CACHE 1 +#if CFG_TUD_MEM_DCACHE_LINE_SIZE != CONFIG_CACHE_L1_CACHE_LINE_SIZE || \ + CFG_TUH_MEM_DCACHE_LINE_SIZE != CONFIG_CACHE_L1_CACHE_LINE_SIZE +#error "CFG_TUD/TUH_MEM_DCACHE_LINE_SIZE must match CONFIG_CACHE_L1_CACHE_LINE_SIZE" +#endif TU_ATTR_ALWAYS_INLINE static inline uint32_t round_up_to_cache_line_size(uint32_t size) { if (size & (CONFIG_CACHE_L1_CACHE_LINE_SIZE-1)) { @@ -131,29 +132,25 @@ TU_ATTR_ALWAYS_INLINE static inline uint32_t round_up_to_cache_line_size(uint32_ } TU_ATTR_ALWAYS_INLINE static inline void dwc2_dcache_clean(const void* addr, uint32_t data_size) { - // round up to cache line size const int flag = ESP_CACHE_MSYNC_FLAG_TYPE_DATA | ESP_CACHE_MSYNC_FLAG_DIR_C2M; data_size = round_up_to_cache_line_size(data_size); - //ESP_EARLY_LOGI("ESP32_DWC", "dcache clean, addr 0x%"PRIx32", size %d (%s)", (uintptr_t)addr, data_size); assert(ESP_OK == esp_cache_msync((void*)addr, data_size, flag)); } TU_ATTR_ALWAYS_INLINE static inline void dwc2_dcache_invalidate(const void* addr, uint32_t data_size) { const int flag = ESP_CACHE_MSYNC_FLAG_TYPE_DATA | ESP_CACHE_MSYNC_FLAG_DIR_M2C; data_size = round_up_to_cache_line_size(data_size); - //ESP_EARLY_LOGI("ESP32_DWC", "dcache invalidate, addr 0x%"PRIx32", size %d (%s)", (uintptr_t)addr, data_size); assert(ESP_OK == esp_cache_msync((void*)addr, data_size, flag)); } TU_ATTR_ALWAYS_INLINE static inline void dwc2_dcache_clean_invalidate(const void* addr, uint32_t data_size) { const int flag = ESP_CACHE_MSYNC_FLAG_TYPE_DATA | ESP_CACHE_MSYNC_FLAG_DIR_C2M | ESP_CACHE_MSYNC_FLAG_DIR_M2C; data_size = round_up_to_cache_line_size(data_size); - //ESP_EARLY_LOGI("ESP32_DWC", "dcache clean_invalidate, addr 0x%"PRIx32", size %d (%s)", (uintptr_t)addr, data_size); assert(ESP_OK == esp_cache_msync((void*)addr, data_size, flag)); } -#endif // SOC_CACHE_INTERNAL_MEM_VIA_L1CACHE - +#endif +#endif #ifdef __cplusplus } diff --git a/src/tusb_option.h b/src/tusb_option.h index 51017221dc..500d749030 100644 --- a/src/tusb_option.h +++ b/src/tusb_option.h @@ -266,6 +266,7 @@ #ifndef CFG_TUH_DWC2_SLAVE_ENABLE_DEFAULT #define CFG_TUH_DWC2_SLAVE_ENABLE_DEFAULT 1 #endif + #define CFG_TUH_DWC2_SLAVE_ENABLE CFG_TUH_DWC2_SLAVE_ENABLE_DEFAULT #endif @@ -274,6 +275,7 @@ #ifndef CFG_TUH_DWC2_DMA_ENABLE_DEFAULT #define CFG_TUH_DWC2_DMA_ENABLE_DEFAULT 1 #endif + #define CFG_TUH_DWC2_DMA_ENABLE CFG_TUH_DWC2_DMA_ENABLE_DEFAULT #endif @@ -422,6 +424,18 @@ #define CFG_TUD_MEM_ALIGN CFG_TUSB_MEM_ALIGN #endif +#ifndef CFG_TUD_MEM_DCACHE_ENABLE + #ifndef CFG_TUD_MEM_DCACHE_ENABLE_DEFAULT + #define CFG_TUD_MEM_DCACHE_ENABLE_DEFAULT 0 + #endif + + #define CFG_TUD_MEM_DCACHE_ENABLE CFG_TUD_MEM_DCACHE_ENABLE_DEFAULT +#endif + +#ifndef CFG_TUD_MEM_DCACHE_LINE_SIZE + #define CFG_TUD_MEM_DCACHE_LINE_SIZE 32 +#endif + #ifndef CFG_TUD_ENDPOINT0_SIZE #define CFG_TUD_ENDPOINT0_SIZE 64 #endif From c61b55b191ffcc25c1feb1a693b6d9d5d704025f Mon Sep 17 00:00:00 2001 From: hathach Date: Wed, 20 Nov 2024 22:04:55 +0700 Subject: [PATCH 5/5] dcd wrap data to dcd_data_t, add padding for setup_packet to match cache line size --- src/portable/synopsys/dwc2/dcd_dwc2.c | 103 ++++++++++++++------------ 1 file changed, 57 insertions(+), 46 deletions(-) diff --git a/src/portable/synopsys/dwc2/dcd_dwc2.c b/src/portable/synopsys/dwc2/dcd_dwc2.c index 18b7fd024a..363c777616 100644 --- a/src/portable/synopsys/dwc2/dcd_dwc2.c +++ b/src/portable/synopsys/dwc2/dcd_dwc2.c @@ -46,8 +46,6 @@ //--------------------------------------------------------------------+ // MACRO TYPEDEF CONSTANT ENUM //--------------------------------------------------------------------+ -static CFG_TUD_MEM_SECTION CFG_TUD_MEM_ALIGN uint32_t _setup_packet[2]; - typedef struct { uint8_t* buffer; tu_fifo_t* ff; @@ -59,15 +57,26 @@ typedef struct { static xfer_ctl_t xfer_status[DWC2_EP_MAX][2]; #define XFER_CTL_BASE(_ep, _dir) (&xfer_status[_ep][_dir]) -// EP0 transfers are limited to 1 packet - larger sizes has to be split -static uint16_t ep0_pending[2]; // Index determines direction as tusb_dir_t type -static uint16_t _dfifo_top; // top free location in DFIFO in words +typedef struct { + CFG_TUD_MEM_ALIGN union { + uint32_t setup_packet[2]; +#if CFG_TUD_MEM_DCACHE_ENABLE + uint8_t setup_packet_cache_padding[CFG_TUD_MEM_DCACHE_LINE_SIZE]; +#endif + }; + + // EP0 transfers are limited to 1 packet - larger sizes has to be split + uint16_t ep0_pending[2]; // Index determines direction as tusb_dir_t type + uint16_t dfifo_top; // top free location in DFIFO in words -// Number of IN endpoints active -static uint8_t _allocated_ep_in_count; + // Number of IN endpoints active + uint8_t allocated_epin_count; -// SOF enabling flag - required for SOF to not get disabled in ISR when SOF was enabled by -static bool _sof_en; + // SOF enabling flag - required for SOF to not get disabled in ISR when SOF was enabled by + bool sof_en; +} dcd_data_t; + +CFG_TUD_MEM_SECTION static dcd_data_t _dcd_data; //-------------------------------------------------------------------- // DMA @@ -109,7 +118,7 @@ static void dma_setup_prepare(uint8_t rhport) { // Receive only 1 packet dwc2->epout[0].doeptsiz = (1 << DOEPTSIZ_STUPCNT_Pos) | (1 << DOEPTSIZ_PKTCNT_Pos) | (8 << DOEPTSIZ_XFRSIZ_Pos); - dwc2->epout[0].doepdma = (uintptr_t)_setup_packet; + dwc2->epout[0].doepdma = (uintptr_t) _dcd_data.setup_packet; dwc2->epout[0].doepctl |= DOEPCTL_EPENA | DOEPCTL_USBAEP; } @@ -167,27 +176,27 @@ TU_ATTR_ALWAYS_INLINE static inline uint16_t calc_device_grxfsiz(uint16_t larges static bool dfifo_alloc(uint8_t rhport, uint8_t ep_addr, uint16_t packet_size) { dwc2_regs_t* dwc2 = DWC2_REG(rhport); const dwc2_controller_t* dwc2_controller = &_dwc2_controller[rhport]; - uint8_t const ep_count = dwc2_controller->ep_count; - uint8_t const epnum = tu_edpt_number(ep_addr); - uint8_t const dir = tu_edpt_dir(ep_addr); + const uint8_t ep_count = dwc2_controller->ep_count; + const uint8_t epnum = tu_edpt_number(ep_addr); + const uint8_t dir = tu_edpt_dir(ep_addr); TU_ASSERT(epnum < ep_count); uint16_t fifo_size = tu_div_ceil(packet_size, 4); if (dir == TUSB_DIR_OUT) { // Calculate required size of RX FIFO - uint16_t const new_sz = calc_device_grxfsiz(4 * fifo_size, ep_count); + const uint16_t new_sz = calc_device_grxfsiz(4 * fifo_size, ep_count); // If size_rx needs to be extended check if there is enough free space if (dwc2->grxfsiz < new_sz) { - TU_ASSERT(new_sz <= _dfifo_top); + TU_ASSERT(new_sz <= _dcd_data.dfifo_top); dwc2->grxfsiz = new_sz; // Enlarge RX FIFO } } else { // Check IN endpoints concurrently active limit if(_dwc2_controller->ep_in_count) { - TU_ASSERT(_allocated_ep_in_count < _dwc2_controller->ep_in_count); - _allocated_ep_in_count++; + TU_ASSERT(_dcd_data.allocated_epin_count < _dwc2_controller->ep_in_count); + _dcd_data.allocated_epin_count++; } // If The TXFELVL is configured as half empty, the fifo must be twice the max_size. @@ -196,16 +205,16 @@ static bool dfifo_alloc(uint8_t rhport, uint8_t ep_addr, uint16_t packet_size) { } // Check if free space is available - TU_ASSERT(_dfifo_top >= fifo_size + dwc2->grxfsiz); - _dfifo_top -= fifo_size; - // TU_LOG(DWC2_DEBUG, " TX FIFO %u: allocated %u words at offset %u\r\n", epnum, fifo_size, _dfifo_top); + TU_ASSERT(_dcd_data.dfifo_top >= fifo_size + dwc2->grxfsiz); + _dcd_data.dfifo_top -= fifo_size; + // TU_LOG(DWC2_DEBUG, " TX FIFO %u: allocated %u words at offset %u\r\n", epnum, fifo_size, dfifo_top); // Both TXFD and TXSA are in unit of 32-bit words. if (epnum == 0) { - dwc2->dieptxf0 = (fifo_size << DIEPTXF0_TX0FD_Pos) | _dfifo_top; + dwc2->dieptxf0 = (fifo_size << DIEPTXF0_TX0FD_Pos) | _dcd_data.dfifo_top; } else { // DIEPTXF starts at FIFO #1. - dwc2->dieptxf[epnum - 1] = (fifo_size << DIEPTXF_INEPTXFD_Pos) | _dfifo_top; + dwc2->dieptxf[epnum - 1] = (fifo_size << DIEPTXF_INEPTXFD_Pos) | _dcd_data.dfifo_top; } } @@ -219,11 +228,11 @@ static void dfifo_device_init(uint8_t rhport) { // Scatter/Gather DMA mode is not yet supported. Buffer DMA only need 1 words per endpoint direction const bool is_dma = dma_device_enabled(dwc2); - _dfifo_top = dwc2_controller->ep_fifo_size/4; + _dcd_data.dfifo_top = dwc2_controller->ep_fifo_size/4; if (is_dma) { - _dfifo_top -= 2 * dwc2_controller->ep_count; + _dcd_data.dfifo_top -= 2 * dwc2_controller->ep_count; } - dwc2->gdfifocfg = (_dfifo_top << GDFIFOCFG_EPINFOBASE_SHIFT) | _dfifo_top; + dwc2->gdfifocfg = (_dcd_data.dfifo_top << GDFIFOCFG_EPINFOBASE_SHIFT) | _dcd_data.dfifo_top; // Allocate FIFO for EP0 IN dfifo_alloc(rhport, 0x80, CFG_TUD_ENDPOINT0_SIZE); @@ -233,7 +242,7 @@ static void dfifo_device_init(uint8_t rhport) { //-------------------------------------------------------------------- // Endpoint //-------------------------------------------------------------------- -static void edpt_activate(uint8_t rhport, tusb_desc_endpoint_t const * p_endpoint_desc) { +static void edpt_activate(uint8_t rhport, const tusb_desc_endpoint_t* p_endpoint_desc) { dwc2_regs_t* dwc2 = DWC2_REG(rhport); const uint8_t epnum = tu_edpt_number(p_endpoint_desc->bEndpointAddress); const uint8_t dir = tu_edpt_dir(p_endpoint_desc->bEndpointAddress); @@ -324,8 +333,8 @@ static void edpt_schedule_packets(uint8_t rhport, const uint8_t epnum, const uin // EP0 is limited to one packet per xfer if (epnum == 0) { - total_bytes = tu_min16(ep0_pending[dir], xfer->max_size); - ep0_pending[dir] -= total_bytes; + total_bytes = tu_min16(_dcd_data.ep0_pending[dir], xfer->max_size); + _dcd_data.ep0_pending[dir] -= total_bytes; num_packets = 1; } else { total_bytes = xfer->total_len; @@ -388,6 +397,8 @@ bool dcd_init(uint8_t rhport, const tusb_rhport_init_t* rh_init) { (void) rh_init; dwc2_regs_t* dwc2 = DWC2_REG(rhport); + tu_memclr(&_dcd_data, sizeof(_dcd_data)); + // Core Initialization const bool is_highspeed = dwc2_core_is_highspeed(dwc2, TUSB_ROLE_DEVICE); const bool is_dma = dma_device_enabled(dwc2); @@ -505,7 +516,7 @@ void dcd_sof_enable(uint8_t rhport, bool en) { (void) rhport; dwc2_regs_t* dwc2 = DWC2_REG(rhport); - _sof_en = en; + _dcd_data.sof_en = en; if (en) { dwc2->gintsts = GINTSTS_SOF; @@ -530,7 +541,7 @@ void dcd_edpt_close_all(uint8_t rhport) { dwc2_regs_t* dwc2 = DWC2_REG(rhport); uint8_t const ep_count = _dwc2_controller[rhport].ep_count; - _allocated_ep_in_count = 1; + _dcd_data.allocated_epin_count = 1; // Disable non-control interrupt dwc2->daintmsk = (1 << DAINTMSK_OEPM_Pos) | (1 << DAINTMSK_IEPM_Pos); @@ -574,7 +585,7 @@ bool dcd_edpt_xfer(uint8_t rhport, uint8_t ep_addr, uint8_t* buffer, uint16_t to // EP0 can only handle one packet if (epnum == 0) { - ep0_pending[dir] = total_bytes; + _dcd_data.ep0_pending[dir] = total_bytes; } // Schedule packets to be sent within interrupt @@ -640,8 +651,8 @@ static void handle_bus_reset(uint8_t rhport) { tu_memclr(xfer_status, sizeof(xfer_status)); - _sof_en = false; - _allocated_ep_in_count = 1; + _dcd_data.sof_en = false; + _dcd_data.allocated_epin_count = 1; // 1. NAK for all OUT endpoints for (uint8_t n = 0; n < ep_count; n++) { @@ -745,8 +756,8 @@ static void handle_rxflvl_irq(uint8_t rhport) { case GRXSTS_PKTSTS_SETUP_RX: // Setup packet received // We can receive up to three setup packets in succession, but only the last one is valid. - _setup_packet[0] = (*rx_fifo); - _setup_packet[1] = (*rx_fifo); + _dcd_data.setup_packet[0] = (*rx_fifo); + _dcd_data.setup_packet[1] = (*rx_fifo); break; case GRXSTS_PKTSTS_SETUP_DONE: @@ -773,8 +784,8 @@ static void handle_rxflvl_irq(uint8_t rhport) { if (byte_count < xfer->max_size) { xfer->total_len -= epout->tsiz_bm.xfer_size; if (epnum == 0) { - xfer->total_len -= ep0_pending[TUSB_DIR_OUT]; - ep0_pending[TUSB_DIR_OUT] = 0; + xfer->total_len -= _dcd_data.ep0_pending[TUSB_DIR_OUT]; + _dcd_data.ep0_pending[TUSB_DIR_OUT] = 0; } } } @@ -793,7 +804,7 @@ static void handle_rxflvl_irq(uint8_t rhport) { static void handle_epout_slave(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepint_bm) { if (doepint_bm.setup_phase_done) { - dcd_event_setup_received(rhport, (uint8_t*) _setup_packet, true); + dcd_event_setup_received(rhport, (uint8_t*) _dcd_data.setup_packet, true); return; } @@ -805,7 +816,7 @@ static void handle_epout_slave(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doe if (!doepint_bm.status_phase_rx && !doepint_bm.setup_packet_rx) { xfer_ctl_t* xfer = XFER_CTL_BASE(epnum, TUSB_DIR_OUT); - if ((epnum == 0) && ep0_pending[TUSB_DIR_OUT]) { + if ((epnum == 0) && _dcd_data.ep0_pending[TUSB_DIR_OUT]) { // EP0 can only handle one packet, Schedule another packet to be received. edpt_schedule_packets(rhport, epnum, TUSB_DIR_OUT); } else { @@ -821,7 +832,7 @@ static void handle_epin_slave(uint8_t rhport, uint8_t epnum, dwc2_diepint_t diep xfer_ctl_t* xfer = XFER_CTL_BASE(epnum, TUSB_DIR_IN); if (diepint_bm.xfer_complete) { - if ((epnum == 0) && ep0_pending[TUSB_DIR_IN]) { + if ((epnum == 0) && _dcd_data.ep0_pending[TUSB_DIR_IN]) { // EP0 can only handle one packet. Schedule another packet to be transmitted. edpt_schedule_packets(rhport, epnum, TUSB_DIR_IN); } else { @@ -869,8 +880,8 @@ static void handle_epout_dma(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepi if (doepint_bm.setup_phase_done) { dma_setup_prepare(rhport); - dcd_dcache_invalidate(_setup_packet, 8); - dcd_event_setup_received(rhport, (uint8_t*) _setup_packet, true); + dcd_dcache_invalidate(_dcd_data.setup_packet, 8); + dcd_event_setup_received(rhport, (uint8_t*) _dcd_data.setup_packet, true); return; } @@ -879,7 +890,7 @@ static void handle_epout_dma(uint8_t rhport, uint8_t epnum, dwc2_doepint_t doepi // only handle data skip if it is setup or status related // Normal OUT transfer complete if (!doepint_bm.status_phase_rx && !doepint_bm.setup_packet_rx) { - if ((epnum == 0) && ep0_pending[TUSB_DIR_OUT]) { + if ((epnum == 0) && _dcd_data.ep0_pending[TUSB_DIR_OUT]) { // EP0 can only handle one packet Schedule another packet to be received. edpt_schedule_packets(rhport, epnum, TUSB_DIR_OUT); } else { @@ -907,7 +918,7 @@ static void handle_epin_dma(uint8_t rhport, uint8_t epnum, dwc2_diepint_t diepin xfer_ctl_t* xfer = XFER_CTL_BASE(epnum, TUSB_DIR_IN); if (diepint_bm.xfer_complete) { - if ((epnum == 0) && ep0_pending[TUSB_DIR_IN]) { + if ((epnum == 0) && _dcd_data.ep0_pending[TUSB_DIR_IN]) { // EP0 can only handle one packet. Schedule another packet to be transmitted. edpt_schedule_packets(rhport, epnum, TUSB_DIR_IN); } else { @@ -1007,7 +1018,7 @@ void dcd_int_handler(uint8_t rhport) { if (gintsts & GINTSTS_OTGINT) { // OTG INT bit is read-only - uint32_t const otg_int = dwc2->gotgint; + const uint32_t otg_int = dwc2->gotgint; if (otg_int & GOTGINT_SEDET) { dcd_event_bus_signal(rhport, DCD_EVENT_UNPLUGGED, true); @@ -1021,7 +1032,7 @@ void dcd_int_handler(uint8_t rhport) { const uint32_t frame = (dwc2->dsts & DSTS_FNSOF) >> DSTS_FNSOF_Pos; // Disable SOF interrupt if SOF was not explicitly enabled since SOF was used for remote wakeup detection - if (!_sof_en) { + if (!_dcd_data.sof_en) { dwc2->gintmsk &= ~GINTMSK_SOFM; }