CMSIS-DSP: Test framework improvement

Enabled MMU for A32.
pull/19/head
Christophe Favergeon 6 years ago
parent be40746c5e
commit 04449477b0

@ -25,6 +25,14 @@
* limitations under the License.
*/
/*
None of the above values have been checked!
*/
#ifndef __ARMCA32_H__
#define __ARMCA32_H__
@ -33,8 +41,24 @@ extern "C" {
#endif
/* ------------------------- Interrupt Number Definition ------------------------ */
/******************************************************************************/
/* Peripheral memory map */
/******************************************************************************/
/* Peripheral and RAM base address */
#define VE_A32_NORMAL (0x00000000UL) /*!< (NORMAL ) Base Address - presumably the first normal-memory region; TODO confirm */
#define VE_A32_PERIPH (0x13000000UL) /*!< (PERIPH ) Base Address of the peripheral region */
#define VE_A32_NORMAL2 (0x14000000UL) /*!< (NORMAL2) Base Address - presumably the second normal-memory region; TODO confirm */
/* -------- Configuration of the Cortex-A32 Processor and Core Peripherals ------- */
#define __CA_REV 0x0000U /*!< Core revision r0p0 */
#define __CORTEX_A 32U /*!< Cortex-A32 Core */
#define __FPU_PRESENT 1U /* FPU present */
#define __GIC_PRESENT 1U /* GIC present */
#define __TIM_PRESENT 1U /* TIM present */
#define __L2C_PRESENT 0U /* L2C present */
/** Device specific Interrupt IDs */
typedef enum IRQn
{
/****** SGI Interrupts Numbers ****************************************/
@ -55,7 +79,7 @@ typedef enum IRQn
SGI14_IRQn = 14, /*!< Software Generated Interrupt 14 */
SGI15_IRQn = 15, /*!< Software Generated Interrupt 15 */
/****** Cortex-A5 Processor Exceptions Numbers ****************************************/
/****** Cortex-A9 Processor Exceptions Numbers ****************************************/
GlobalTimer_IRQn = 27, /*!< Global Timer Interrupt */
PrivTimer_IRQn = 29, /*!< Private Timer Interrupt */
PrivWatchdog_IRQn = 30, /*!< Private Watchdog Interrupt */
@ -79,54 +103,10 @@ typedef enum IRQn
VFS2_IRQn = 73, /*!< VFS2 Interrupt */
} IRQn_Type;
/******************************************************************************/
/* Peripheral memory map */
/******************************************************************************/
/* Peripheral and RAM base address */
#define VE_A5_MP_FLASH_BASE0 (0x00000000UL) /*!< (FLASH0 ) Base Address */
#define VE_A5_MP_FLASH_BASE1 (0x0C000000UL) /*!< (FLASH1 ) Base Address */
#define VE_A5_MP_SRAM_BASE (0x14000000UL) /*!< (SRAM ) Base Address */
#define VE_A5_MP_PERIPH_BASE_CS2 (0x18000000UL) /*!< (Peripheral ) Base Address */
#define VE_A5_MP_VRAM_BASE (0x00000000UL + VE_A5_MP_PERIPH_BASE_CS2) /*!< (VRAM ) Base Address */
#define VE_A5_MP_ETHERNET_BASE (0x02000000UL + VE_A5_MP_PERIPH_BASE_CS2) /*!< (ETHERNET ) Base Address */
#define VE_A5_MP_USB_BASE (0x03000000UL + VE_A5_MP_PERIPH_BASE_CS2) /*!< (USB ) Base Address */
#define VE_A5_MP_PERIPH_BASE_CS3 (0x1C000000UL) /*!< (Peripheral ) Base Address */
#define VE_A5_MP_DAP_BASE (0x00000000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (LOCAL DAP ) Base Address */
#define VE_A5_MP_SYSTEM_REG_BASE (0x00010000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (SYSTEM REG ) Base Address */
#define VE_A5_MP_SERIAL_BASE (0x00030000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (SERIAL ) Base Address */
#define VE_A5_MP_AACI_BASE (0x00040000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (AACI ) Base Address */
#define VE_A5_MP_MMCI_BASE (0x00050000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (MMCI ) Base Address */
#define VE_A5_MP_KMI0_BASE (0x00060000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (KMI0 ) Base Address */
#define VE_A5_MP_UART_BASE (0x00090000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (UART ) Base Address */
#define VE_A5_MP_WDT_BASE (0x000F0000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (WDT ) Base Address */
#define VE_A5_MP_TIMER_BASE (0x00110000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (TIMER ) Base Address */
#define VE_A5_MP_DVI_BASE (0x00160000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (DVI ) Base Address */
#define VE_A5_MP_RTC_BASE (0x00170000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (RTC ) Base Address */
#define VE_A5_MP_UART4_BASE (0x001B0000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (UART4 ) Base Address */
#define VE_A5_MP_CLCD_BASE (0x001F0000UL + VE_A5_MP_PERIPH_BASE_CS3) /*!< (CLCD ) Base Address */
#define VE_A5_MP_PRIVATE_PERIPH_BASE (0x2C000000UL) /*!< (Peripheral ) Base Address */
#define VE_A5_MP_GIC_DISTRIBUTOR_BASE (0x00001000UL + VE_A5_MP_PRIVATE_PERIPH_BASE) /*!< (GIC DIST ) Base Address */
#define VE_A5_MP_GIC_INTERFACE_BASE (0x00000100UL + VE_A5_MP_PRIVATE_PERIPH_BASE) /*!< (GIC CPU IF ) Base Address */
#define VE_A5_MP_PRIVATE_TIMER (0x00000600UL + VE_A5_MP_PRIVATE_PERIPH_BASE) /*!< (PTIM ) Base Address */
#define VE_A5_MP_PL310_BASE (0x000F0000UL + VE_A5_MP_PRIVATE_PERIPH_BASE) /*!< (L2C-310 ) Base Address */
#define VE_A5_MP_SSRAM_BASE (0x2E000000UL) /*!< (System SRAM) Base Address */
#define VE_A5_MP_DRAM_BASE (0x80000000UL) /*!< (DRAM ) Base Address */
#define GIC_DISTRIBUTOR_BASE VE_A5_MP_GIC_DISTRIBUTOR_BASE
#define GIC_INTERFACE_BASE VE_A5_MP_GIC_INTERFACE_BASE
#define TIMER_BASE VE_A5_MP_PRIVATE_TIMER
//The VE-A5 model implements L1 cache as architecturally defined, but does not implement L2 cache.
//Do not enable the L2 cache if you are running RTX on a VE-A5 model as it may cause a data abort.
#define L2C_310_BASE VE_A5_MP_PL310_BASE
/* -------- Configuration of the Cortex-A5 Processor and Core Peripherals ------- */
#define __CA_REV 0x0000U /* Core revision r0p0 */
#define __CORTEX_A 5U /* Cortex-A5 Core */
#define __FPU_PRESENT 1U /* FPU present */
#define __GIC_PRESENT 1U /* GIC present */
#define __TIM_PRESENT 1U /* TIM present */
#define __L2C_PRESENT 0U /* L2C present */
// To allow inclusion of core_ca.h but those symbols are not used in our code
#define GIC_DISTRIBUTOR_BASE 0
#define GIC_INTERFACE_BASE 0
#define TIMER_BASE 0
#include "core_ca.h"
#include <system_ARMCA32.h>

@ -69,14 +69,14 @@
// </h>
*----------------------------------------------------------------------------*/
//#define __RAM_BASE 0x80200000#
#define __RAM_BASE 0x0500000
#define __RAM_SIZE 0x00700000
#define __RAM_BASE 0x00100000
#define __RAM_SIZE 0x00200000
#define __RW_DATA_SIZE 0xF0000
#define __ZI_DATA_SIZE 0x00200000
#define __STACK_SIZE 0x00007000
#define __HEAP_SIZE 0x00200000
#define __HEAP_SIZE 0x00100000
#define __UND_STACK_SIZE 0x00000100
#define __ABT_STACK_SIZE 0x00000100
@ -95,7 +95,7 @@
// <o1> TTB Size (in Bytes) <0x0-0xFFFFFFFF:8>
// </h>
*----------------------------------------------------------------------------*/
#define __TTB_BASE 0x80500000
#define __TTB_BASE 0x00300000
#define __TTB_SIZE 0x00005000

@ -105,69 +105,34 @@ void Reset_Handler(void) {
"LDR R0, =Vectors \n"
"MCR p15, 0, R0, c12, c0, 0 \n"
"LDR r0,=Image$$TTB$$ZI$$Base \n"
"MCR p15, 0, r0, c2, c0, 0 \n"
"LDR r0, =0xFFFFFFFF \n"
"MCR p15, 0, r0, c3, c0, 0 \n" // Write Domain Access Control Register
);
#if defined(__ARM_NEON) || defined(__ARM_FP)
//----------------------------------------------------------------
// Enable access to NEON/VFP by enabling access to Coprocessors 10 and 11.
// Enables Full Access i.e. in both privileged and non privileged modes
//----------------------------------------------------------------
__ASM volatile(
"MRC p15, 0, r0, c1, c0, 2 \n" // Read Coprocessor Access Control Register (CPACR)
"ORR r0, r0, #(0xF << 20) \n" // Enable access to CP 10 & 11
"MCR p15, 0, r0, c1, c0, 2 \n" // Write Coprocessor Access Control Register (CPACR)
"ISB \n"
//----------------------------------------------------------------
// Switch on the VFP and NEON hardware
//----------------------------------------------------------------
"MOV r0, #0x40000000 \n"
"VMSR FPEXC, r0 \n" // Write FPEXC register, EN bit set
);
#endif
__ASM volatile(
"LDR SP, =Image$$ARM_LIB_STACK$$ZI$$Limit \n"
"MRC p15, 0, R0, c1, c0, 0 \n" // Read CP15 System Control register
"BIC R0, R0, #(0x1 << 12) \n"
"BIC R0, R0, #(0x1 << 2) \n"
"BIC R0, R0, #0x2 \n" // Clear A bit 1 to disable strict alignment fault checking
"ORR R0, R0, #(0x1 << 11) \n" // Set Z bit 11 to enable branch prediction
//"BIC R0, R0, #(0x1 << 13)
"MCR p15, 0, R0, c1, c0, 0 \n" // Write value back to CP15 System Control register
"ISB \n"
// Setup Stack for each exceptional mode
"CPS #0x11 \n"
"LDR SP, =Image$$FIQ_STACK$$ZI$$Limit \n"
"CPS #0x12 \n"
"LDR SP, =Image$$IRQ_STACK$$ZI$$Limit \n"
"CPS #0x13 \n"
"LDR SP, =Image$$SVC_STACK$$ZI$$Limit \n"
"CPS #0x17 \n"
"LDR SP, =Image$$ABT_STACK$$ZI$$Limit \n"
"CPS #0x1B \n"
"LDR SP, =Image$$UND_STACK$$ZI$$Limit \n"
"CPS #0x1F \n"
"LDR SP, =Image$$ARM_LIB_STACK$$ZI$$Limit \n"
// Call SystemInit
"BL SystemInit \n"
// Unmask interrupts
//"CPSIE if \n"
"CPSIE if \n"
// Call __main
"BL __main \n"
);
}
/*----------------------------------------------------------------------------
Enable the instruction and data caches by setting the I bit (bit 12) and the
C bit (bit 2) of the CP15 System Control register.
NOTE(review): the asm statement modifies R0 but declares no clobber list -
confirm that no caller relies on R0 being preserved across this call.
*----------------------------------------------------------------------------*/
void enable_caches(void)
{
__ASM volatile(
"MRC p15, 0, R0, c1, c0, 0 \n" // Read CP15 System Control register
"ORR R0, R0, #(0x1 << 12) \n" // Set I bit 12 to enable I Cache
"ORR R0, R0, #(0x1 << 2) \n" // Set C bit 2 to enable D Cache
"MCR p15, 0, R0, c1, c0, 0 \n" // Write value back to CP15 System Control register
"ISB \n" // Synchronize so the new System Control value applies to the following instructions
);
}
/*----------------------------------------------------------------------------
Default Handler for Exceptions / Interrupts
*----------------------------------------------------------------------------*/

@ -1,13 +1,178 @@
#include "cmsis_compiler.h"
#include "cmsis_cp15.h"
/**************************************************************************//**
* @file mmu_ARMCA32.c
* @brief MMU Configuration for Arm Cortex-A32 Device Series
* @version V1.2.0
* @date 15. May 2019
*
* @note
*
******************************************************************************/
/*
* Copyright (c) 2009-2019 Arm Limited. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Memory map description from: DUI0447G_v2m_p1_trm.pdf 4.2.2 Arm Cortex-A Series memory map
Memory Type
0xffffffff |--------------------------| ------------
| FLAG SYNC | Device Memory
0xfffff000 |--------------------------| ------------
| Fault | Fault
0xfff00000 |--------------------------| ------------
| | Normal
| |
| Daughterboard |
| memory |
| |
0x80505000 |--------------------------| ------------
|TTB (L2 Sync Flags ) 4k | Normal
0x80504C00 |--------------------------| ------------
|TTB (L2 Peripherals-B) 16k| Normal
0x80504800 |--------------------------| ------------
|TTB (L2 Peripherals-A) 16k| Normal
0x80504400 |--------------------------| ------------
|TTB (L2 Priv Periphs) 4k | Normal
0x80504000 |--------------------------| ------------
| TTB (L1 Descriptors) | Normal
0x80500000 |--------------------------| ------------
| Stack | Normal
|--------------------------| ------------
| Heap | Normal
0x80400000 |--------------------------| ------------
| ZI Data | Normal
0x80300000 |--------------------------| ------------
| RW Data | Normal
0x80200000 |--------------------------| ------------
| RO Data | Normal
|--------------------------| ------------
| RO Code | USH Normal
0x80000000 |--------------------------| ------------
| Daughterboard | Fault
| HSB AXI buses |
0x40000000 |--------------------------| ------------
| Daughterboard | Fault
| test chips peripherals |
0x2c002000 |--------------------------| ------------
| Private Address | Device Memory
0x2c000000 |--------------------------| ------------
| Daughterboard | Fault
| test chips peripherals |
0x20000000 |--------------------------| ------------
| Peripherals | Device Memory RW/RO
| | & Fault
0x00000000 |--------------------------|
*/
// L1 Cache info and restrictions about architecture of the caches (CCSIDR register):
// Write-Through support *not* available
// Write-Back support available.
// Read allocation support available.
// Write allocation support available.
//Note: You should use the Shareable attribute carefully.
//For cores without coherency logic (such as SCU) marking a region as shareable forces the processor to not cache that region regardless of the inner cache settings.
//Cortex-A versions of RTX use LDREX/STREX instructions relying on Local monitors. Local monitors will be used only when the region gets cached, regions that are not cached will use the Global Monitor.
//Some Cortex-A implementations do not include Global Monitors, so wrongly setting the attribute Shareable may cause STREX to fail.
//Recall: When the Shareable attribute is applied to a memory region that is not Write-Back, Normal memory, data held in this region is treated as Non-cacheable.
//When SMP bit = 0, Inner WB/WA Cacheable Shareable attributes are treated as Non-cacheable.
//When SMP bit = 1, Inner WB/WA Cacheable Shareable attributes are treated as Cacheable.
//Following MMU configuration is expected
//SCTLR.AFE == 1 (Simplified access permissions model - AP[2:1] define access permissions, AP[0] is an access flag)
//SCTLR.TRE == 0 (TEX remap disabled, so memory type and attributes are described directly by bits in the descriptor)
//Domain 0 is always the Client domain
//Descriptors should place all memory in domain 0
#include "ARMCA32.h"
#include "mem_ARMCA32.h"
// TTB base address
#define TTB_BASE ((uint32_t*)__TTB_BASE)
// L2 table pointers
//----------------------------------------
#define TTB_L1_SIZE (0x00004000) // The L1 translation table divides the full 4GB address space of a 32-bit core
// into 4096 equally sized sections, each of which describes 1MB of virtual memory space.
// The L1 translation table therefore contains 4096 32-bit (word-sized) entries.
#define PRIVATE_TABLE_L2_BASE_4k (__TTB_BASE + TTB_L1_SIZE) // Map 4k Private Address space
#define PERIPHERAL_A_TABLE_L2_BASE_64k (__TTB_BASE + TTB_L1_SIZE + 0x400) // Map 64k Peripheral #1 0x1C000000 - 0x1C00FFFF
#define PERIPHERAL_B_TABLE_L2_BASE_64k (__TTB_BASE + TTB_L1_SIZE + 0x800) // Map 64k Peripheral #2 0x1C100000 - 0x1C10FFFF
#define SYNC_FLAGS_TABLE_L2_BASE_4k (__TTB_BASE + TTB_L1_SIZE + 0xC00) // Map 4k Flag synchronization
//--------------------- PERIPHERALS -------------------
#define PERIPHERAL_A_FAULT (0x00000000 + 0x1c000000) //0x1C000000-0x1C00FFFF (1M)
#define PERIPHERAL_B_FAULT (0x00100000 + 0x1c000000) //0x1C100000-0x1C10FFFF (1M)
//--------------------- SYNC FLAGS --------------------
#define FLAG_SYNC 0xFFFFF000
#define F_SYNC_BASE 0xFFF00000 //1M aligned
static uint32_t Sect_Normal; //outer & inner wb/wa, non-shareable, executable, rw, domain 0, base addr 0
static uint32_t Sect_Normal_Cod; //outer & inner wb/wa, non-shareable, executable, ro, domain 0, base addr 0
static uint32_t Sect_Normal_RO; //as Sect_Normal_Cod, but not executable
static uint32_t Sect_Normal_RW; //as Sect_Normal_Cod, but writeable and not executable
static uint32_t Sect_Device_RO; //device, non-shareable, non-executable, ro, domain 0, base addr 0
static uint32_t Sect_Device_RW; //as Sect_Device_RO, but writeable
/* Define global descriptors */
static uint32_t Page_L1_4k = 0x0; //generic
static uint32_t Page_L1_64k = 0x0; //generic
static uint32_t Page_4k_Device_RW; //Shared device, not executable, rw, domain 0
static uint32_t Page_64k_Device_RW; //Shared device, not executable, rw, domain 0
void MMU_CreateTranslationTable(void)
{
mmu_region_attributes_Type region;
//Create 4GB of faulting entries
MMU_TTSection (TTB_BASE, 0, 4096, DESCRIPTOR_FAULT);
/*
* Generate descriptors. Refer to core_ca.h to get information about attributes
*
*/
//Create descriptors for Vectors, RO, RW, ZI sections
section_normal(Sect_Normal, region);
section_normal_cod(Sect_Normal_Cod, region);
section_normal_ro(Sect_Normal_RO, region);
section_normal_rw(Sect_Normal_RW, region);
//Create descriptors for peripherals
section_device_ro(Sect_Device_RO, region);
section_device_rw(Sect_Device_RW, region);
//Create descriptors for 64k pages
page64k_device_rw(Page_L1_64k, Page_64k_Device_RW, region);
//Create descriptors for 4k pages
page4k_device_rw(Page_L1_4k, Page_4k_Device_RW, region);
/*
* Define MMU flat-map regions and attributes
*
*/
//Define Image
MMU_TTSection (TTB_BASE, __ROM_BASE, __ROM_SIZE/0x100000, Sect_Normal_Cod); // multiple of 1MB sections
MMU_TTSection (TTB_BASE, __RAM_BASE, __RAM_SIZE/0x100000, Sect_Normal_RW); // multiple of 1MB sections
//--------------------- PERIPHERALS -------------------
MMU_TTSection (TTB_BASE, VE_A32_PERIPH , 64, Sect_Device_RW); // 64MB NOR
/* Set location of level 1 page table
; 31:14 - Translation table base addr (31:14-TTBCR.N, TTBCR.N is 0 out of reset)
@ -18,12 +183,12 @@ void MMU_CreateTranslationTable(void)
; 2 - IMP 0x0 (Implementation Defined)
; 1 - S 0x0 (Non-shared)
; 0 - IRGN[1] 0x0 (Inner WB WA) */
__set_TTBR0(__TTB_BASE);
__set_TTBR0(__TTB_BASE | 0x48);
__ISB();
/* Set up domain access control register
; We set domain 0 to Client and all other domains to No Access.
; All translation table entries specify domain 0 */
__set_DACR(0xFFFFFFFF);
__set_DACR(1);
__ISB();
}

@ -33,10 +33,12 @@
#include <rt_sys.h>
#include "ARMCA32.h"
#include "RTE_Components.h"
#include CMSIS_device_header
#define SERIAL_BASE_ADDRESS (0x13000000)
#define SERIAL_DATA *((volatile unsigned *) SERIAL_BASE_ADDRESS)
@ -57,15 +59,56 @@ void ttywrch (int ch)
stdout_putchar(ch);
}
extern void enable_caches(void);
extern void enable_caches();
/*----------------------------------------------------------------------------
System Initialization
*----------------------------------------------------------------------------*/
void SystemInit (void)
{
/* Do not use global variables here: this function is called before
reaching pre-main, so the RW section may be overwritten afterwards.
NOTE(review): MMU_CreateTranslationTable() below writes file-scope static
descriptor variables - confirm this is safe with respect to the rule above. */
// Invalidate entire Unified TLB
__set_TLBIALL(0);
// Invalidate entire branch predictor array
__set_BPIALL(0);
__DSB();
__ISB();
// Invalidate instruction cache and flush branch target cache
__set_ICIALLU(0);
__DSB();
__ISB();
// Data cache invalidation is currently disabled
//L1C_InvalidateDCacheAll();
#if ((__FPU_PRESENT == 1) && (__FPU_USED == 1))
// Enable FPU (only when the device has one and the build uses it)
__FPU_Enable();
#endif
// Create the translation table; it must exist before the MMU is enabled
MMU_CreateTranslationTable();
// Enable MMU
MMU_Enable();
// Enable L1 caches (branch target cache enabling is currently disabled)
L1C_EnableCaches();
//L1C_EnableBTAC();
#if (__L2C_PRESENT == 1)
// Enable L2 cache controller (currently disabled)
//L2C_Enable();
#endif
// IRQ initialization is currently disabled
//IRQ_Initialize();
// NOTE(review): caches were already enabled by L1C_EnableCaches() above;
// this call looks redundant - confirm whether it is a leftover.
enable_caches();
}

@ -1,2 +1,6 @@
# configure_platform(PROJECTNAME ROOT CORE PLATFORMFOLDER)
#
# Platform-specific configuration hook. It currently adds nothing to the
# target: the only platform-specific step is disabled below.
#
# Arguments (as used by the disabled code; ROOT is not referenced here):
#   PROJECTNAME    - target that would receive the extra sources
#   ROOT           - unused in this function
#   CORE           - core name, also used as a sub-folder of PLATFORMFOLDER
#   PLATFORMFOLDER - folder containing the per-core platform files
function(configure_platform PROJECTNAME ROOT CORE PLATFORMFOLDER)
# Disabled: would add the assembly page tables for the ARMCA32 core.
#if (${CORE} STREQUAL "ARMCA32")
# target_sources(${PROJECTNAME} PRIVATE ${PLATFORMFOLDER}/${CORE}/pagetables.s)
#
#endif()
endfunction()

@ -89,7 +89,7 @@ void initCycleMeasurement()
if (ENABLE_DIVIDER)
value |= 8; // enable "by 64" divider for CCNT.
value |= 16;
//value |= 16;
// program the performance-counter control-register:
__set_CP(15, 0, value, 9, 12, 0);

@ -75,6 +75,7 @@
this->inp1=input1.ptr();
this->inp2=input2.ptr();
this->outp=output.ptr();
break;
case BasicMathsBenchmarksF32::VEC_NEGATE_F32_5:

@ -57,6 +57,14 @@ function(compilerSpecificCompileOptions PROJECTNAME ROOT)
endif()
endif()
# Cortex-A32 with NEON (or experimental NEON) enabled: select the Armv8
# NEON/FP unit. PUBLIC so the option also propagates to targets that link
# against ${PROJECTNAME}. No -mfpu option is added here when NEON is off.
if (ARM_CPU STREQUAL "cortex-a32" )
if (NEON OR NEONEXPERIMENTAL)
target_compile_options(${PROJECTNAME} PUBLIC "-mfpu=neon-fp-armv8")
endif()
endif()
if (ARM_CPU STREQUAL "cortex-a7" )
if (NOT (NEON OR NEONEXPERIMENTAL))
target_compile_options(${PROJECTNAME} PUBLIC "-mfpu=vfpv4-d16")

Loading…
Cancel
Save