I just tried it, after slightly modifying cordic_calc to return both values [void cordic_calc(float angle, result_t * output)], which improved the total execution time from 1351 usec to 1196 usec.
Deku runs in 1562 usec, which is a significant improvement compared to the existing one, but still significantly slower than CORDIC. And, unless I did something wrong, deku_sin129() seems to have a slightly larger error than the current implementation, making CORDIC much more precise. Another minor advantage is that CORDIC works from -PI to PI, the SimpleFOC ones only from 0 to PI, which could help with minimizing the need for normalization in some cases (?)
Starting...
Initializing CORDIC...
CORDIC initialized.
Timing CORDIC vs stdlib vs SimpleFOC vs Deku Sine and Cosine calculations...
CORDIC:
CORDIC Time (us) for 3217 steps: 1196
Result: 2048.99
SimpleFOC _sin _cos:
SimpleFOC _sin _cos time (us) for 3217 steps: 2112
Result: 2048.97
Deku _sin _cos:
Deku _sin _cos time (us) for 3217 steps: 1562
Result: 2048.98
stdlib sin:
stdlib sin time (us) for 3217 steps: 5364
Result: 2049.00
Comparing accuracy...
RMS difference between CORDIC and stdlib: 0.00000059
RMS difference between SimpleFOC and stdlib: 0.00161161
RMS difference between Deku129 and stdlib: 0.00250501
Test complete.
Once again, I’m running both sin and cos at once, and I implemented a deku_cos129() so that the same code is used for both sin and cos.
Thanks for everything else in your reply. Very good points, and if we truly wanted to improve performance when CORDIC is available, implementing a custom STM32BLDCMotor class is probably the best option: it would allow running some code in parallel with CORDIC, doing most of the math in q31, and avoiding conversions (e.g. the transforms can be calculated in q31 with no need to convert the sin and cos values back to float).
Adding the code I ran for this test
// Lookup table with 129 entries (indices 0..128) used by deku_sin129().
// deku_sin129() multiplies an entry by 1/32768 and mirrors/negates the index
// to cover a full turn, so the table only needs to span the first quarter wave.
// NOTE(review): entries appear to be round(32768 * sin(k * pi / 256)) for
// k = 0..128 (e.g. entry 1 = 402, entry 128 = 32768) — confirm against the generator.
unsigned short sine_array3[129] = {0, 402, 804, 1206, 1608, 2009, 2411, 2811, 3212, 3612, 4011, 4410, 4808, 5205, 5602, 5998, 6393, 6787, 7180, 7571, 7962, 8351, 8740, 9127, 9512, 9896, 10279, 10660, 11039, 11417, 11793, 12167, 12540, 12910, 13279, 13646, 14010, 14373, 14733, 15091, 15447, 15800, 16151, 16500, 16846, 17190, 17531, 17869, 18205, 18538, 18868, 19195, 19520, 19841, 20160, 20475, 20788, 21097, 21403, 21706, 22006, 22302, 22595, 22884, 23170, 23453, 23732, 24008, 24279, 24548, 24812, 25073, 25330, 25583, 25833, 26078, 26320, 26557, 26791, 27020, 27246, 27467, 27684, 27897, 28106, 28311, 28511, 28707, 28899, 29086, 29269, 29448, 29622, 29792, 29957, 30118, 30274, 30425, 30572, 30715, 30853, 30986, 31114, 31238, 31357, 31471, 31581, 31686, 31786, 31881, 31972, 32058, 32138, 32214, 32286, 32352, 32413, 32470, 32522, 32568, 32610, 32647, 32679, 32706, 32729, 32746, 32758, 32766, 32768};
/*
 * Table-based sine approximation.
 *
 * The angle `a` (radians) is scaled so that one full turn maps onto 512
 * rounded steps, and the step index is wrapped with a 9-bit mask. Each group
 * of 128 steps is one quadrant; the quadrant decides whether sine_array3 is
 * read forwards or mirrored and whether the result is negated.
 *
 * Returns the table entry scaled by 1/32768.
 * NOTE(review): the float -> unsigned conversion presumably expects a >= 0 —
 * confirm with callers before passing negative angles.
 */
float deku_sin129(float a)
{
  const float scale = 1 / 32768.0f;

  // Scale to 1024 half-steps per turn, add 1 and halve to round to the
  // nearest of 512 steps, then wrap into 0..511.
  const unsigned int idx = ((unsigned int)(a * (128 * 8 / _2PI) + 1) >> 1) & 0x1ff;

  // Bits 7..8 of the index select the quadrant (0..3).
  switch (idx >> 7)
  {
  case 0: // rising, positive half
    return scale * sine_array3[idx];
  case 1: // falling, positive half: mirror around index 128
    return scale * sine_array3[256 - idx];
  case 2: // falling, negative half
    return -scale * sine_array3[idx - 256];
  default: // rising, negative half: mirror again
    return -scale * sine_array3[512 - idx];
  }
}
/*
 * Table-based cosine approximation built on deku_sin129().
 *
 * Adds _PI_2 to the angle `a`, subtracts _2PI once if the shifted angle
 * exceeds _2PI, and returns deku_sin129() of the result.
 */
float deku_cos129(float a)
{
  float shifted = a + _PI_2;
  if (shifted > _2PI)
  {
    // Single wrap only: the shift adds at most a quarter turn.
    shifted = shifted - _2PI;
  }
  return deku_sin129(shifted);
}
#include <Arduino.h>
#include <SimpleFOC.h>
#include "common/foc_utils.h"
#include "stm32g4xx_ll_cordic.h"
#include "stm32g4xx_ll_rcc.h"
#include "stm32g4xx_ll_bus.h"
#include "arm_math.h"
#define PI32f 3.141592f // pi as a float literal; cordic_calc() divides the angle by this before converting to q31. Original note: max precision in (float) due to implementation. NOTE(review): literal is truncated to 7 significant digits — confirm this is intentional.
/* Sine/cosine pair filled in by cordic_calc(). Field order: sin, then cos. */
struct results
{
  float sin; /* sine of the requested angle */
  float cos; /* cosine of the requested angle */
};
typedef struct results result_t;
/*
 * Enables the CORDIC peripheral clock and configures the peripheral for the
 * cosine function with one 32-bit write (the angle) and two 32-bit reads
 * (cosine, then sine), as consumed by cordic_calc().
 */
void CORDIC_Config(void)
{
  /* The peripheral clock must be on before the configuration is written. */
  LL_AHB1_GRP1_EnableClock(LL_AHB1_GRP1_PERIPH_CORDIC);

  const uint32_t function = LL_CORDIC_FUNCTION_COSINE;    /* cosine function */
  const uint32_t precision = LL_CORDIC_PRECISION_6CYCLES; /* max precision for q1.31 cosine */
  const uint32_t scale = LL_CORDIC_SCALE_0;               /* no scale */
  const uint32_t nb_write = LL_CORDIC_NBWRITE_1;          /* one input: angle; modulus is 1 after cordic reset */
  const uint32_t nb_read = LL_CORDIC_NBREAD_2;            /* two outputs: cosine, then sine */
  const uint32_t in_size = LL_CORDIC_INSIZE_32BITS;       /* q1.31 format for input data */
  const uint32_t out_size = LL_CORDIC_OUTSIZE_32BITS;     /* q1.31 format for output data */

  /* Configure CORDIC peripheral */
  LL_CORDIC_Config(CORDIC, function, precision, scale, nb_write, nb_read, in_size, out_size);
}
void cordic_calc(float angle, result_t * output)
{
/* Write angle and start CORDIC execution */
CORDIC->WDATA = (q31_t)((angle / PI32f) * 0x80000000);
// code here can be executed in parallel with CORDIC with no impact on timing
/* Read cosine */
q31_t cosOutput = (int32_t)CORDIC->RDATA;
// convert q31 result to float
output->cos = (float)cosOutput / (float)0x80000000;
/* Read sine */
q31_t sinOutput = (int32_t)CORDIC->RDATA;
// convert q31 results to float
output->sin = (float)sinOutput / (float)0x80000000;
}
/*
 * Arduino entry point: opens the serial port, waits for it to become ready,
 * then configures the CORDIC peripheral and logs progress.
 */
void setup()
{
  Serial.begin(115200);
  while (!Serial)
  {
    // spin until the serial port reports ready
  }
  delay(1000);

  Serial.println("Starting...");

  Serial.print("Initializing CORDIC... ");
  CORDIC_Config();
  Serial.println("CORDIC initialized.");

  Serial.println();
}
/* Prints one timing summary: "<label><steps> steps: <elapsed>", the accumulated result, and a blank line. */
static void report_timing(const char *label, int steps, unsigned long elapsed_us, float result)
{
  Serial.print(label);
  Serial.print(steps);
  Serial.print(" steps: ");
  Serial.println(elapsed_us);
  Serial.print("Result: ");
  Serial.println(result);
  Serial.println();
}

/* Prints "<label><angle>: <diff>" when the error magnitude exceeds 1.0 (in either direction). */
static void report_if_outlier(const char *label, float angle, float diff)
{
  if (diff > 1.0f || diff < -1.0f)
  {
    Serial.print(label);
    Serial.print(angle, 8);
    Serial.print(": ");
    Serial.println(diff, 8);
  }
}

/*
 * Benchmark body: times four sine/cosine implementations (CORDIC, SimpleFOC
 * _sin/_cos, Deku table lookup, stdlib sin/cos) over the same sweep of angles
 * from 0 up to _PI in steps of 1/1024 rad, then reports the RMS difference of
 * each approximate sine against the stdlib sine. Halts in an endless loop
 * afterwards, so the test runs once.
 *
 * The timing loops are deliberately kept inline (not factored through a
 * callback) so that the measured time is not distorted by indirect calls.
 */
void loop()
{
  result_t cordic;
  const float step = 1 / 1024.0f;

  Serial.println("Timing CORDIC vs stdlib vs SimpleFOC vs Deku Sine and Cosine calculations...");
  Serial.println();

  Serial.println("CORDIC:");
  float res = 0.0f; // sum of all results; printed so the work cannot be optimised away
  int steps = 0;
  // micros() returns an unsigned value; unsigned subtraction stays correct across rollover.
  unsigned long ts = micros();
  for (float i = 0.0f; i < _PI; i += step)
  {
    cordic_calc(i, &cordic);
    res += cordic.sin;
    res += cordic.cos;
    steps++;
  }
  unsigned long ts_end = micros();
  report_timing("CORDIC Time (us) for ", steps, ts_end - ts, res);

  Serial.println("SimpleFOC _sin _cos:");
  steps = 0;
  res = 0.0f;
  ts = micros();
  for (float i = 0.0f; i < _PI; i += step)
  {
    res += _sin(i);
    res += _cos(i);
    steps++;
  }
  ts_end = micros();
  report_timing("SimpleFOC _sin _cos time (us) for ", steps, ts_end - ts, res);

  Serial.println("Deku _sin _cos:");
  steps = 0;
  res = 0.0f;
  ts = micros();
  for (float i = 0.0f; i < _PI; i += step)
  {
    res += deku_sin129(i);
    res += deku_cos129(i);
    steps++;
  }
  ts_end = micros();
  report_timing("Deku _sin _cos time (us) for ", steps, ts_end - ts, res);

  Serial.println("stdlib sin:");
  steps = 0;
  res = 0.0f;
  ts = micros();
  for (float i = 0.0f; i < _PI; i += step)
  {
    res += sin(i);
    res += cos(i);
    steps++;
  }
  ts_end = micros();
  report_timing("stdlib sin time (us) for ", steps, ts_end - ts, res);

  Serial.println("Comparing accuracy...");
  float rmsdiff1 = 0.0f; // CORDIC vs stdlib, sum of squared errors
  float rmsdiff2 = 0.0f; // SimpleFOC vs stdlib
  float rmsdiff3 = 0.0f; // Deku vs stdlib
  steps = 0;
  for (float i = 0.0f; i < _PI; i += step)
  {
    cordic_calc(i, &cordic);
    const float reference = sin(i);

    const float diff1 = reference - cordic.sin;
    report_if_outlier("CORDIC vs stdlib at i=", i, diff1);

    const float diff2 = reference - _sin(i);
    report_if_outlier("SimFOC vs stdlib at i=", i, diff2);

    // Fixed: the original tested and printed diff2 here, so Deku outliers
    // were never reported (and SimpleFOC ones were reported twice).
    const float diff3 = reference - deku_sin129(i);
    report_if_outlier("Deku vs stdlib at i=", i, diff3);

    rmsdiff1 += diff1 * diff1;
    rmsdiff2 += diff2 * diff2;
    rmsdiff3 += diff3 * diff3;
    steps++;
  }
  rmsdiff1 = sqrt(rmsdiff1 / steps);
  rmsdiff2 = sqrt(rmsdiff2 / steps);
  rmsdiff3 = sqrt(rmsdiff3 / steps);

  Serial.print("RMS difference between CORDIC and stdlib: ");
  Serial.println(rmsdiff1, 8);
  Serial.print("RMS difference between SimpleFOC and stdlib: ");
  Serial.println(rmsdiff2, 8);
  Serial.print("RMS difference between Deku129 and stdlib: ");
  Serial.println(rmsdiff3, 8);
  Serial.println("Test complete.");

  // Run the benchmark only once.
  while (1)
    ;
}