Playing around some more with this…
The performance winner so far on the STM32G4 is this:
/* Quarter-wave sine table in 16-bit fixed point: 257 samples rising from 0 to 32768
 * (entry k is sin(k * pi / 512) scaled by 32768), endpoint included so index 256 is valid. */
unsigned short sine_array4[257] = {0, 201, 402, 603, 804, 1005, 1206, 1407, 1608, 1809, 2009, 2210, 2411, 2611, 2811, 3012, 3212, 3412, 3612, 3812, 4011, 4211, 4410, 4609, 4808, 5007, 5205, 5404, 5602, 5800, 5998, 6195, 6393, 6590, 6787, 6983, 7180, 7376, 7571, 7767, 7962, 8157, 8351, 8546, 8740, 8933, 9127, 9319, 9512, 9704, 9896, 10088, 10279, 10469, 10660, 10850, 11039, 11228, 11417, 11605, 11793, 11980, 12167, 12354, 12540, 12725, 12910, 13095, 13279, 13463, 13646, 13828, 14010, 14192, 14373, 14553, 14733, 14912, 15091, 15269, 15447, 15624, 15800, 15976, 16151, 16326, 16500, 16673, 16846, 17018, 17190, 17361, 17531, 17700, 17869, 18037, 18205, 18372, 18538, 18703, 18868, 19032, 19195, 19358, 19520, 19681, 19841, 20001, 20160, 20318, 20475, 20632, 20788, 20943, 21097, 21251, 21403, 21555, 21706, 21856, 22006, 22154, 22302, 22449, 22595, 22740, 22884, 23028, 23170, 23312, 23453, 23593, 23732, 23870, 24008, 24144, 24279, 24414, 24548, 24680, 24812, 24943, 25073, 25202, 25330, 25457, 25583, 25708, 25833, 25956, 26078, 26199, 26320, 26439, 26557, 26674, 26791, 26906, 27020, 27133, 27246, 27357, 27467, 27576, 27684, 27791, 27897, 28002, 28106, 28209, 28311, 28411, 28511, 28610, 28707, 28803, 28899, 28993, 29086, 29178, 29269, 29359, 29448, 29535, 29622, 29707, 29792, 29875, 29957, 30038, 30118, 30196, 30274, 30350, 30425, 30499, 30572, 30644, 30715, 30784, 30853, 30920, 30986, 31050, 31114, 31177, 31238, 31298, 31357, 31415, 31471, 31527, 31581, 31634, 31686, 31737, 31786, 31834, 31881, 31927, 31972, 32015, 32058, 32099, 32138, 32177, 32214, 32251, 32286, 32319, 32352, 32383, 32413, 32442, 32470, 32496, 32522, 32546, 32568, 32590, 32610, 32629, 32647, 32664, 32679, 32693, 32706, 32718, 32729, 32738, 32746, 32753, 32758, 32762, 32766, 32767, 32768};
/**
 * Fast table-based approximation of sin(a), with a in radians.
 *
 * One period is divided into 1024 steps. The angle is rounded to the nearest
 * step, the matching entry is read from the quarter-wave table sine_array4
 * (no interpolation) and scaled by 1/32768.
 *
 * Negative angles are handled through the identity sin(-a) = -sin(a).
 * Converting a negative float straight to unsigned int is undefined
 * behaviour in C, so the angle is folded to its magnitude before the table
 * index is computed. Results for non-negative angles are unchanged.
 *
 * @param a  angle in radians; |a| * (2048 / _2PI) + 1 must fit in an unsigned int
 * @return   approximate sine of a
 */
float deku_sin257(float a) {
    /* Carries both the fixed-point scale and the sign of the input angle. */
    float scale = 1/32768.0f;
    if (a < 0.0f) {
        a = -a;
        scale = -scale;
    }

    /* Scale to half-steps, add one and halve: rounds to the nearest of the
     * 1024 steps per period; the mask wraps whole periods away. */
    unsigned int i = ((unsigned int)(a * (256*8 /_2PI) + 1) >> 1) & 0x3ff;

    if (i < 256) {
        /* first quadrant: table read forwards */
        return scale*sine_array4[i];
    }
    else if (i < 512) {
        /* second quadrant: table mirrored around pi/2 */
        return scale*sine_array4[512 - i];
    }
    else if (i < 768) {
        /* third quadrant: first quadrant negated */
        return -scale*sine_array4[i - 512];
    }
    else {
        /* fourth quadrant: second quadrant negated */
        return -scale*sine_array4[1024 - i];
    }
}
A close second is a pure float table:
/* Quarter-wave sine table in floating point: 257 samples rising from 0.0 to 1.0,
 * endpoint included so index 256 is valid. */
float f_sine_array[257] = { 0.0f, 0.006135884649154475f, 0.012271538285719925f, 0.01840672990580482f, 0.024541228522912288f, 0.030674803176636626f, 0.03680722294135883f, 0.04293825693494082f, 0.049067674327418015f, 0.05519524434968994f, 0.06132073630220858f, 0.06744391956366405f, 0.07356456359966743f, 0.07968243797143013f, 0.0857973123444399f, 0.09190895649713272f, 0.0980171403295606f, 0.10412163387205459f, 0.11022220729388306f, 0.11631863091190475f, 0.1224106751992162f, 0.12849811079379317f, 0.13458070850712617f, 0.1406582393328492f, 0.14673047445536175f, 0.15279718525844344f, 0.15885814333386145f, 0.16491312048996992f, 0.17096188876030122f, 0.17700422041214875f, 0.18303988795514095f, 0.1890686641498062f, 0.19509032201612825f, 0.2011046348420919f, 0.20711137619221856f, 0.21311031991609136f, 0.2191012401568698f, 0.22508391135979283f, 0.2310581082806711f, 0.2370236059943672f, 0.24298017990326387f, 0.24892760574572015f, 0.25486565960451457f, 0.2607941179152755f, 0.26671275747489837f, 0.272621355449949f, 0.27851968938505306f, 0.2844075372112719f, 0.29028467725446233f, 0.2961508882436238f, 0.3020059493192281f, 0.30784964004153487f, 0.3136817403988915f, 0.3195020308160157f, 0.3253102921622629f, 0.33110630575987643f, 0.33688985339222005f, 0.3426607173119944f, 0.34841868024943456f, 0.35416352542049034f, 0.3598950365349881f, 0.36561299780477385f, 0.37131719395183754f, 0.37700741021641826f, 0.3826834323650898f, 0.38834504669882625f, 0.3939920400610481f, 0.3996241998456468f, 0.40524131400498986f, 0.4108431710579039f, 0.41642956009763715f, 0.4220002707997997f, 0.4275550934302821f, 0.43309381885315196f, 0.43861623853852766f, 0.4441221445704292f, 0.44961132965460654f, 0.45508358712634384f, 0.46053871095824f, 0.4659764957679662f, 0.47139673682599764f, 0.4767992300633221f, 0.4821837720791227f, 0.487550160148436f, 0.49289819222978404f, 0.49822766697278187f, 0.5035383837257176f, 0.508830142543107f, 0.5141027441932217f, 0.5193559901655896f, 0.524589682678469f, 0.5298036246862946f, 
0.5349976198870972f, 0.5401714727298929f, 0.5453249884220465f, 0.5504579729366048f, 0.5555702330196022f, 0.560661576197336f, 0.5657318107836131f, 0.5707807458869673f, 0.5758081914178453f, 0.5808139580957645f, 0.5857978574564389f, 0.5907597018588742f, 0.5956993044924334f, 0.600616479383869f, 0.6055110414043255f, 0.6103828062763095f, 0.6152315905806268f, 0.6200572117632891f, 0.6248594881423863f, 0.629638238914927f, 0.6343932841636455f, 0.6391244448637757f, 0.6438315428897914f, 0.6485144010221124f, 0.6531728429537768f, 0.6578066932970786f, 0.6624157775901718f, 0.6669999223036375f, 0.6715589548470183f, 0.6760927035753159f, 0.680600997795453f, 0.6850836677727004f, 0.6895405447370668f, 0.6939714608896539f, 0.6983762494089729f, 0.7027547444572253f, 0.7071067811865475f, 0.7114321957452163f, 0.7157308252838186f, 0.7200025079613817f, 0.7242470829514669f, 0.7284643904482252f, 0.7326542716724127f, 0.7368165688773698f, 0.740951125354959f, 0.745057785441466f, 0.7491363945234593f, 0.7531867990436124f, 0.7572088465064845f, 0.7612023854842618f, 0.7651672656224588f, 0.7691033376455796f, 0.7730104533627369f, 0.7768884656732324f, 0.7807372285720944f, 0.7845565971555752f, 0.7883464276266062f, 0.7921065773002123f, 0.7958369046088835f, 0.799537269107905f, 0.8032075314806448f, 0.8068475535437992f, 0.8104571982525948f, 0.8140363297059483f, 0.8175848131515837f, 0.8211025149911046f, 0.8245893027850253f, 0.8280450452577557f, 0.8314696123025451f, 0.83486287498638f, 0.838224705554838f, 0.8415549774368983f, 0.844853565249707f, 0.8481203448032971f, 0.8513551931052652f, 0.8545579883654005f, 0.8577286100002721f, 0.8608669386377672f, 0.8639728561215867f, 0.8670462455156926f, 0.8700869911087113f, 0.87309497841829f, 0.8760700941954065f, 0.8790122264286334f, 0.8819212643483549f, 0.8847970984309378f, 0.8876396204028539f, 0.8904487232447579f, 0.8932243011955153f, 0.8959662497561851f, 0.8986744656939538f, 0.901348847046022f, 0.9039892931234433f, 0.9065957045149153f, 0.9091679830905224f, 
0.9117060320054299f, 0.9142097557035307f, 0.9166790599210427f, 0.9191138516900578f, 0.9215140393420419f, 0.9238795325112867f, 0.9262102421383114f, 0.9285060804732155f, 0.9307669610789837f, 0.9329927988347388f, 0.9351835099389475f, 0.937339011912575f, 0.9394592236021899f, 0.9415440651830208f, 0.9435934581619604f, 0.9456073253805213f, 0.9475855910177411f, 0.9495281805930367f, 0.9514350209690083f, 0.9533060403541938f, 0.9551411683057707f, 0.9569403357322089f, 0.9587034748958716f, 0.9604305194155658f, 0.9621214042690416f, 0.9637760657954398f, 0.9653944416976894f, 0.9669764710448521f, 0.9685220942744173f, 0.970031253194544f, 0.9715038909862518f, 0.9729399522055601f, 0.9743393827855759f, 0.9757021300385286f, 0.9770281426577544f, 0.9783173707196277f, 0.9795697656854405f, 0.9807852804032304f, 0.9819638691095552f, 0.9831054874312163f, 0.984210092386929f, 0.9852776423889412f, 0.9863080972445987f, 0.9873014181578584f, 0.9882575677307495f, 0.989176509964781f, 0.9900582102622971f, 0.99090263542778f, 0.9917097536690995f, 0.99247953459871f, 0.9932119492347945f, 0.9939069700023561f, 0.9945645707342554f, 0.9951847266721968f, 0.9957674144676598f, 0.996312612182778f, 0.9968202992911657f, 0.9972904566786902f, 0.9977230666441916f, 0.9981181129001492f, 0.9984755805732948f, 0.9987954562051724f, 0.9990777277526454f, 0.9993223845883495f, 0.9995294175010931f, 0.9996988186962042f, 0.9998305817958234f, 0.9999247018391445f, 0.9999811752826011f, 1.0 };
/**
 * Fast table-based approximation of sin(a), with a in radians, using the
 * floating-point quarter-wave table f_sine_array.
 *
 * One period is divided into 1024 steps. The angle is rounded to the nearest
 * step and the matching table entry is returned directly (no interpolation,
 * no scaling).
 *
 * Negative angles are handled through the identity sin(-a) = -sin(a).
 * Converting a negative float straight to unsigned int is undefined
 * behaviour in C, so the angle is folded to its magnitude before the table
 * index is computed. Results for non-negative angles are unchanged.
 *
 * @param a  angle in radians; |a| * (2048 / _2PI) + 1 must fit in an unsigned int
 * @return   approximate sine of a
 */
float float_sine257(float a) {
    /* Remember the sign and work on the magnitude only. */
    int negate = 0;
    if (a < 0.0f) {
        a = -a;
        negate = 1;
    }

    /* Scale to half-steps, add one and halve: rounds to the nearest of the
     * 1024 steps per period; the mask wraps whole periods away. */
    unsigned int i = ((unsigned int)(a * (256*8 /_2PI) + 1) >> 1) & 0x3ff;

    float s;
    if (i < 256) {
        /* first quadrant: table read forwards */
        s = f_sine_array[i];
    }
    else if (i < 512) {
        /* second quadrant: table mirrored around pi/2 */
        s = f_sine_array[512 - i];
    }
    else if (i < 768) {
        /* third quadrant: first quadrant negated */
        s = -f_sine_array[i - 512];
    }
    else {
        /* fourth quadrant: second quadrant negated */
        s = -f_sine_array[1024 - i];
    }

    return negate ? -s : s;
}
Although it is not quite clear to me why the pure float table is slower than the integer one.
The LUT with 129 elements is unfortunately less accurate than the others (as expected):
Starting...
Initializing CORDIC...
CORDIC initialized.
Timing CORDIC vs stdlib sin vs SimpleFOC Sine calculations...
CORDIC:
CORDIC Time (us) for 3217 steps: 6574
Result: 2048.00
SimpleFOC _sin:
SimpleFOC _sin time (us) for 3217 steps: 927
Result: 2047.98
stdlib sin:
stdlib sin time (us) for 3217 steps: 2713
Result: 2048.00
Deku sin:
Deku sin time (us) for 3217 steps: 793
Result: 2047.94
SimpleFOC sin + normalizeAngle:
SimpleFOC + normalizeAngle time (us) for 3217 steps: 3150
Result: 2047.98
Float257 Sine:
Float257 Sine time (us) for 3217 steps: 719
Result: 2048.00
Deku257 Sine:
Deku257 Sine time (us) for 3217 steps: 676
Result: 2048.00
Deku129 Sine:
Deku129 Sine time (us) for 3217 steps: 736
Result: 2047.99
Comparing accuracy...
RMS difference between CORDIC and stdlib: 0.00000046
RMS difference between SimpleFOC and stdlib: 0.00161161
RMS difference between Deku256 Sine and stdlib: 0.00125757
RMS difference between Float Sine and stdlib: 0.00125250
RMS difference between Deku257 Sine and stdlib: 0.00125253
RMS difference between Deku129 Sine and stdlib: 0.00250501
Test complete.
I’ll test some more MCUs tonight if I can.