In the follwing code I provide three versions of the function getSeconds().
Version 1 uses a simple cli-assembler instruction (volatile) and volatile restore of SREG and a volatile access of the seconds. Please note, that there is no general memory-barrier here.
Version 2 also uses cli() but with a memory-barrier, so there is no need for special access to seconds.
Version 3 is essentially the same as Version 2 but useful for C-Code.
My question is, if there might be situations where Version 1 is more efficient, since if forces only a reread of seconds in the case it is called, whereas Version 2 (and 3) impose a general memory-barrier and so force all cached register-values to be written to the memory.
#include <avr/cpufunc.h>
#include <util/atomic.h>
#include <stdint.h>
template<typename T>
volatile T& volatile_access(T& v) {
return static_cast<volatile T&>(v);
}
namespace Atomic {
struct DisableInterruptsRestore final {
inline DisableInterruptsRestore() {
cli(); // memory-barrier inside
}
inline ~DisableInterruptsRestore() {
SREG = save; // only-volatile
__asm__ volatile ("" ::: "memory"); // barrier added
}
private:
uint8_t save{SREG};
};
struct DisableInterruptsRestoreNoBarrier final {
inline DisableInterruptsRestoreNoBarrier() {
__asm__ __volatile__ ("cli"); // only volatile, no barrier
}
inline ~DisableInterruptsRestoreNoBarrier() {
SREG = save; // only volatile, no barrier
}
private:
uint8_t save{SREG};
};
}
namespace {
uint32_t seconds;
//V1
uint32_t getSeconds() {
Atomic::DisableInterruptsRestoreNoBarrier di; // volatile
return volatile_access(seconds);// volatile
}
// V2
// uint32_t getSeconds() {
// Atomic::DisableInterruptsRestore di; // is general memory-barrier
// return seconds;
// }
// V3
// uint32_t getSeconds() {
// ATOMIC_BLOCK(ATOMIC_RESTORESTATE) {
// return seconds;
// }
// return 0;
// }
}
ISR(TCA0_CMP1_vect) {
seconds += 1;
}
int main(void) {
while (true) {
if (getSeconds() > 100) {
GPR_GPR0 = 0x01;
}
}
}