Post by gtoal on Oct 7, 2019 22:45:06 GMT -5
Yes, Malban, I did use Vide! Thanks for all the advice everyone, and that LDU instead of LEAU is clearly the source of the broken release. Clearly I don't write enough 6809 assembler nowadays! I'll fix it up when I get back to this, probably at the weekend - as I've mentioned earlier (or maybe it was just on the FB group) I'm working on PiTrex right now and putting this aside for a few days.
Meanwhile, since y'all are having such good fun with my attempts at embedded assembler, here's another one you can pick apart: I had some code where I'd optimised most of the arithmetic but was left with a divide by 3 that I couldn't get rid of, so I had a look at what sort of code gcc generates for that, both doing it the obvious way and trying some optimisations that would have worked on other systems. Only the pure asm solution was actually fast...
#include <vectrex.h>
// what is the most efficient code for an 8 bit unsigned number divided by 3 on gcc6809?
// Divide-by-3 as a pure expression: ((i + 1) * 85) >> 8, evaluated in
// unsigned long so the intermediate product keeps its upper bits.
// Equals i / 3 for every i in 0..255.
// The argument is parenthesised so that an expression argument (e.g. a / b)
// is converted as a whole rather than only its first operand.
#define macro_divide_by_3(i) ((unsigned int)((((unsigned long)(i) + 1UL) * 85UL) >> 8))
// Byte indices into an unsigned long viewed as an array of bytes:
// LOW selects bits 0..7 and HIGH selects bits 8..15.
// The 6809 is big-endian, so with its two-byte long HIGH is index 0 and LOW
// is index 1; on a little-endian machine such as Intel HIGH is 1 and LOW is 0.
// Compilers that do not predefine __BYTE_ORDER__ take the big-endian branch,
// which is the layout the 6809 needs.
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#define HIGH 1
#define LOW 0
#else
#define HIGH (sizeof(unsigned long) - 2)
#define LOW (sizeof(unsigned long) - 1)
#endif

// Divide an 8-bit value by 3 without an explicit shift: form (i + 1) * 85
// in an unsigned long and read bits 8..15 of the product straight out of
// memory through a union, which is the same as shifting right by 8.
//
// i:       value to divide; the result is only meaningful for 0..255.
// returns: i / 3 for i in 0..255.
static inline unsigned int inline_divide_by_3(unsigned int i) {
    union {
        unsigned long product;
        unsigned char byte[sizeof(unsigned long)];
    } temp;

    temp.product = ((unsigned long)i + 1UL) * 85UL;
    // The largest product is 256 * 85 = 21760, so the quotient fits in the
    // single byte holding bits 8..15.
    return temp.byte[HIGH];
}
// Divide an 8-bit unsigned value by 3 in 6809 assembler.
//
// numerator: lvalue holding the value to divide (0..255); only read.
// result:    lvalue that receives numerator / 3.
//
// Method: MUL leaves the 16-bit product A * B in D, with A as the high byte.
// numerator * 171 never exceeds 255 * 171 = 43605, so it fits in D, and
// (numerator * 171) >> 9 equals numerator / 3 for every value 0..255.
// Taking A is the ">> 8"; LSRA supplies the final ">> 1".
// (Incrementing B and multiplying by 85 would wrap 255 to 0 in the 8-bit
// register, and the quotient would still be in A, not B.)
//
// In GCC extended asm the first operand list is the outputs and the second
// is the inputs, so result is %0 and numerator is %1. Both use "m" because
// LDB/STA are given memory operands here.
#define asm_divide_by_3(numerator, result) \
asm("\t ldb %1 \n" \
"\t lda #171 \n" \
"\t mul \n" \
"\t lsra \n" \
"\t sta %0 \n" \
: /* out */ "=m"(result) \
: /* in */ "m"(numerator) \
: /* clobber */ "a", "b" \
)
// Test harness for the divide-by-3 variants.
// Each pass of the outer loop calls Wait_Recal(), prints the banner, and then
// runs all four variants for every value of i from 0 until i wraps back to 0.
// r0..r3 are never read here; the commented assembler under each statement is
// the 6809 code gcc generated for that statement.
int main(void)
{
    unsigned int i, r0, r1, r2, r3;

    for (;;) {
        Wait_Recal();
        Print_Str_d(0, -70, "HELLO WORLD\x80");

        i = 0;
        do {
            // Variant 0: plain C division (calls the _udivhi3 library routine).
            r0 = i/3;
            // ldb 4,s ;, i
            // clra ;zero_extendqihi: R:b -> R:d ;,
            // std ,s ;,
            // ldx #3 ; tmp41,
            // pshs x ; tmp41
            // ldx 2,s ;,
            // jsr _udivhi3
            // leas 2,s ;,,
            // tfr x,d ;, tmp40
            // stb 5,s ;movlsbqihi: R:d -> 5,s ; r0, tmp40

            // Variant 1: hand-written assembler.
            asm_divide_by_3(i, r1);
            // ldb 4,s ; i
            // incb
            // lda #85
            // mul
            // stb 6,s ; r1

            // Variant 2: multiply-and-shift macro.
            r2 = macro_divide_by_3(i);
            // ldb 4,s ;, i
            // clra ;zero_extendqihi: R:b -> R:d ;,
            // std ,s ;, D.2920
            // ldu ,s ;, D.2920
            // leax 1,u ; D.2921,,
            // stx 2,s ; D.2921,
            // ldd 2,s ; tmp43,
            // aslb ;
            // rola ;
            // aslb ;
            // rola ;
            // std 2,s ; tmp43,
            // ldd 2,s ;,
            // leax d,x ;,, D.2921
            // stx 2,s ;,
            // ldd 2,s ; tmp44,
            // aslb ;
            // rola ;
            // aslb ;
            // rola ;
            // aslb ;
            // rola ;
            // aslb ;
            // rola ;
            // addd 2,s; addhi3,3 ; D.2922,
            // tfr a,b ;,
            // clra ;zero_extendqihi: R:b -> R:d ;,
            // stb 7,s ;movlsbqihi: R:d -> 7,s ; r2, D.2923

            // Variant 3: union-based function (emitted as a real call here).
            r3 = inline_divide_by_3(i);
            // ldb 4,s ;, i
            // jsr _inline_divide_by_3
            // stb 8,s ; r3.1, r3
            // plus ...
            //_inline_divide_by_3:
            // pshs u ;
            // leas -7,s ;,,
            // stb 4,s ; i, i
            // ldb 4,s ;, i
            // clra ;zero_extendqihi: R:b -> R:d ;,
            // std ,s ;, D.2904
            // ldu ,s ;, D.2904
            // leax 1,u ; D.2905,,
            // stx 2,s ; D.2905,
            // ldd 2,s ; tmp33,
            // aslb ;
            // rola ;
            // aslb ;
            // rola ;
            // std 2,s ; tmp33,
            // ldd 2,s ;,
            // leax d,x ;,, D.2905
            // stx 2,s ;,
            // ldd 2,s ; tmp34,
            // aslb ;
            // rola ;
            // aslb ;
            // rola ;
            // aslb ;
            // rola ;
            // aslb ;
            // rola ;
            // ldu 2,s ;,
            // leax d,u ; D.2906, tmp34,
            // stx 5,s ; D.2906, temp.sixteenbit
            // ldb 5,s ; D.2907, temp.eightbit
            // leas 7,s ;,,
            // puls u,pc ;

            i++;
        } while (i != 0); // unsigned wrap-around to 0 ends the sweep
    }
    return 0;
}