Use the faster popptr1 in these functions and/or save the accumulator in a register instead of on the stack.
This commit is contained in:
@@ -6,8 +6,8 @@
|
||||
|
||||
.export tosumulax, tosmulax
|
||||
.import mul8x16, mul8x16a ; in mul8.s
|
||||
.import popsreg
|
||||
.importzp sreg, tmp1, ptr4
|
||||
.import popptr1
|
||||
.importzp tmp1, ptr1, ptr4
|
||||
|
||||
|
||||
;---------------------------------------------------------------------------
|
||||
@@ -19,12 +19,12 @@ tosumulax:
|
||||
txa ; High byte zero
|
||||
beq @L3 ; Do 8x16 multiplication if high byte zero
|
||||
stx ptr4+1 ; Save right operand
|
||||
jsr popsreg ; Get left operand
|
||||
jsr popptr1 ; Get left operand (Y=0 by popptr1)
|
||||
|
||||
; Do ptr4:ptr4+1 * sreg:sreg+1 --> AX
|
||||
; Do ptr4:ptr4+1 * ptr1:ptr1+1 --> AX
|
||||
|
||||
lda #0
|
||||
ldx sreg+1 ; Get high byte into register for speed
|
||||
tya ; A = 0
|
||||
ldx ptr1+1 ; check if lhs is 8 bit only
|
||||
beq @L4 ; -> we can do 8x16 after swap
|
||||
sta tmp1
|
||||
ldy #16 ; Number of bits
|
||||
@@ -34,12 +34,12 @@ tosumulax:
|
||||
@L0: bcc @L1
|
||||
|
||||
clc
|
||||
adc sreg
|
||||
pha
|
||||
txa ; hi byte of left op
|
||||
adc ptr1
|
||||
tax
|
||||
lda ptr1+1 ; hi byte of left op
|
||||
adc tmp1
|
||||
sta tmp1
|
||||
pla
|
||||
txa
|
||||
|
||||
@L1: ror tmp1
|
||||
ror a
|
||||
@@ -59,9 +59,9 @@ tosumulax:
|
||||
; If the high byte of rhs is zero, swap the operands and use the 8x16
|
||||
; routine. On entry, A and X are zero
|
||||
|
||||
@L4: ldy sreg ; Save right operand (8 bit)
|
||||
@L4: ldy ptr1 ; Save right operand (8 bit)
|
||||
ldx ptr4 ; Copy left 16 bit operand to right
|
||||
stx sreg
|
||||
stx ptr1
|
||||
ldx ptr4+1 ; Don't store, this is done later
|
||||
sty ptr4 ; Copy low 8 bit of right op to left
|
||||
ldy #8
|
||||
|
||||
Reference in New Issue
Block a user