Merge pull request #657 from IrgendwerA8/VariousSpeedSizeOptimizations
Various speed size optimizations
This commit is contained in:
@@ -6,11 +6,11 @@
|
|||||||
;
|
;
|
||||||
|
|
||||||
.export _ltoa, _ultoa
|
.export _ltoa, _ultoa
|
||||||
.import popax
|
.import popax, popptr1, negeax
|
||||||
.import __hextab, __longminstr
|
.import __hextab, __longminstr
|
||||||
.importzp sreg, ptr1, ptr2, ptr3, tmp1
|
.importzp sreg, ptr1, ptr2, ptr3, tmp1
|
||||||
|
|
||||||
|
.macpack cpu
|
||||||
|
|
||||||
.code
|
.code
|
||||||
|
|
||||||
@@ -19,17 +19,15 @@
|
|||||||
;
|
;
|
||||||
|
|
||||||
dopop: sta tmp1 ; will lose high byte
|
dopop: sta tmp1 ; will lose high byte
|
||||||
jsr popax ; get s
|
jsr popax ; get s to ptr2
|
||||||
sta ptr1
|
|
||||||
stx ptr1+1
|
|
||||||
sta sreg ; save for return
|
|
||||||
stx sreg+1
|
|
||||||
jsr popax ; get low word of value
|
|
||||||
sta ptr2
|
sta ptr2
|
||||||
stx ptr2+1
|
stx ptr2+1
|
||||||
jsr popax ; get high word of value
|
sta ptr3 ; save for return
|
||||||
sta ptr3
|
|
||||||
stx ptr3+1
|
stx ptr3+1
|
||||||
|
jsr popptr1 ; get low word of value to ptr1
|
||||||
|
jsr popax ; get high word of value to sreg
|
||||||
|
sta sreg
|
||||||
|
stx sreg+1
|
||||||
rts
|
rts
|
||||||
|
|
||||||
;
|
;
|
||||||
@@ -41,20 +39,20 @@ _ltoa: jsr dopop ; pop the arguments
|
|||||||
; We must handle $80000000 in a special way, since it is the only negative
|
; We must handle $80000000 in a special way, since it is the only negative
|
||||||
; number that has no positive 32-bit counterpart
|
; number that has no positive 32-bit counterpart
|
||||||
|
|
||||||
ldx ptr3+1 ; get high byte
|
ldx sreg+1 ; get high byte
|
||||||
ldy tmp1 ; get radix
|
ldy tmp1 ; get radix
|
||||||
cpy #10
|
cpy #10
|
||||||
bne ultoa
|
bne ultoa
|
||||||
lda ptr3
|
lda sreg
|
||||||
ora ptr2+1
|
ora ptr1+1
|
||||||
ora ptr2
|
ora ptr1
|
||||||
bne L2
|
bne L2
|
||||||
cpx #$80
|
cpx #$80
|
||||||
bne L2
|
bne L2
|
||||||
|
|
||||||
ldy #11
|
ldy #11
|
||||||
L1: lda __longminstr,y ; copy -2147483648
|
L1: lda __longminstr,y ; copy -2147483648
|
||||||
sta (ptr1),y
|
sta (ptr2),y
|
||||||
dey
|
dey
|
||||||
bpl L1
|
bpl L1
|
||||||
jmp L10
|
jmp L10
|
||||||
@@ -65,29 +63,25 @@ L1: lda __longminstr,y ; copy -2147483648
|
|||||||
L2: txa ; get high byte
|
L2: txa ; get high byte
|
||||||
bpl ultoa
|
bpl ultoa
|
||||||
lda #'-'
|
lda #'-'
|
||||||
ldy #0
|
|
||||||
sta (ptr1),y ; store sign
|
|
||||||
inc ptr1
|
|
||||||
bne L3
|
|
||||||
inc ptr1+1
|
|
||||||
|
|
||||||
L3: lda ptr2 ; negate val
|
.if (.cpu .bitand CPU_ISET_65SC02)
|
||||||
eor #$FF
|
sta (ptr2)
|
||||||
clc
|
.else
|
||||||
adc #$01
|
ldy #0
|
||||||
sta ptr2
|
sta (ptr2),y ; store sign
|
||||||
lda ptr2+1
|
.endif
|
||||||
eor #$FF
|
|
||||||
adc #$00
|
inc ptr2
|
||||||
sta ptr2+1
|
bne L3
|
||||||
lda ptr3
|
inc ptr2+1
|
||||||
eor #$FF
|
|
||||||
adc #$00
|
L3: lda ptr1 ; negate val
|
||||||
sta ptr3
|
ldx ptr1+1
|
||||||
lda ptr3+1
|
|
||||||
eor #$FF
|
jsr negeax
|
||||||
adc #$00
|
|
||||||
sta ptr3+1
|
sta ptr1
|
||||||
|
stx ptr1+1
|
||||||
jmp ultoa
|
jmp ultoa
|
||||||
|
|
||||||
;
|
;
|
||||||
@@ -105,15 +99,15 @@ ultoa: lda #$00
|
|||||||
|
|
||||||
L5: ldy #32 ; 32 bit
|
L5: ldy #32 ; 32 bit
|
||||||
lda #0 ; remainder
|
lda #0 ; remainder
|
||||||
L6: asl ptr2
|
L6: asl ptr1
|
||||||
rol ptr2+1
|
rol ptr1+1
|
||||||
rol ptr3
|
rol sreg
|
||||||
rol ptr3+1
|
rol sreg+1
|
||||||
rol a
|
rol a
|
||||||
cmp tmp1
|
cmp tmp1
|
||||||
bcc L7
|
bcc L7
|
||||||
sbc tmp1
|
sbc tmp1
|
||||||
inc ptr2
|
inc ptr1
|
||||||
L7: dey
|
L7: dey
|
||||||
bne L6
|
bne L6
|
||||||
|
|
||||||
@@ -121,25 +115,25 @@ L7: dey
|
|||||||
lda __hextab,y ; get hex character
|
lda __hextab,y ; get hex character
|
||||||
pha ; save char value on stack
|
pha ; save char value on stack
|
||||||
|
|
||||||
lda ptr2
|
lda ptr1
|
||||||
ora ptr2+1
|
ora ptr1+1
|
||||||
ora ptr3
|
ora sreg
|
||||||
ora ptr3+1
|
ora sreg+1
|
||||||
bne L5
|
bne L5
|
||||||
|
|
||||||
; Get the characters from the stack into the string
|
; Get the characters from the stack into the string
|
||||||
|
|
||||||
ldy #0
|
ldy #0
|
||||||
L9: pla
|
L9: pla
|
||||||
sta (ptr1),y
|
sta (ptr2),y
|
||||||
beq L10 ; jump if sentinel
|
beq L10 ; jump if sentinel
|
||||||
iny
|
iny
|
||||||
bne L9 ; jump always
|
bne L9 ; jump always
|
||||||
|
|
||||||
; Done! Return the target string
|
; Done! Return the target string
|
||||||
|
|
||||||
L10: lda sreg
|
L10: lda ptr3
|
||||||
ldx sreg+1
|
ldx ptr3+1
|
||||||
rts
|
rts
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -6,40 +6,39 @@
|
|||||||
;
|
;
|
||||||
|
|
||||||
.export _strcspn
|
.export _strcspn
|
||||||
.import popax, _strlen
|
.import popptr1, _strlen
|
||||||
.importzp ptr1, ptr2, tmp1, tmp2
|
.importzp ptr1, ptr2, tmp1, tmp2
|
||||||
|
|
||||||
_strcspn:
|
_strcspn:
|
||||||
jsr _strlen ; get length in a/x and transfer s2 to ptr1
|
jsr _strlen ; get length in a/x and transfer s2 to ptr2
|
||||||
; Note: It does not make sense to
|
; Note: It does not make sense to
|
||||||
; have more than 255 test chars, so
|
; have more than 255 test chars, so
|
||||||
; we don't support a high byte here! (ptr1+1 is
|
; we don't support a high byte here! (ptr2+1 is
|
||||||
; also unchanged in strlen then (important!))
|
; also unchanged in strlen then (important!))
|
||||||
; -> the original implementation also
|
; -> the original implementation also
|
||||||
; ignored this case
|
; ignored this case
|
||||||
|
|
||||||
sta tmp1 ; tmp1 = strlen of test chars
|
sta tmp1 ; tmp1 = strlen of test chars
|
||||||
jsr popax ; get and save s1
|
jsr popptr1 ; get and save s1 to ptr1
|
||||||
sta ptr2 ; to ptr2
|
|
||||||
stx ptr2+1
|
|
||||||
ldx #0 ; low counter byte
|
ldx #0 ; low counter byte
|
||||||
stx tmp2 ; high counter byte
|
stx tmp2 ; high counter byte
|
||||||
|
|
||||||
loadChar:
|
loadChar:
|
||||||
ldy #0
|
ldy #0
|
||||||
lda (ptr2),y ; get next char from s1
|
lda (ptr1),y ; get next char from s1
|
||||||
beq leave ; leave at the terminating zero byte of s1
|
beq leave ; leave at the terminating zero byte of s1
|
||||||
advance:
|
advance:
|
||||||
inc ptr2 ; advance string position to test
|
inc ptr1 ; advance string position to test
|
||||||
bne check
|
bne check
|
||||||
inc ptr2+1
|
inc ptr1+1
|
||||||
dey ; correct next iny (faster/shorter than bne...)
|
dey ; correct next iny (faster/shorter than bne...)
|
||||||
|
|
||||||
checkNext:
|
checkNext:
|
||||||
iny
|
iny
|
||||||
check: cpy tmp1 ; compare with length of test character string
|
check: cpy tmp1 ; compare with length of test character string
|
||||||
beq endOfTestChars
|
beq endOfTestChars
|
||||||
cmp (ptr1),y ; found matching char?
|
cmp (ptr2),y ; found matching char?
|
||||||
bne checkNext
|
bne checkNext
|
||||||
|
|
||||||
leave: txa ; restore position of finding
|
leave: txa ; restore position of finding
|
||||||
|
|||||||
@@ -2,26 +2,26 @@
|
|||||||
; Ullrich von Bassewitz, 31.05.1998
|
; Ullrich von Bassewitz, 31.05.1998
|
||||||
;
|
;
|
||||||
; Note: strspn & strcspn call this function internally and rely on
|
; Note: strspn & strcspn call this function internally and rely on
|
||||||
; the usage of only ptr1 here! Keep in mind when applying changes
|
; the usage of only ptr2 here! Keep in mind when applying changes
|
||||||
; and check the other implementations too!
|
; and check the other implementations too!
|
||||||
;
|
;
|
||||||
; int strlen (const char* s);
|
; int strlen (const char* s);
|
||||||
;
|
;
|
||||||
|
|
||||||
.export _strlen
|
.export _strlen
|
||||||
.importzp ptr1
|
.importzp ptr2
|
||||||
|
|
||||||
_strlen:
|
_strlen:
|
||||||
sta ptr1 ; Save s
|
sta ptr2 ; Save s
|
||||||
stx ptr1+1
|
stx ptr2+1
|
||||||
ldx #0 ; YX used as counter
|
ldx #0 ; YX used as counter
|
||||||
ldy #0
|
ldy #0
|
||||||
|
|
||||||
L1: lda (ptr1),y
|
L1: lda (ptr2),y
|
||||||
beq L9
|
beq L9
|
||||||
iny
|
iny
|
||||||
bne L1
|
bne L1
|
||||||
inc ptr1+1
|
inc ptr2+1
|
||||||
inx
|
inx
|
||||||
bne L1
|
bne L1
|
||||||
|
|
||||||
|
|||||||
@@ -6,40 +6,39 @@
|
|||||||
;
|
;
|
||||||
|
|
||||||
.export _strspn
|
.export _strspn
|
||||||
.import popax, _strlen
|
.import popptr1, _strlen
|
||||||
.importzp ptr1, ptr2, tmp1, tmp2
|
.importzp ptr1, ptr2, tmp1, tmp2
|
||||||
|
|
||||||
_strspn:
|
_strspn:
|
||||||
jsr _strlen ; get length in a/x and transfer s2 to ptr1
|
jsr _strlen ; get length in a/x and transfer s2 to ptr2
|
||||||
; Note: It does not make sense to
|
; Note: It does not make sense to
|
||||||
; have more than 255 test chars, so
|
; have more than 255 test chars, so
|
||||||
; we don't support a high byte here! (ptr1+1 is
|
; we don't support a high byte here! (ptr2+1 is
|
||||||
; also unchanged in strlen then (important!))
|
; also unchanged in strlen then (important!))
|
||||||
; -> the original implementation also
|
; -> the original implementation also
|
||||||
; ignored this case
|
; ignored this case
|
||||||
|
|
||||||
sta tmp1 ; tmp1 = strlen of test chars
|
sta tmp1 ; tmp1 = strlen of test chars
|
||||||
jsr popax ; get and save s1
|
jsr popptr1 ; get and save s1 to ptr1
|
||||||
sta ptr2 ; to ptr2
|
|
||||||
stx ptr2+1
|
|
||||||
ldx #0 ; low counter byte
|
ldx #0 ; low counter byte
|
||||||
stx tmp2 ; high counter byte
|
stx tmp2 ; high counter byte
|
||||||
|
|
||||||
loadChar:
|
loadChar:
|
||||||
ldy #0
|
ldy #0
|
||||||
lda (ptr2),y ; get next char from s1
|
lda (ptr1),y ; get next char from s1
|
||||||
beq leave ; leave at the terminating zero byte of s1
|
beq leave ; leave at the terminating zero byte of s1
|
||||||
advance:
|
advance:
|
||||||
inc ptr2 ; advance string position to test
|
inc ptr1 ; advance string position to test
|
||||||
bne check
|
bne check
|
||||||
inc ptr2+1
|
inc ptr1+1
|
||||||
dey ; correct next iny (faster/shorter than bne...)
|
dey ; correct next iny (faster/shorter than bne...)
|
||||||
|
|
||||||
checkNext:
|
checkNext:
|
||||||
iny
|
iny
|
||||||
check: cpy tmp1 ; compare with length of test character string
|
check: cpy tmp1 ; compare with length of test character string
|
||||||
beq leave
|
beq leave
|
||||||
cmp (ptr1),y ; found matching char?
|
cmp (ptr2),y ; found matching char?
|
||||||
bne checkNext
|
bne checkNext
|
||||||
|
|
||||||
foundTestChar:
|
foundTestChar:
|
||||||
|
|||||||
@@ -6,29 +6,30 @@
|
|||||||
|
|
||||||
.export _screensize
|
.export _screensize
|
||||||
|
|
||||||
.import popsreg
|
.import popptr1
|
||||||
.import screensize
|
.import screensize
|
||||||
.importzp ptr1, sreg
|
.importzp ptr1, ptr2
|
||||||
|
|
||||||
|
.macpack cpu
|
||||||
|
|
||||||
.proc _screensize
|
.proc _screensize
|
||||||
|
|
||||||
sta ptr1 ; Store the y pointer
|
sta ptr2 ; Store the y pointer
|
||||||
stx ptr1+1
|
stx ptr2+1
|
||||||
jsr popsreg ; Get the x pointer into sreg
|
jsr popptr1 ; Get the x pointer into ptr1
|
||||||
jsr screensize ; Get screensize into X/Y
|
jsr screensize ; Get screensize into X/Y
|
||||||
tya ; Get Y size into A
|
tya ; Get Y size into A
|
||||||
|
|
||||||
.IFP02
|
.if (.cpu .bitand ::CPU_ISET_65SC02)
|
||||||
ldy #0
|
sta (ptr2)
|
||||||
sta (ptr1),y
|
|
||||||
txa
|
txa
|
||||||
sta (sreg),y
|
|
||||||
.ELSE
|
|
||||||
sta (ptr1)
|
sta (ptr1)
|
||||||
|
.else
|
||||||
|
ldy #0
|
||||||
|
sta (ptr2),y
|
||||||
txa
|
txa
|
||||||
sta (sreg)
|
sta (ptr1),y
|
||||||
.ENDIF
|
.endif
|
||||||
|
|
||||||
rts
|
rts
|
||||||
|
|
||||||
.endproc
|
.endproc
|
||||||
|
|||||||
@@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
.export tosumulax, tosmulax
|
.export tosumulax, tosmulax
|
||||||
.import mul8x16, mul8x16a ; in mul8.s
|
.import mul8x16, mul8x16a ; in mul8.s
|
||||||
.import popsreg
|
.import popptr1
|
||||||
.importzp sreg, tmp1, ptr4
|
.importzp tmp1, ptr1, ptr4
|
||||||
|
|
||||||
|
|
||||||
;---------------------------------------------------------------------------
|
;---------------------------------------------------------------------------
|
||||||
@@ -19,12 +19,12 @@ tosumulax:
|
|||||||
txa ; High byte zero
|
txa ; High byte zero
|
||||||
beq @L3 ; Do 8x16 multiplication if high byte zero
|
beq @L3 ; Do 8x16 multiplication if high byte zero
|
||||||
stx ptr4+1 ; Save right operand
|
stx ptr4+1 ; Save right operand
|
||||||
jsr popsreg ; Get left operand
|
jsr popptr1 ; Get left operand (Y=0 by popptr1)
|
||||||
|
|
||||||
; Do ptr4:ptr4+1 * sreg:sreg+1 --> AX
|
; Do ptr4:ptr4+1 * ptr1:ptr1+1 --> AX
|
||||||
|
|
||||||
lda #0
|
tya ; A = 0
|
||||||
ldx sreg+1 ; Get high byte into register for speed
|
ldx ptr1+1 ; check if lhs is 8 bit only
|
||||||
beq @L4 ; -> we can do 8x16 after swap
|
beq @L4 ; -> we can do 8x16 after swap
|
||||||
sta tmp1
|
sta tmp1
|
||||||
ldy #16 ; Number of bits
|
ldy #16 ; Number of bits
|
||||||
@@ -34,12 +34,12 @@ tosumulax:
|
|||||||
@L0: bcc @L1
|
@L0: bcc @L1
|
||||||
|
|
||||||
clc
|
clc
|
||||||
adc sreg
|
adc ptr1
|
||||||
pha
|
tax
|
||||||
txa ; hi byte of left op
|
lda ptr1+1 ; hi byte of left op
|
||||||
adc tmp1
|
adc tmp1
|
||||||
sta tmp1
|
sta tmp1
|
||||||
pla
|
txa
|
||||||
|
|
||||||
@L1: ror tmp1
|
@L1: ror tmp1
|
||||||
ror a
|
ror a
|
||||||
@@ -59,10 +59,11 @@ tosumulax:
|
|||||||
; If the high byte of rhs is zero, swap the operands and use the 8x16
|
; If the high byte of rhs is zero, swap the operands and use the 8x16
|
||||||
; routine. On entry, A and X are zero
|
; routine. On entry, A and X are zero
|
||||||
|
|
||||||
@L4: ldy sreg ; Save right operand (8 bit)
|
@L4: ldy ptr1 ; Save right operand (8 bit)
|
||||||
ldx ptr4 ; Copy left 16 bit operand to right
|
ldx ptr4 ; Copy left 16 bit operand to right
|
||||||
stx sreg
|
stx ptr1
|
||||||
ldx ptr4+1 ; Don't store, this is done later
|
ldx ptr4+1 ; swap high-byte too
|
||||||
|
stx ptr1+1
|
||||||
sty ptr4 ; Copy low 8 bit of right op to left
|
sty ptr4 ; Copy low 8 bit of right op to left
|
||||||
ldy #8
|
ldy #8
|
||||||
jmp mul8x16a
|
jmp mul8x16a
|
||||||
|
|||||||
@@ -6,8 +6,8 @@
|
|||||||
|
|
||||||
.export tosumula0, tosmula0
|
.export tosumula0, tosmula0
|
||||||
.export mul8x16, mul8x16a
|
.export mul8x16, mul8x16a
|
||||||
.import popsreg
|
.import popptr1
|
||||||
.importzp sreg, ptr4
|
.importzp ptr1, ptr4
|
||||||
|
|
||||||
|
|
||||||
;---------------------------------------------------------------------------
|
;---------------------------------------------------------------------------
|
||||||
@@ -16,11 +16,11 @@
|
|||||||
tosmula0:
|
tosmula0:
|
||||||
tosumula0:
|
tosumula0:
|
||||||
sta ptr4
|
sta ptr4
|
||||||
mul8x16:jsr popsreg ; Get left operand
|
mul8x16:jsr popptr1 ; Get left operand (Y=0 by popptr1)
|
||||||
|
|
||||||
lda #0 ; Clear byte 1
|
tya ; Clear byte 1
|
||||||
ldy #8 ; Number of bits
|
ldy #8 ; Number of bits
|
||||||
ldx sreg+1 ; Get into register for speed
|
ldx ptr1+1 ; check if lhs is 8 bit only
|
||||||
beq mul8x8 ; Do 8x8 multiplication if high byte zero
|
beq mul8x8 ; Do 8x8 multiplication if high byte zero
|
||||||
mul8x16a:
|
mul8x16a:
|
||||||
sta ptr4+1 ; Clear byte 2
|
sta ptr4+1 ; Clear byte 2
|
||||||
@@ -29,12 +29,12 @@ mul8x16a:
|
|||||||
@L0: bcc @L1
|
@L0: bcc @L1
|
||||||
|
|
||||||
clc
|
clc
|
||||||
adc sreg
|
adc ptr1
|
||||||
pha
|
tax
|
||||||
txa ; hi byte of left op
|
lda ptr1+1 ; hi byte of left op
|
||||||
adc ptr4+1
|
adc ptr4+1
|
||||||
sta ptr4+1
|
sta ptr4+1
|
||||||
pla
|
txa
|
||||||
|
|
||||||
@L1: ror ptr4+1
|
@L1: ror ptr4+1
|
||||||
ror a
|
ror a
|
||||||
@@ -52,7 +52,7 @@ mul8x8:
|
|||||||
lsr ptr4 ; Get first bit into carry
|
lsr ptr4 ; Get first bit into carry
|
||||||
@L0: bcc @L1
|
@L0: bcc @L1
|
||||||
clc
|
clc
|
||||||
adc sreg
|
adc ptr1
|
||||||
@L1: ror
|
@L1: ror
|
||||||
ror ptr4
|
ror ptr4
|
||||||
dey
|
dey
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
;
|
;
|
||||||
; CC65 runtime: Multiply the primary register by 5
|
; CC65 runtime: Multiply the primary register by 5
|
||||||
;
|
;
|
||||||
|
; Don't touch the Y-register here, the optimizer relies on it!
|
||||||
|
|
||||||
.export mulax5
|
.export mulax5
|
||||||
.importzp ptr1
|
.importzp ptr1
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
;
|
;
|
||||||
; CC65 runtime: Multiply the primary register by 7
|
; CC65 runtime: Multiply the primary register by 7
|
||||||
;
|
;
|
||||||
|
; Don't touch the Y-register here, the optimizer relies on it!
|
||||||
|
|
||||||
.export mulax7
|
.export mulax7
|
||||||
.importzp ptr1
|
.importzp ptr1
|
||||||
|
|||||||
@@ -4,6 +4,7 @@
|
|||||||
;
|
;
|
||||||
; CC65 runtime: Multiply the primary register by 9
|
; CC65 runtime: Multiply the primary register by 9
|
||||||
;
|
;
|
||||||
|
; Don't touch the Y-register here, the optimizer relies on it!
|
||||||
|
|
||||||
.export mulax9
|
.export mulax9
|
||||||
.importzp ptr1
|
.importzp ptr1
|
||||||
|
|||||||
Reference in New Issue
Block a user