Merge pull request #389 from IrgendwerA8/stringimprovements
Optimization of string functions (size & speed).
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
;
|
||||
; 2003-08-20, Ullrich von Bassewitz
|
||||
; 2009-09-13, Christian Krueger -- performance increase (about 20%)
|
||||
; 2009-09-13, Christian Krueger -- performance increase (about 20%), 2013-07-25 improved unrolling
|
||||
; 2015-10-23, Greg King
|
||||
;
|
||||
; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
|
||||
@@ -61,13 +61,10 @@ PageSizeCopy: ; assert Y = 0
|
||||
dec ptr1+1 ; adjust base...
|
||||
dec ptr2+1
|
||||
dey ; in entry case: 0 -> FF
|
||||
lda (ptr1),y ; need to copy this 'intro byte'
|
||||
sta (ptr2),y ; to 'land' later on Y=0! (as a result of the '.repeat'-block!)
|
||||
dey ; FF ->FE
|
||||
@copyBytes:
|
||||
.repeat 2 ; Unroll this a bit to make it faster...
|
||||
lda (ptr1),y
|
||||
sta (ptr2),y
|
||||
.repeat 3 ; unroll this a bit to make it faster...
|
||||
lda (ptr1),y ; important: unrolling three times gives a nice
|
||||
sta (ptr2),y ; 255/3 = 85 loop which ends at 0
|
||||
dey
|
||||
.endrepeat
|
||||
@copyEntry: ; in entry case: 0 -> FF
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 31.05.1998
|
||||
; Christian Krueger: 2013-Jul-24, minor optimizations
|
||||
;
|
||||
; char* strcat (char* dest, const char* src);
|
||||
;
|
||||
@@ -7,49 +8,44 @@
|
||||
.export _strcat
|
||||
.import popax
|
||||
.importzp ptr1, ptr2, tmp3
|
||||
.macpack cpu
|
||||
|
||||
_strcat:
|
||||
sta ptr1 ; Save src
|
||||
stx ptr1+1
|
||||
jsr popax ; Get dest
|
||||
sta ptr2
|
||||
stx ptr2+1
|
||||
sta tmp3 ; Remember for function return
|
||||
ldy #0
|
||||
sta ptr1 ; Save src
|
||||
stx ptr1+1
|
||||
jsr popax ; Get dest
|
||||
sta tmp3 ; Remember for function return
|
||||
tay
|
||||
.if (.cpu .bitand ::CPU_ISET_65SC02)
|
||||
stz ptr2
|
||||
.else
|
||||
lda #0
|
||||
sta ptr2 ; access from page start, y contains low byte
|
||||
.endif
|
||||
stx ptr2+1
|
||||
|
||||
; find end of dest
|
||||
|
||||
sc1: lda (ptr2),y
|
||||
beq sc2
|
||||
findEndOfDest:
|
||||
lda (ptr2),y
|
||||
beq endOfDestFound
|
||||
iny
|
||||
bne sc1
|
||||
inc ptr2+1
|
||||
bne sc1
|
||||
bne findEndOfDest
|
||||
inc ptr2+1
|
||||
bne findEndOfDest
|
||||
|
||||
; end found, get offset in y into pointer
|
||||
endOfDestFound:
|
||||
sty ptr2 ; advance pointer to last y position
|
||||
ldy #0 ; reset new y-offset
|
||||
|
||||
sc2: tya
|
||||
clc
|
||||
adc ptr2
|
||||
sta ptr2
|
||||
bcc sc3
|
||||
inc ptr2+1
|
||||
|
||||
; copy src
|
||||
|
||||
sc3: ldy #0
|
||||
sc4: lda (ptr1),y
|
||||
sta (ptr2),y
|
||||
beq sc5
|
||||
copyByte:
|
||||
lda (ptr1),y
|
||||
sta (ptr2),y
|
||||
beq done
|
||||
iny
|
||||
bne sc4
|
||||
inc ptr1+1
|
||||
inc ptr2+1
|
||||
bne sc4
|
||||
bne copyByte
|
||||
inc ptr1+1
|
||||
inc ptr2+1
|
||||
bne copyByte ; like bra here
|
||||
|
||||
; done, return pointer to dest
|
||||
|
||||
sc5: lda tmp3 ; X does still contain high byte
|
||||
; return pointer to dest
|
||||
done: lda tmp3 ; X does still contain high byte
|
||||
rts
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 31.05.1998
|
||||
; Christian Krueger, 2013-Aug-04, minor optimization
|
||||
;
|
||||
; const char* strchr (const char* s, int c);
|
||||
;
|
||||
@@ -7,42 +8,45 @@
|
||||
.export _strchr
|
||||
.import popax
|
||||
.importzp ptr1, tmp1
|
||||
.macpack cpu
|
||||
|
||||
_strchr:
|
||||
sta tmp1 ; Save c
|
||||
jsr popax ; get s
|
||||
sta ptr1
|
||||
stx ptr1+1
|
||||
ldy #0
|
||||
sta tmp1 ; Save c
|
||||
jsr popax ; get s
|
||||
tay ; low byte of pointer to y
|
||||
stx ptr1+1
|
||||
.if (.cpu .bitand ::CPU_ISET_65SC02)
|
||||
stz ptr1
|
||||
.else
|
||||
lda #0
|
||||
sta ptr1 ; access from page start, y contains low byte
|
||||
.endif
|
||||
|
||||
Loop: lda (ptr1),y ; Get next char
|
||||
beq EOS ; Jump on end of string
|
||||
cmp tmp1 ; Found?
|
||||
beq Found ; Jump if yes
|
||||
Loop: lda (ptr1),y ; Get next char
|
||||
beq EOS ; Jump on end of string
|
||||
cmp tmp1 ; Found?
|
||||
beq Found ; Jump if yes
|
||||
iny
|
||||
bne Loop
|
||||
inc ptr1+1
|
||||
bne Loop ; Branch always
|
||||
bne Loop
|
||||
inc ptr1+1
|
||||
bne Loop ; Branch always
|
||||
|
||||
; End of string. Check if we're searching for the terminating zero
|
||||
|
||||
EOS: lda tmp1 ; Get the char we're searching for
|
||||
bne NotFound ; Jump if not searching for terminator
|
||||
EOS:
|
||||
lda tmp1 ; Get the char we're searching for
|
||||
bne NotFound ; Jump if not searching for terminator
|
||||
|
||||
; Found. Calculate pointer to c.
|
||||
; Found. Set pointer to c.
|
||||
|
||||
Found: ldx ptr1+1 ; Load high byte of pointer
|
||||
tya ; Low byte offset
|
||||
clc
|
||||
adc ptr1
|
||||
bcc Found1
|
||||
inx
|
||||
Found1: rts
|
||||
Found:
|
||||
ldx ptr1+1 ; Load high byte of pointer
|
||||
tya ; low byte is in y
|
||||
rts
|
||||
|
||||
; Not found, return NULL
|
||||
|
||||
NotFound:
|
||||
lda #0
|
||||
lda #0
|
||||
tax
|
||||
rts
|
||||
|
||||
|
||||
@@ -1,54 +1,54 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 11.06.1998
|
||||
; Christian Krueger: 05-Aug-2013, optimization
|
||||
;
|
||||
; size_t strcspn (const char* s1, const char* s2);
|
||||
;
|
||||
|
||||
.export _strcspn
|
||||
.import popax
|
||||
.importzp ptr1, ptr2, tmp1, tmp2, tmp3
|
||||
.import popax, _strlen
|
||||
.importzp ptr1, ptr2, tmp1, tmp2
|
||||
|
||||
_strcspn:
|
||||
sta ptr2 ; Save s2
|
||||
stx ptr2+1
|
||||
jsr popax ; Get s1
|
||||
sta ptr1
|
||||
stx ptr1+1
|
||||
ldx #0 ; low counter byte
|
||||
stx tmp1 ; high counter byte
|
||||
ldy #$00
|
||||
jsr _strlen ; get length of s2 in a/x; s2 is left in ptr1
|
||||
; Note: More than 255 test chars make
|
||||
; no sense, so the high byte of the
|
||||
; length (x) is ignored here. For such
|
||||
; short strings strlen is also expected to
|
||||
; leave ptr1+1 unchanged (important!).
|
||||
; The original implementation ignored this case, too.
|
||||
|
||||
L1: lda (ptr1),y ; get next char from s1
|
||||
beq L6 ; jump if done
|
||||
sta tmp2 ; save char
|
||||
sta tmp1 ; tmp1 = strlen of test chars
|
||||
jsr popax ; get and save s1
|
||||
sta ptr2 ; to ptr2
|
||||
stx ptr2+1
|
||||
ldx #0 ; low counter byte
|
||||
stx tmp2 ; high counter byte
|
||||
|
||||
loadChar:
|
||||
ldy #0
|
||||
lda (ptr2),y ; get next char from s1
|
||||
beq leave ; end of s1 reached: return the count
|
||||
advance:
|
||||
inc ptr2 ; advance string position to test
|
||||
bne check
|
||||
inc ptr2+1
|
||||
dey ; correct next iny (faster/shorter than bne...)
|
||||
|
||||
checkNext:
|
||||
iny
|
||||
bne L2
|
||||
inc ptr1+1
|
||||
L2: sty tmp3 ; save index into s1
|
||||
check: cpy tmp1 ; compare with length of test character string
|
||||
beq endOfTestChars
|
||||
cmp (ptr1),y ; found matching char?
|
||||
bne checkNext
|
||||
|
||||
ldy #0 ; get index into s2
|
||||
L3: lda (ptr2),y ;
|
||||
beq L4 ; jump if done
|
||||
cmp tmp2
|
||||
beq L6
|
||||
iny
|
||||
bne L3
|
||||
|
||||
; The character was not found in s2. Increment the counter and start over
|
||||
|
||||
L4: ldy tmp3 ; reload index
|
||||
inx
|
||||
bne L1
|
||||
inc tmp1
|
||||
bne L1
|
||||
|
||||
; The character was found, or we reached the end of s1. Return count of
|
||||
; characters
|
||||
|
||||
L6: txa ; get low counter byte
|
||||
ldx tmp1 ; get high counter byte
|
||||
leave: txa ; restore position of finding
|
||||
ldx tmp2 ; and return
|
||||
rts
|
||||
|
||||
|
||||
|
||||
endOfTestChars:
|
||||
inx
|
||||
bne loadChar
|
||||
inc tmp2
|
||||
bne loadChar ; like bra...
|
||||
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 31.05.1998
|
||||
;
|
||||
; Note: strspn & strcspn call this function internally and rely on
|
||||
; it using only ptr1! Keep this in mind when applying changes
|
||||
; and check the other implementations too!
|
||||
;
|
||||
; int strlen (const char* s);
|
||||
;
|
||||
|
||||
@@ -23,4 +27,3 @@ L1: lda (ptr1),y
|
||||
|
||||
L9: tya ; get low byte of counter, hi's all set
|
||||
rts
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 31.05.1998
|
||||
; Christian Krueger: 12-Aug-2013, minor optimizations
|
||||
;
|
||||
; char* strncat (char* dest, const char* src, size_t n);
|
||||
;
|
||||
@@ -7,66 +8,68 @@
|
||||
.export _strncat
|
||||
.import popax
|
||||
.importzp ptr1, ptr2, ptr3, tmp1, tmp2
|
||||
|
||||
.macpack cpu
|
||||
|
||||
_strncat:
|
||||
eor #$FF ; one's complement to count upwards
|
||||
sta tmp1
|
||||
txa
|
||||
eor #$FF
|
||||
sta tmp2
|
||||
jsr popax ; get src
|
||||
sta ptr1
|
||||
stx ptr1+1
|
||||
jsr popax ; get dest
|
||||
sta ptr2
|
||||
stx ptr2+1
|
||||
sta ptr3 ; remember for function return
|
||||
stx ptr3+1
|
||||
ldy #0
|
||||
eor #$FF ; one's complement to count upwards
|
||||
sta tmp1
|
||||
txa
|
||||
eor #$FF
|
||||
sta tmp2
|
||||
|
||||
jsr popax ; get src
|
||||
sta ptr1
|
||||
stx ptr1+1
|
||||
|
||||
jsr popax ; get dest
|
||||
sta ptr3 ; remember for function return
|
||||
stx ptr3+1
|
||||
stx ptr2+1
|
||||
tay ; low byte as offset in Y
|
||||
.if (.cpu .bitand ::CPU_ISET_65SC02)
|
||||
stz ptr2
|
||||
.else
|
||||
ldx #0
|
||||
stx ptr2 ; destination on page boundary
|
||||
.endif
|
||||
|
||||
; find end of dest
|
||||
|
||||
L1: lda (ptr2),y
|
||||
beq L2
|
||||
iny
|
||||
bne L1
|
||||
inc ptr2+1
|
||||
bne L1
|
||||
L1: lda (ptr2),y
|
||||
beq L2
|
||||
iny
|
||||
bne L1
|
||||
inc ptr2+1
|
||||
bne L1
|
||||
|
||||
; end found, get offset in y into pointer
|
||||
|
||||
L2: tya
|
||||
clc
|
||||
adc ptr2
|
||||
sta ptr2
|
||||
bcc L3
|
||||
inc ptr2+1
|
||||
; end found, apply offset to dest ptr and reset y
|
||||
L2: sty ptr2
|
||||
|
||||
; copy src. We've put the ones complement of the count into the counter, so
|
||||
; we'll increment the counter on top of the loop
|
||||
|
||||
L3: ldy #0
|
||||
ldx tmp1 ; low counter byte
|
||||
L3: ldy #0
|
||||
ldx tmp1 ; low counter byte
|
||||
|
||||
L4: inx
|
||||
bne L5
|
||||
inc tmp2
|
||||
beq L6 ; jump if done
|
||||
L5: lda (ptr1),y
|
||||
sta (ptr2),y
|
||||
beq L7
|
||||
iny
|
||||
bne L4
|
||||
inc ptr1+1
|
||||
inc ptr2+1
|
||||
bne L4
|
||||
L4: inx
|
||||
bne L5
|
||||
inc tmp2
|
||||
beq L6 ; jump if done
|
||||
L5: lda (ptr1),y
|
||||
sta (ptr2),y
|
||||
beq L7
|
||||
iny
|
||||
bne L4
|
||||
inc ptr1+1
|
||||
inc ptr2+1
|
||||
bne L4
|
||||
|
||||
; done, set the trailing zero and return pointer to dest
|
||||
|
||||
L6: lda #0
|
||||
sta (ptr2),y
|
||||
L7: lda ptr3
|
||||
ldx ptr3+1
|
||||
rts
|
||||
L6: lda #0
|
||||
sta (ptr2),y
|
||||
L7: lda ptr3
|
||||
ldx ptr3+1
|
||||
rts
|
||||
|
||||
|
||||
|
||||
@@ -1,47 +1,41 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 31.05.1998
|
||||
; Christian Krueger: 2013-Aug-01, optimization
|
||||
;
|
||||
; char* strrchr (const char* s, int c);
|
||||
;
|
||||
|
||||
.export _strrchr
|
||||
.import popax
|
||||
.importzp ptr1, ptr2, tmp1
|
||||
.importzp ptr1, tmp1, tmp2
|
||||
|
||||
_strrchr:
|
||||
sta tmp1 ; Save c
|
||||
jsr popax ; get s
|
||||
sta ptr1
|
||||
stx ptr1+1
|
||||
lda #0 ; function result = NULL
|
||||
sta ptr2
|
||||
sta ptr2+1
|
||||
tay
|
||||
sta tmp1 ; Save c
|
||||
jsr popax ; get s
|
||||
tay ; low byte to y
|
||||
stx ptr1+1
|
||||
ldx #0 ; default function result is NULL, X is high byte...
|
||||
stx tmp2 ; tmp2 is low-byte
|
||||
stx ptr1 ; low-byte of source string is in Y, so clear real one...
|
||||
|
||||
testChar:
|
||||
lda (ptr1),y ; get char
|
||||
beq finished ; jump if end of string
|
||||
cmp tmp1 ; found?
|
||||
bne nextChar ; jump if no
|
||||
|
||||
L1: lda (ptr1),y ; get next char
|
||||
beq L3 ; jump if end of string
|
||||
cmp tmp1 ; found?
|
||||
bne L2 ; jump if no
|
||||
charFound:
|
||||
sty tmp2 ; y has low byte of location, save it
|
||||
ldx ptr1+1 ; x holds high-byte of result
|
||||
|
||||
; Remember a pointer to the character
|
||||
nextChar:
|
||||
iny
|
||||
bne testChar
|
||||
inc ptr1+1
|
||||
bne testChar ; here like bra...
|
||||
|
||||
tya
|
||||
clc
|
||||
adc ptr1
|
||||
sta ptr2
|
||||
lda ptr1+1
|
||||
adc #$00
|
||||
sta ptr2+1
|
||||
; return the pointer to the last occurrence
|
||||
|
||||
; Next char
|
||||
|
||||
L2: iny
|
||||
bne L1
|
||||
inc ptr1+1
|
||||
bne L1 ; jump always
|
||||
|
||||
; Return the pointer to the last occurrence
|
||||
|
||||
L3: lda ptr2
|
||||
ldx ptr2+1
|
||||
finished:
|
||||
lda tmp2 ; high byte in X is already correct...
|
||||
rts
|
||||
|
||||
@@ -1,56 +1,54 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 11.06.1998
|
||||
; Christian Krueger: 08-Aug-2013, optimization
|
||||
;
|
||||
; size_t strspn (const char* s1, const char* s2);
|
||||
;
|
||||
|
||||
.export _strspn
|
||||
.import popax
|
||||
.importzp ptr1, ptr2, tmp1, tmp2, tmp3
|
||||
.import popax, _strlen
|
||||
.importzp ptr1, ptr2, tmp1, tmp2
|
||||
|
||||
_strspn:
|
||||
sta ptr2 ; Save s2
|
||||
stx ptr2+1
|
||||
jsr popax ; get s1
|
||||
sta ptr1
|
||||
stx ptr1+1
|
||||
ldx #0 ; low counter byte
|
||||
stx tmp1 ; high counter byte
|
||||
ldy #$00
|
||||
jsr _strlen ; get length of s2 in a/x; s2 is left in ptr1
|
||||
; Note: More than 255 test chars make
|
||||
; no sense, so the high byte of the
|
||||
; length (x) is ignored here. For such
|
||||
; short strings strlen is also expected to
|
||||
; leave ptr1+1 unchanged (important!).
|
||||
; The original implementation ignored this case, too.
|
||||
|
||||
L1: lda (ptr1),y ; get next char from s1
|
||||
beq L6 ; jump if done
|
||||
sta tmp2 ; save char
|
||||
sta tmp1 ; tmp1 = strlen of test chars
|
||||
jsr popax ; get and save s1
|
||||
sta ptr2 ; to ptr2
|
||||
stx ptr2+1
|
||||
ldx #0 ; low counter byte
|
||||
stx tmp2 ; high counter byte
|
||||
|
||||
loadChar:
|
||||
ldy #0
|
||||
lda (ptr2),y ; get next char from s1
|
||||
beq leave ; end of s1 reached: return the count
|
||||
advance:
|
||||
inc ptr2 ; advance string position to test
|
||||
bne check
|
||||
inc ptr2+1
|
||||
dey ; correct next iny (faster/shorter than bne...)
|
||||
|
||||
checkNext:
|
||||
iny
|
||||
bne L2
|
||||
inc ptr1+1
|
||||
L2: sty tmp3 ; save index into s1
|
||||
check: cpy tmp1 ; compare with length of test character string
|
||||
beq leave
|
||||
cmp (ptr1),y ; found matching char?
|
||||
bne checkNext
|
||||
|
||||
ldy #0 ; get index into s2
|
||||
L3: lda (ptr2),y ;
|
||||
beq L6 ; jump if done
|
||||
cmp tmp2
|
||||
beq L4
|
||||
iny
|
||||
bne L3
|
||||
|
||||
; The character was found in s2. Increment the counter and start over
|
||||
|
||||
L4: ldy tmp3 ; reload index
|
||||
foundTestChar:
|
||||
inx
|
||||
bne L1
|
||||
inc tmp1
|
||||
bne L1
|
||||
bne loadChar
|
||||
inc tmp2
|
||||
bne loadChar ; like bra...
|
||||
|
||||
; The character was not found, or we reached the end of s1. Return count of
|
||||
; characters
|
||||
|
||||
L6: txa ; get low counter byte
|
||||
ldx tmp1 ; get high counter byte
|
||||
leave: txa ; restore position of finding
|
||||
ldx tmp2 ; and return
|
||||
rts
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 25.10.2000
|
||||
; Christian Krueger, 02-Mar-2017, some bytes saved
|
||||
;
|
||||
; CC65 runtime: Convert int in ax into a long
|
||||
;
|
||||
@@ -9,18 +10,13 @@
|
||||
|
||||
; Convert AX from int to long in EAX
|
||||
|
||||
axlong: ldy #$ff
|
||||
cpx #$80 ; Positive?
|
||||
bcs store ; No, apply $FF
|
||||
|
||||
axulong:
|
||||
ldy #0
|
||||
sty sreg
|
||||
store: sty sreg
|
||||
sty sreg+1
|
||||
rts
|
||||
|
||||
axlong: cpx #$80 ; Positive?
|
||||
bcc axulong ; Yes, handle like unsigned type
|
||||
ldy #$ff
|
||||
sty sreg
|
||||
sty sreg+1
|
||||
rts
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user