Merge pull request #389 from IrgendwerA8/stringimprovements

Optimization of string functions (size & speed).
This commit is contained in:
Oliver Schmidt
2017-04-04 15:39:37 +02:00
committed by GitHub
20 changed files with 607 additions and 267 deletions

View File

@@ -1,6 +1,6 @@
;
; 2003-08-20, Ullrich von Bassewitz
; 2009-09-13, Christian Krueger -- performance increase (about 20%)
; 2009-09-13, Christian Krueger -- performance increase (about 20%), 2013-07-25 improved unrolling
; 2015-10-23, Greg King
;
; void* __fastcall__ memmove (void* dest, const void* src, size_t size);
@@ -61,13 +61,10 @@ PageSizeCopy: ; assert Y = 0
dec ptr1+1 ; adjust base...
dec ptr2+1
dey ; in entry case: 0 -> FF
lda (ptr1),y ; need to copy this 'intro byte'
sta (ptr2),y ; to 'land' later on Y=0! (as a result of the '.repeat'-block!)
dey ; FF ->FE
@copyBytes:
.repeat 2 ; Unroll this a bit to make it faster...
lda (ptr1),y
sta (ptr2),y
.repeat 3 ; unroll this a bit to make it faster...
lda (ptr1),y ; important: unrolling three times gives a nice
sta (ptr2),y ; 255/3 = 85 loop which ends at 0
dey
.endrepeat
@copyEntry: ; in entry case: 0 -> FF

View File

@@ -1,5 +1,6 @@
;
; Ullrich von Bassewitz, 31.05.1998
; Christian Krueger: 2013-Jul-24, minor optimizations
;
; char* strcat (char* dest, const char* src);
;
@@ -7,49 +8,44 @@
.export _strcat
.import popax
.importzp ptr1, ptr2, tmp3
.macpack cpu
_strcat:
sta ptr1 ; Save src
stx ptr1+1
jsr popax ; Get dest
sta ptr2
stx ptr2+1
sta tmp3 ; Remember for function return
ldy #0
sta ptr1 ; Save src
stx ptr1+1
jsr popax ; Get dest
sta tmp3 ; Remember for function return
tay
.if (.cpu .bitand ::CPU_ISET_65SC02)
stz ptr2
.else
lda #0
sta ptr2 ; access from page start, y contains low byte
.endif
stx ptr2+1
; find end of dest
sc1: lda (ptr2),y
beq sc2
findEndOfDest:
lda (ptr2),y
beq endOfDestFound
iny
bne sc1
inc ptr2+1
bne sc1
bne findEndOfDest
inc ptr2+1
bne findEndOfDest
; end found, get offset in y into pointer
endOfDestFound:
sty ptr2 ; advance pointer to last y position
ldy #0 ; reset new y-offset
sc2: tya
clc
adc ptr2
sta ptr2
bcc sc3
inc ptr2+1
; copy src
sc3: ldy #0
sc4: lda (ptr1),y
sta (ptr2),y
beq sc5
copyByte:
lda (ptr1),y
sta (ptr2),y
beq done
iny
bne sc4
inc ptr1+1
inc ptr2+1
bne sc4
bne copyByte
inc ptr1+1
inc ptr2+1
bne copyByte ; like bra here
; done, return pointer to dest
sc5: lda tmp3 ; X does still contain high byte
; return pointer to dest
done: lda tmp3 ; X does still contain high byte
rts

View File

@@ -1,5 +1,6 @@
;
; Ullrich von Bassewitz, 31.05.1998
; Christian Krueger, 2013-Aug-04, minor optimization
;
; const char* strchr (const char* s, int c);
;
@@ -7,42 +8,45 @@
.export _strchr
.import popax
.importzp ptr1, tmp1
.macpack cpu
_strchr:
sta tmp1 ; Save c
jsr popax ; get s
sta ptr1
stx ptr1+1
ldy #0
sta tmp1 ; Save c
jsr popax ; get s
tay ; low byte of pointer to y
stx ptr1+1
.if (.cpu .bitand ::CPU_ISET_65SC02)
stz ptr1
.else
lda #0
sta ptr1 ; access from page start, y contains low byte
.endif
Loop: lda (ptr1),y ; Get next char
beq EOS ; Jump on end of string
cmp tmp1 ; Found?
beq Found ; Jump if yes
Loop: lda (ptr1),y ; Get next char
beq EOS ; Jump on end of string
cmp tmp1 ; Found?
beq Found ; Jump if yes
iny
bne Loop
inc ptr1+1
bne Loop ; Branch always
bne Loop
inc ptr1+1
bne Loop ; Branch always
; End of string. Check if we're searching for the terminating zero
EOS: lda tmp1 ; Get the char we're searching for
bne NotFound ; Jump if not searching for terminator
EOS:
lda tmp1 ; Get the char we're searching for
bne NotFound ; Jump if not searching for terminator
; Found. Calculate pointer to c.
; Found. Set pointer to c.
Found: ldx ptr1+1 ; Load high byte of pointer
tya ; Low byte offset
clc
adc ptr1
bcc Found1
inx
Found1: rts
Found:
ldx ptr1+1 ; Load high byte of pointer
tya ; low byte is in y
rts
; Not found, return NULL
NotFound:
lda #0
lda #0
tax
rts

View File

@@ -1,54 +1,54 @@
;
; Ullrich von Bassewitz, 11.06.1998
; Christian Krueger: 05-Aug-2013, optimization
;
; size_t strcspn (const char* s1, const char* s2);
;
.export _strcspn
.import popax
.importzp ptr1, ptr2, tmp1, tmp2, tmp3
.import popax, _strlen
.importzp ptr1, ptr2, tmp1, tmp2
_strcspn:
sta ptr2 ; Save s2
stx ptr2+1
jsr popax ; Get s1
sta ptr1
stx ptr1+1
ldx #0 ; low counter byte
stx tmp1 ; high counter byte
ldy #$00
jsr _strlen ; get length in a/x and transfer s2 to ptr1
; Note: It does not make sense to
; have more than 255 test chars, so
; we don't support a high byte here! (ptr1+1 is
; also unchanged in strlen then (important!))
; -> the original implementation also
; ignored this case
L1: lda (ptr1),y ; get next char from s1
beq L6 ; jump if done
sta tmp2 ; save char
sta tmp1 ; tmp1 = strlen of test chars
jsr popax ; get and save s1
sta ptr2 ; to ptr2
stx ptr2+1
ldx #0 ; low counter byte
stx tmp2 ; high counter byte
loadChar:
ldy #0
lda (ptr2),y ; get next char from s1
beq leave ; handle the terminating zero byte of s1
advance:
inc ptr2 ; advance string position to test
bne check
inc ptr2+1
dey ; correct next iny (faster/shorter than bne...)
checkNext:
iny
bne L2
inc ptr1+1
L2: sty tmp3 ; save index into s1
check: cpy tmp1 ; compare with length of test character string
beq endOfTestChars
cmp (ptr1),y ; found matching char?
bne checkNext
ldy #0 ; get index into s2
L3: lda (ptr2),y ;
beq L4 ; jump if done
cmp tmp2
beq L6
iny
bne L3
; The character was not found in s2. Increment the counter and start over
L4: ldy tmp3 ; reload index
inx
bne L1
inc tmp1
bne L1
; The character was found, or we reached the end of s1. Return count of
; characters
L6: txa ; get low counter byte
ldx tmp1 ; get high counter byte
leave: txa ; low counter byte to A
ldx tmp2 ; high counter byte to X, and return
rts
endOfTestChars:
inx
bne loadChar
inc tmp2
bne loadChar ; like bra...

View File

@@ -1,6 +1,10 @@
;
; Ullrich von Bassewitz, 31.05.1998
;
; Note: strspn & strcspn internally call this function and rely on
; the usage of only ptr1 here! Keep this in mind when applying changes
; and check the other implementations too!
;
; int strlen (const char* s);
;
@@ -23,4 +27,3 @@ L1: lda (ptr1),y
L9: tya ; get low byte of counter, hi's all set
rts

View File

@@ -1,5 +1,6 @@
;
; Ullrich von Bassewitz, 31.05.1998
; Christian Krueger: 12-Aug-2013, minor optimizations
;
; char* strncat (char* dest, const char* src, size_t n);
;
@@ -7,66 +8,68 @@
.export _strncat
.import popax
.importzp ptr1, ptr2, ptr3, tmp1, tmp2
.macpack cpu
_strncat:
eor #$FF ; one's complement to count upwards
sta tmp1
txa
eor #$FF
sta tmp2
jsr popax ; get src
sta ptr1
stx ptr1+1
jsr popax ; get dest
sta ptr2
stx ptr2+1
sta ptr3 ; remember for function return
stx ptr3+1
ldy #0
eor #$FF ; one's complement to count upwards
sta tmp1
txa
eor #$FF
sta tmp2
jsr popax ; get src
sta ptr1
stx ptr1+1
jsr popax ; get dest
sta ptr3 ; remember for function return
stx ptr3+1
stx ptr2+1
tay ; low byte as offset in Y
.if (.cpu .bitand ::CPU_ISET_65SC02)
stz ptr2
.else
ldx #0
stx ptr2 ; destination on page boundary
.endif
; find end of dest
L1: lda (ptr2),y
beq L2
iny
bne L1
inc ptr2+1
bne L1
L1: lda (ptr2),y
beq L2
iny
bne L1
inc ptr2+1
bne L1
; end found, get offset in y into pointer
L2: tya
clc
adc ptr2
sta ptr2
bcc L3
inc ptr2+1
; end found, apply offset to dest ptr and reset y
L2: sty ptr2
; copy src. We've put the ones complement of the count into the counter, so
; we'll increment the counter on top of the loop
L3: ldy #0
ldx tmp1 ; low counter byte
L3: ldy #0
ldx tmp1 ; low counter byte
L4: inx
bne L5
inc tmp2
beq L6 ; jump if done
L5: lda (ptr1),y
sta (ptr2),y
beq L7
iny
bne L4
inc ptr1+1
inc ptr2+1
bne L4
L4: inx
bne L5
inc tmp2
beq L6 ; jump if done
L5: lda (ptr1),y
sta (ptr2),y
beq L7
iny
bne L4
inc ptr1+1
inc ptr2+1
bne L4
; done, set the trailing zero and return pointer to dest
L6: lda #0
sta (ptr2),y
L7: lda ptr3
ldx ptr3+1
rts
L6: lda #0
sta (ptr2),y
L7: lda ptr3
ldx ptr3+1
rts

View File

@@ -1,47 +1,41 @@
;
; Ullrich von Bassewitz, 31.05.1998
; Christian Krueger: 2013-Aug-01, optimization
;
; char* strrchr (const char* s, int c);
;
.export _strrchr
.import popax
.importzp ptr1, ptr2, tmp1
.importzp ptr1, tmp1, tmp2
_strrchr:
sta tmp1 ; Save c
jsr popax ; get s
sta ptr1
stx ptr1+1
lda #0 ; function result = NULL
sta ptr2
sta ptr2+1
tay
sta tmp1 ; Save c
jsr popax ; get s
tay ; low byte to y
stx ptr1+1
ldx #0 ; default function result is NULL, X is high byte...
stx tmp2 ; tmp2 is low-byte
stx ptr1 ; low-byte of source string is in Y, so clear real one...
testChar:
lda (ptr1),y ; get char
beq finished ; jump if end of string
cmp tmp1 ; found?
bne nextChar ; jump if no
L1: lda (ptr1),y ; get next char
beq L3 ; jump if end of string
cmp tmp1 ; found?
bne L2 ; jump if no
charFound:
sty tmp2 ; y has low byte of location, save it
ldx ptr1+1 ; x holds high-byte of result
; Remember a pointer to the character
nextChar:
iny
bne testChar
inc ptr1+1
bne testChar ; here like bra...
tya
clc
adc ptr1
sta ptr2
lda ptr1+1
adc #$00
sta ptr2+1
; return the pointer to the last occurrence
; Next char
L2: iny
bne L1
inc ptr1+1
bne L1 ; jump always
; Return the pointer to the last occurrence
L3: lda ptr2
ldx ptr2+1
finished:
lda tmp2 ; high byte in X is already correct...
rts

View File

@@ -1,56 +1,54 @@
;
; Ullrich von Bassewitz, 11.06.1998
; Christian Krueger: 08-Aug-2013, optimization
;
; size_t strspn (const char* s1, const char* s2);
;
.export _strspn
.import popax
.importzp ptr1, ptr2, tmp1, tmp2, tmp3
.import popax, _strlen
.importzp ptr1, ptr2, tmp1, tmp2
_strspn:
sta ptr2 ; Save s2
stx ptr2+1
jsr popax ; get s1
sta ptr1
stx ptr1+1
ldx #0 ; low counter byte
stx tmp1 ; high counter byte
ldy #$00
jsr _strlen ; get length in a/x and transfer s2 to ptr1
; Note: It does not make sense to
; have more than 255 test chars, so
; we don't support a high byte here! (ptr1+1 is
; also unchanged in strlen then (important!))
; -> the original implementation also
; ignored this case
L1: lda (ptr1),y ; get next char from s1
beq L6 ; jump if done
sta tmp2 ; save char
sta tmp1 ; tmp1 = strlen of test chars
jsr popax ; get and save s1
sta ptr2 ; to ptr2
stx ptr2+1
ldx #0 ; low counter byte
stx tmp2 ; high counter byte
loadChar:
ldy #0
lda (ptr2),y ; get next char from s1
beq leave ; handle the terminating zero byte of s1
advance:
inc ptr2 ; advance string position to test
bne check
inc ptr2+1
dey ; correct next iny (faster/shorter than bne...)
checkNext:
iny
bne L2
inc ptr1+1
L2: sty tmp3 ; save index into s1
check: cpy tmp1 ; compare with length of test character string
beq leave
cmp (ptr1),y ; found matching char?
bne checkNext
ldy #0 ; get index into s2
L3: lda (ptr2),y ;
beq L6 ; jump if done
cmp tmp2
beq L4
iny
bne L3
; The character was found in s2. Increment the counter and start over
L4: ldy tmp3 ; reload index
foundTestChar:
inx
bne L1
inc tmp1
bne L1
bne loadChar
inc tmp2
bne loadChar ; like bra...
; The character was not found, or we reached the end of s1. Return count of
; characters
L6: txa ; get low counter byte
ldx tmp1 ; get high counter byte
leave: txa ; low counter byte to A
ldx tmp2 ; high counter byte to X, and return
rts

View File

@@ -1,5 +1,6 @@
;
; Ullrich von Bassewitz, 25.10.2000
; Christian Krueger, 02-Mar-2017, some bytes saved
;
; CC65 runtime: Convert int in ax into a long
;
@@ -9,18 +10,13 @@
; Convert AX from int to long in EAX
axlong: ldy #$ff
cpx #$80 ; Positive?
bcs store ; No, apply $FF
axulong:
ldy #0
sty sreg
store: sty sreg
sty sreg+1
rts
axlong: cpx #$80 ; Positive?
bcc axulong ; Yes, handle like unsigned type
ldy #$ff
sty sreg
sty sreg+1
rts