- no necesita reservar memoria para una tabla temporal de 156 bytes.
- no añade una tabla de 26 bytes fijos al comienzo de los datos comprimidos, por lo que tiene más posibilidades de comprimir bloques pequeños de datos
- para bloques grandes, en muy raras ocasiones, comprime mejor (pero el 95% de las veces gana exomizer, que conste)
- más rápida (aunque ya no tanto tras las últimas optimizaciones de exomizer)
Podéis encontrar el programa compresor (appack.exe) aquí, junto con el descompresor original sin optimizar (muy, muy lento):
http://www.smspower.org/maxim/uploads/S ... plib12.zip
Rutina optimizada en tamaño (156 bytes):
Código: Seleccionar todo
; aPPack decompressor
; original source by dwedit
; very slightly adapted by utopian
; optimized by Metalbrain
;
; Size-optimised variant (Z80).
; In:  hl = source (compressed data)
;      de = dest (output buffer)
; Register use in this listing:
;      ixl = bit buffer (one marker bit follows the unread data bits)
;      ixh = 1 after a literal / single-byte copy, 0 after a block copy
;      iy  = offset of the most recent block copy (read by ap_r0_gamma)
; Returns through the "ret z" in apbranch3 when the end marker is read.
depack: ld ixl,128 ;only the marker bit is set, so the first ap_getbit reloads from (hl)
apbranch1: ldi ;copy one literal byte from source to dest
aploop0: ld ixh,1 ;LWM = 0
aploop: call ap_getbit ;tag bit 1: 0 = literal
jr nc,apbranch1
call ap_getbit2 ;tag bit 2: 0 = gamma-coded match (a still holds the bit buffer)
jr nc,apbranch2
ld bc,16 ;b = 0; the set bit in c falls out into carry after four shifts
call ap_getbit2 ;tag bit 3: 0 = match with 7-bit offset
jr nc,apbranch3
apget4bits: call ap_getbit2 ;shift four offset bits into c
rl c
jr nc,apget4bits
ld a,b ;a = 0; ld does not touch the Z flag left by rl c
jr z,apwritebyte ;offset 0: store a zero byte
and a ;clear carry for the sbc below
ex de,hl ;write a previous byte (1-15 away from dest)
sbc hl,bc ;hl = dest - offset
ld a,(hl)
add hl,bc ;hl = dest again
ex de,hl
apwritebyte: ld (de),a ;store either the zero or the byte fetched above
inc de
jr aploop0
;apbranch3: match described by one source byte: offset in bits 7-1,
;length selector in bit 0 (length 2 or 3). A zero offset ends the stream.
;Entered with carry clear (via jr nc) and b = 0 (from ld bc,16).
apbranch3: ld c,(hl) ;use 7 bit offset, length = 2 or 3
rr c ;c = offset, carry = length selector bit
ret z ;if a zero is found here, it's EOF
inc hl
ld a,b ;a = 0
adc a,2 ;a = 2 + selector bit; carry is clear afterwards
push hl ;save source pointer (popped after the ldir at ap_finishup2)
push bc
pop iy ;iy = offset, kept for ap_r0_gamma
ld h,d
ld l,e
sbc hl,bc ;hl = dest - offset
ld c,a ;bc = length (b is still 0)
jr ap_finishup2
;apbranch2: match with a gamma-coded offset high part, a raw offset low byte
;and a gamma-coded length. Falls into ap_finishup / ap_finishup2, which are
;also entered from ap_r0_gamma and apbranch3.
;NOTE(review): only the 1280 and 128 offset thresholds adjust the length here;
;the reference aPLib depacker is believed to add one more for offsets >= 32000
;- confirm the packer never produces those for the data this is used on.
apbranch2: call ap_getgamma ;use a gamma code * 256 for offset, another gamma code for length
dec c
ld a,c
sub ixh ;a = gamma - 1 - ixh
jr z,ap_r0_gamma ;zero (gamma = 2 with ixh = 1): reuse the offset kept in iy
dec a
;do I even need this code?
;bc=bc*256+(hl), lazy 16bit way
ld b,a ;offset high byte
ld c,(hl) ;offset low byte comes straight from the stream
inc hl
push bc
pop iy ;iy = offset, kept for ap_r0_gamma
push bc ;offset on the stack while the length is read
call ap_getgamma ;bc = length
ex (sp),hl ;bc = len, hl=offs (source pointer now on the stack)
push de ;save dest
ex de,hl ;de = offset
ld a,4
cp d ;carry set when d > 4, i.e. offset >= 1280
jr nc,apskip2
inc bc ;offset >= 1280: length + 1
or a ;clear carry for the sbc below
apskip2: ld hl,127
sbc hl,de ;carry set when offset > 127
jr c,apskip3
inc bc ;offset <= 127: length + 2
inc bc
apskip3: pop hl ;hl = dest (pushed above), bc = len, de = offs
push hl ;keep dest on the stack for the pop de below
or a ;clear carry for the sbc below
ap_finishup: sbc hl,de
pop de ;hl=dest-offs, bc=len, de = dest
ap_finishup2: ldir ;copy the match
pop hl ;restore source pointer
ld ixh,b ;b = 0 after ldir: mark "last item was a block copy"
jr aploop
;ap_r0_gamma: match that reuses the offset stored in iy; only the length is read.
ap_r0_gamma: call ap_getgamma ;and a new gamma code for length
push hl ;save source pointer
push de ;save dest
ex de,hl ;hl = dest
push iy
pop de ;de = previous offset
jr ap_finishup ;carry is clear here: ap_getgamma only returns when its last bit was 0
;ap_getbit / ap_getbit2: fetch the next stream bit into carry.
; ap_getbit  - loads the bit buffer from ixl first
; ap_getbit2 - entry for callers where a still holds the bit buffer
;Out: carry = bit, a = ixl = updated bit buffer; hl is advanced when a new
;     byte had to be loaded.
ap_getbit: ld a,ixl
ap_getbit2: add a,a ;shift the next bit into carry
jr nz,ap_endbit ;non-zero: data bits and/or the marker are still in the buffer
ld a,(hl) ;buffer ran out: load the next source byte
inc hl
rla ;carry out = first data bit; the old marker in carry becomes bit 0
ap_endbit: ld ixl,a
ret
;ap_getgamma: read a variable-length number into bc.
;bc starts at 1; each round shifts one data bit into bc and then reads a
;continuation bit (1 = another round). Returns with carry clear.
ap_getgamma: ld bc,1
ap_getgammaloop:call ap_getbit ;data bit
rl c
rl b
call ap_getbit2 ;continuation bit
jr c,ap_getgammaloop
ret
Rutina optimizada en velocidad (197 bytes):
Código: Seleccionar todo
; aPPack decompressor
; original source by dwedit
; very slightly adapted by utopian
; optimized by Metalbrain
;
; Speed-optimised variant (Z80): bit fetches are inlined, the bit buffer
; lives in a, and af' is used to park it while a is needed as scratch.
; In:  hl = source (compressed data)
;      de = dest (output buffer)
; Uses: af, af', bc, ixh, iy
; Returns through the "ret z" in apbranch3 when the end marker is read.
;
; The two instructions before "ld a,128" make sure F' starts with carry clear.
; apbranch3 and apbranch4 execute "ex af,af'" and then feed that carry
; straight into "rr c" / "sbc hl,bc". Every match leaves F' carry clear for
; the next one, but for the very first match it would otherwise be whatever
; the caller left in F'. This costs 2 bytes and 8 T-states on top of the
; published size figure.
depack: or a ;clear carry...
ex af,af' ;...and park it in F'
ld a,128 ;bit buffer: only the marker bit set, so the first fetch reloads from (hl)
;Main loop. a is the bit buffer; every "add a,a / jr nz / ld a,(hl) / inc hl /
;rla" group is an inlined bit fetch that leaves the bit in carry.
apbranch1: ldi ;copy one literal byte from source to dest
aploop2: ld ixh,1 ;ixh = 1: last item was not a block copy
aploop: add a,a ;tag bit 1
jr nz,apnogetbit1
ld a,(hl) ;buffer ran out: load next source byte
inc hl
rla ;carry out = data bit; old marker becomes bit 0
apnogetbit1: jr nc,apbranch1 ;0 = literal
add a,a ;tag bit 2
jr nz,apnogetbit2
ld a,(hl)
inc hl
rla
apnogetbit2: jr nc,apbranch2 ;0 = gamma-coded match
add a,a ;tag bit 3
jr nz,apnogetbit3
ld a,(hl)
inc hl
rla
apnogetbit3: jr nc,apbranch3 ;0 = match with 7-bit offset
ld bc,16 ;get an offset: b = 0, the set bit in c falls out after four shifts
apget4bits: add a,a
jr nz,apnogetbit4
ld a,(hl)
inc hl
rla
apnogetbit4: rl c ;shift the offset bit in
jr nc,apget4bits
jr nz,apbranch4 ;non-zero offset: copy one earlier byte
ex de,hl
ld (hl),b ;write a 0
ex de,hl
inc de
jp aploop2
apbranch4: ex af,af' ;park the bit buffer in af'; F now holds what was in F'
ex de,hl ;write a previous byte (1-15 away from dest)
sbc hl,bc ;uses the carry just swapped in from F' - it has to be clear here
ld a,(hl)
add hl,bc ;hl = dest again
ld (hl),a
ex af,af' ;bit buffer back in a
ex de,hl
inc de
jp aploop2
;apbranch3: match described by one source byte: offset in bits 7-1,
;length selector in bit 0 (length 2 or 3). A zero offset ends the stream.
apbranch3: ld c,(hl) ;use 7 bit offset, length = 2 or 3
inc hl
ex af,af' ;park the bit buffer in af'; F now holds what was in F'
rr c ;c = offset, carry = length selector; the carry swapped in from F' lands in bit 7, so it has to be clear
ret z ;if a zero is found here, it's EOF
ld a,2
ld b,0
adc a,b ;a = 2 + selector bit; carry is clear afterwards
push hl ;save source pointer
ld iyh,b
ld iyl,c ;iy = offset, kept for ap_r0_gamma
ld h,d
ld l,e
sbc hl,bc ;hl = dest - offset
ld c,a ;bc = length
ex af,af' ;bit buffer back in a
ldir ;copy the match
pop hl ;restore source pointer
ld ixh,b ;b = 0 after ldir: mark "last item was a block copy"
jp aploop
;apbranch2: match with a gamma-coded offset high part, a raw offset low byte
;and a gamma-coded length.
;NOTE(review): only the 1280 and 128 offset thresholds adjust the length here;
;the reference aPLib depacker is believed to add one more for offsets >= 32000
;- confirm the packer never produces those for the data this is used on.
apbranch2: call ap_getgamma ;use a gamma code * 256 for offset, another gamma code for length
dec c
ex af,af' ;park the bit buffer in af'
ld a,c
sub ixh ;a = gamma - 1 - ixh
jr z,ap_r0_gamma ;zero (gamma = 2 with ixh = 1): reuse the offset kept in iy
dec a
;do I even need this code?
;bc=bc*256+(hl), lazy 16bit way
ld b,a ;offset high byte
ld c,(hl) ;offset low byte comes straight from the stream
inc hl
ld iyh,b
ld iyl,c ;iy = offset, kept for ap_r0_gamma
push bc ;offset on the stack while the length is read
call ap_getgamma2 ;swaps the bit buffer back into a, then bc = length
ex (sp),hl ;bc = len, hl=offs (source pointer now on the stack)
push de ;save dest
ex de,hl ;de = offset
ex af,af' ;park the bit buffer again; a is scratch below
ld a,4
cp d ;carry set when d > 4, i.e. offset >= 1280
jr nc,apskip2
inc bc ;offset >= 1280: length + 1
or a ;clear carry for the sbc below
apskip2: ld hl,127
sbc hl,de ;carry set when offset > 127
jr c,apskip3
inc bc ;offset <= 127: length + 2
inc bc
apskip3: pop hl ;hl = dest (pushed above), bc = len, de = offs
push hl ;keep dest on the stack for the pop de below
or a ;clear carry for the sbc below
sbc hl,de
ex af,af' ;bit buffer back in a
pop de ;hl=dest-offs, bc=len, de = dest
ldir ;copy the match
pop hl ;restore source pointer
ld ixh,b ;b = 0 after ldir: mark "last item was a block copy"
jp aploop
;ap_r0_gamma: match that reuses the offset stored in iy; only the length is read.
;Reached from apbranch2 with the bit buffer parked in af'.
ap_r0_gamma: call ap_getgamma2 ;and a new gamma code for length
push hl ;save source pointer
push de ;save dest
ex de,hl ;hl = dest
ld d,iyh
ld e,iyl ;de = previous offset
sbc hl,de ;carry is clear: ap_getgamma only returns when its last bit was 0
pop de ;hl=dest-offs, bc=len, de = dest
ldir ;copy the match
pop hl ;restore source pointer
ld ixh,b ;b = 0 after ldir: mark "last item was a block copy"
jp aploop
;ap_getgamma / ap_getgamma2: read a variable-length number into bc.
; ap_getgamma2 - entry for callers that have the bit buffer parked in af'
; ap_getgamma  - entry for callers that already have it in a
;bc starts at 1; each round shifts one data bit into bc and then reads a
;continuation bit (1 = another round). Returns with carry clear.
ap_getgamma2: ex af,af'
ap_getgamma: ld bc,1
ap_getgammaloop:add a,a ;data bit (inlined bit fetch)
jr nz,apnogetbit5
ld a,(hl) ;buffer ran out: load next source byte
inc hl
rla
apnogetbit5: rl c
rl b
add a,a ;continuation bit
jr nz,apnogetbit6
ld a,(hl)
inc hl
rla
apnogetbit6: jr c,ap_getgammaloop
ret
Se puede aumentar todavía más la velocidad expandiendo la rutina ap_getgamma, de forma que sus primeros pasos queden inline, al precio de 15 bytes por cada bit optimizado (hasta el octavo; a partir de ahí, 17 bytes por bit hasta el decimoquinto, que sería el último). Aquí pongo un ejemplo con 2 bits optimizados (227 bytes):
Código: Seleccionar todo
; aPPack decompressor
; original source by dwedit
; very slightly adapted by utopian
; optimized by Metalbrain
;
; Speed-optimised variant (Z80) with the first two rounds of ap_getgamma
; unrolled. The bit buffer lives in a, and af' is used to park it while a
; is needed as scratch.
; In:  hl = source (compressed data)
;      de = dest (output buffer)
; Uses: af, af', bc, ixh, iy
; Returns through the "ret z" in apbranch3 when the end marker is read.
;
; The two instructions before "ld a,128" make sure F' starts with carry clear.
; apbranch3 and apbranch4 execute "ex af,af'" and then feed that carry
; straight into "rr c" / "sbc hl,bc". Every match leaves F' carry clear for
; the next one, but for the very first match it would otherwise be whatever
; the caller left in F'. This costs 2 bytes and 8 T-states on top of the
; published size figure.
depack: or a ;clear carry...
ex af,af' ;...and park it in F'
ld a,128 ;bit buffer: only the marker bit set, so the first fetch reloads from (hl)
;Main loop. a is the bit buffer; every "add a,a / jr nz / ld a,(hl) / inc hl /
;rla" group is an inlined bit fetch that leaves the bit in carry.
apbranch1: ldi ;copy one literal byte from source to dest
aploop2: ld ixh,1 ;ixh = 1: last item was not a block copy
aploop: add a,a ;tag bit 1
jr nz,apnogetbit1
ld a,(hl) ;buffer ran out: load next source byte
inc hl
rla ;carry out = data bit; old marker becomes bit 0
apnogetbit1: jr nc,apbranch1 ;0 = literal
add a,a ;tag bit 2
jr nz,apnogetbit2
ld a,(hl)
inc hl
rla
apnogetbit2: jr nc,apbranch2 ;0 = gamma-coded match
add a,a ;tag bit 3
jr nz,apnogetbit3
ld a,(hl)
inc hl
rla
apnogetbit3: jr nc,apbranch3 ;0 = match with 7-bit offset
ld bc,16 ;get an offset: b = 0, the set bit in c falls out after four shifts
apget4bits: add a,a
jr nz,apnogetbit4
ld a,(hl)
inc hl
rla
apnogetbit4: rl c ;shift the offset bit in
jr nc,apget4bits
jr nz,apbranch4 ;non-zero offset: copy one earlier byte
ex de,hl
ld (hl),b ;write a 0
ex de,hl
inc de
jp aploop2
apbranch4: ex af,af' ;park the bit buffer in af'; F now holds what was in F'
ex de,hl ;write a previous byte (1-15 away from dest)
sbc hl,bc ;uses the carry just swapped in from F' - it has to be clear here
ld a,(hl)
add hl,bc ;hl = dest again
ld (hl),a
ex af,af' ;bit buffer back in a
ex de,hl
inc de
jp aploop2
;apbranch3: match described by one source byte: offset in bits 7-1,
;length selector in bit 0 (length 2 or 3). A zero offset ends the stream.
apbranch3: ld c,(hl) ;use 7 bit offset, length = 2 or 3
inc hl
ex af,af' ;park the bit buffer in af'; F now holds what was in F'
rr c ;c = offset, carry = length selector; the carry swapped in from F' lands in bit 7, so it has to be clear
ret z ;if a zero is found here, it's EOF
ld a,2
ld b,0
adc a,b ;a = 2 + selector bit; carry is clear afterwards
push hl ;save source pointer
ld iyh,b
ld iyl,c ;iy = offset, kept for ap_r0_gamma
ld h,d
ld l,e
sbc hl,bc ;hl = dest - offset
ld c,a ;bc = length
ex af,af' ;bit buffer back in a
ldir ;copy the match
pop hl ;restore source pointer
ld ixh,b ;b = 0 after ldir: mark "last item was a block copy"
jp aploop
;apbranch2: match with a gamma-coded offset high part, a raw offset low byte
;and a gamma-coded length.
;NOTE(review): only the 1280 and 128 offset thresholds adjust the length here;
;the reference aPLib depacker is believed to add one more for offsets >= 32000
;- confirm the packer never produces those for the data this is used on.
apbranch2: call ap_getgamma ;use a gamma code * 256 for offset, another gamma code for length
dec c
ex af,af' ;park the bit buffer in af'
ld a,c
sub ixh ;a = gamma - 1 - ixh
jr z,ap_r0_gamma ;zero (gamma = 2 with ixh = 1): reuse the offset kept in iy
dec a
;do I even need this code?
;bc=bc*256+(hl), lazy 16bit way
ld b,a ;offset high byte
ld c,(hl) ;offset low byte comes straight from the stream
inc hl
ld iyh,b
ld iyl,c ;iy = offset, kept for ap_r0_gamma
push bc ;offset on the stack while the length is read
call ap_getgamma2 ;swaps the bit buffer back into a, then bc = length
ex (sp),hl ;bc = len, hl=offs (source pointer now on the stack)
push de ;save dest
ex de,hl ;de = offset
ex af,af' ;park the bit buffer again; a is scratch below
ld a,4
cp d ;carry set when d > 4, i.e. offset >= 1280
jr nc,apskip2
inc bc ;offset >= 1280: length + 1
or a ;clear carry for the sbc below
apskip2: ld hl,127
sbc hl,de ;carry set when offset > 127
jr c,apskip3
inc bc ;offset <= 127: length + 2
inc bc
apskip3: pop hl ;hl = dest (pushed above), bc = len, de = offs
push hl ;keep dest on the stack for the pop de below
or a ;clear carry for the sbc below
sbc hl,de
ex af,af' ;bit buffer back in a
pop de ;hl=dest-offs, bc=len, de = dest
ldir ;copy the match
pop hl ;restore source pointer
ld ixh,b ;b = 0 after ldir: mark "last item was a block copy"
jp aploop
;ap_r0_gamma: match that reuses the offset stored in iy; only the length is read.
;Reached from apbranch2 with the bit buffer parked in af'.
ap_r0_gamma: call ap_getgamma2 ;and a new gamma code for length
push hl ;save source pointer
push de ;save dest
ex de,hl ;hl = dest
ld d,iyh
ld e,iyl ;de = previous offset
sbc hl,de ;carry is clear: every exit of ap_getgamma is taken on "no carry"
pop de ;hl=dest-offs, bc=len, de = dest
ldir ;copy the match
pop hl ;restore source pointer
ld ixh,b ;b = 0 after ldir: mark "last item was a block copy"
jp aploop
;ap_getgamma / ap_getgamma2: read a variable-length number into bc.
; ap_getgamma2 - entry for callers that have the bit buffer parked in af'
; ap_getgamma  - entry for callers that already have it in a
;bc starts at 1; each round shifts one data bit into bc and then reads a
;continuation bit (0 = done). The first two rounds are unrolled and skip
;"rl b", because after at most two shifts the value still fits in c.
;Every exit is taken with carry clear.
ap_getgamma2: ex af,af'
ap_getgamma: ld bc,1
add a,a ;round 1: data bit
jr nz,apnogetbit5
ld a,(hl) ;buffer ran out: load next source byte
inc hl
rla
apnogetbit5: rl c
add a,a ;round 1: continuation bit
jr nz,apnogetbit6
ld a,(hl)
inc hl
rla
apnogetbit6: ret nc
add a,a ;round 2: data bit
jr nz,apnogetbit7
ld a,(hl)
inc hl
rla
apnogetbit7: rl c
add a,a ;round 2: continuation bit
jr nz,apnogetbit8
ld a,(hl)
inc hl
rla
apnogetbit8: ret nc
ap_getgammaloop:add a,a ;generic round: data bit, 16-bit shift
jr nz,apnogetbit9
ld a,(hl)
inc hl
rla
apnogetbit9: rl c
rl b
add a,a ;continuation bit
jr nz,apnogetbit10
ld a,(hl)
inc hl
rla
apnogetbit10: jr c,ap_getgammaloop
ret
Y por último una idea de Antonio Villena, gastando todavía más bytes, para favorecer el caso más común (247 bytes, 2 bits optimizados como en la anterior):
Código: Seleccionar todo
; aPPack decompressor
; original source by dwedit
; very slightly adapted by utopian
; optimized by Metalbrain & Antonio Villena
;
; Speed-optimised variant (Z80) in which ap_getgamma keeps the common
; "buffer not empty" case on the fall-through path and branches out to the
; ap5..ap10 stubs for refills. The bit buffer lives in a, and af' is used to
; park it while a is needed as scratch.
; In:  hl = source (compressed data)
;      de = dest (output buffer)
; Uses: af, af', bc, ixh, iy
; Returns through the "ret z" in apbranch3 when the end marker is read.
;
; The two instructions before "ld a,128" make sure F' starts with carry clear.
; apbranch3 and apbranch4 execute "ex af,af'" and then feed that carry
; straight into "rr c" / "sbc hl,bc". Every match leaves F' carry clear for
; the next one, but for the very first match it would otherwise be whatever
; the caller left in F'. This costs 2 bytes and 8 T-states on top of the
; published size figure.
depack: or a ;clear carry...
ex af,af' ;...and park it in F'
ld a,128 ;bit buffer: only the marker bit set, so the first fetch reloads from (hl)
;Main loop. a is the bit buffer; every "add a,a / jr nz / ld a,(hl) / inc hl /
;rla" group is an inlined bit fetch that leaves the bit in carry.
apbranch1: ldi ;copy one literal byte from source to dest
aploop2: ld ixh,1 ;ixh = 1: last item was not a block copy
aploop: add a,a ;tag bit 1
jr nz,apnogetbit1
ld a,(hl) ;buffer ran out: load next source byte
inc hl
rla ;carry out = data bit; old marker becomes bit 0
apnogetbit1: jr nc,apbranch1 ;0 = literal
add a,a ;tag bit 2
jr nz,apnogetbit2
ld a,(hl)
inc hl
rla
apnogetbit2: jr nc,apbranch2 ;0 = gamma-coded match
add a,a ;tag bit 3
jr nz,apnogetbit3
ld a,(hl)
inc hl
rla
apnogetbit3: jr nc,apbranch3 ;0 = match with 7-bit offset
ld bc,16 ;get an offset: b = 0, the set bit in c falls out after four shifts
apget4bits: add a,a
jr nz,apnogetbit4
ld a,(hl)
inc hl
rla
apnogetbit4: rl c ;shift the offset bit in
jr nc,apget4bits
jr nz,apbranch4 ;non-zero offset: copy one earlier byte
ex de,hl
ld (hl),b ;write a 0
ex de,hl
inc de
jp aploop2
apbranch4: ex af,af' ;park the bit buffer in af'; F now holds what was in F'
ex de,hl ;write a previous byte (1-15 away from dest)
sbc hl,bc ;uses the carry just swapped in from F' - it has to be clear here
ld a,(hl)
add hl,bc ;hl = dest again
ld (hl),a
ex af,af' ;bit buffer back in a
ex de,hl
inc de
jp aploop2
;apbranch3: match described by one source byte: offset in bits 7-1,
;length selector in bit 0 (length 2 or 3). A zero offset ends the stream.
apbranch3: ld c,(hl) ;use 7 bit offset, length = 2 or 3
inc hl
ex af,af' ;park the bit buffer in af'; F now holds what was in F'
rr c ;c = offset, carry = length selector; the carry swapped in from F' lands in bit 7, so it has to be clear
ret z ;if a zero is found here, it's EOF
ld a,2
ld b,0
adc a,b ;a = 2 + selector bit; carry is clear afterwards
push hl ;save source pointer
ld iyh,b
ld iyl,c ;iy = offset, kept for ap_r0_gamma
ld h,d
ld l,e
sbc hl,bc ;hl = dest - offset
ld c,a ;bc = length
ex af,af' ;bit buffer back in a
ldir ;copy the match
pop hl ;restore source pointer
ld ixh,b ;b = 0 after ldir: mark "last item was a block copy"
jp aploop
;apbranch2: match with a gamma-coded offset high part, a raw offset low byte
;and a gamma-coded length.
;NOTE(review): only the 1280 and 128 offset thresholds adjust the length here;
;the reference aPLib depacker is believed to add one more for offsets >= 32000
;- confirm the packer never produces those for the data this is used on.
apbranch2: call ap_getgamma ;use a gamma code * 256 for offset, another gamma code for length
dec c
ex af,af' ;park the bit buffer in af'
ld a,c
sub ixh ;a = gamma - 1 - ixh
jr z,ap_r0_gamma ;zero (gamma = 2 with ixh = 1): reuse the offset kept in iy
dec a
;do I even need this code?
;bc=bc*256+(hl), lazy 16bit way
ld b,a ;offset high byte
ld c,(hl) ;offset low byte comes straight from the stream
inc hl
ld iyh,b
ld iyl,c ;iy = offset, kept for ap_r0_gamma
push bc ;offset on the stack while the length is read
call ap_getgamma2 ;swaps the bit buffer back into a, then bc = length
ex (sp),hl ;bc = len, hl=offs (source pointer now on the stack)
push de ;save dest
ex de,hl ;de = offset
ex af,af' ;park the bit buffer again; a is scratch below
ld a,4
cp d ;carry set when d > 4, i.e. offset >= 1280
jr nc,apskip2
inc bc ;offset >= 1280: length + 1
or a ;clear carry for the sbc below
apskip2: ld hl,127
sbc hl,de ;carry set when offset > 127
jr c,apskip3
inc bc ;offset <= 127: length + 2
inc bc
apskip3: pop hl ;hl = dest (pushed above), bc = len, de = offs
push hl ;keep dest on the stack for the pop de below
or a ;clear carry for the sbc below
sbc hl,de
ex af,af' ;bit buffer back in a
pop de ;hl=dest-offs, bc=len, de = dest
ldir ;copy the match
pop hl ;restore source pointer
ld ixh,b ;b = 0 after ldir: mark "last item was a block copy"
jp aploop
;ap_r0_gamma: match that reuses the offset stored in iy; only the length is read.
;Reached from apbranch2 with the bit buffer parked in af'.
ap_r0_gamma: call ap_getgamma2 ;and a new gamma code for length
push hl ;save source pointer
push de ;save dest
ex de,hl ;hl = dest
ld d,iyh
ld e,iyl ;de = previous offset
sbc hl,de ;carry is clear: every exit of ap_getgamma is a "ret nc"
pop de ;hl=dest-offs, bc=len, de = dest
ldir ;copy the match
pop hl ;restore source pointer
ld ixh,b ;b = 0 after ldir: mark "last item was a block copy"
jp aploop
;ap5..ap10: out-of-line bit-buffer refills for ap_getgamma. Each stub is
;entered when "add a,a" left a = 0, loads the next source byte, shifts its
;first bit into carry (the old marker becomes bit 0) and jumps back to the
;instruction that follows the corresponding "jr z".
ap5: ld a,(hl)
inc hl
rla
jp apnogetbit5
ap6: ld a, (hl)
inc hl
rla
jp apnogetbit6
ap7: ld a, (hl)
inc hl
rla
jp apnogetbit7
ap8: ld a, (hl)
inc hl
rla
jp apnogetbit8
ap9: ld a, (hl)
inc hl
rla
jp apnogetbit9
;ap10 tests the continuation bit itself instead of jumping back to apnogetbit10
ap10: ld a, (hl)
inc hl
rla
ret nc
jp ap_getgammaloop
;ap_getgamma / ap_getgamma2: read a variable-length number into bc.
; ap_getgamma2 - entry for callers that have the bit buffer parked in af'
; ap_getgamma  - entry for callers that already have it in a
;bc starts at 1; each round shifts one data bit into bc and then reads a
;continuation bit (0 = done). The first two rounds are unrolled and skip
;"rl b". When "add a,a" empties the buffer the code branches to the matching
;ap5..ap10 stub, so the non-empty case falls straight through.
;Every exit is taken with carry clear.
ap_getgamma2: ex af, af'
ap_getgamma: ld bc, 1
add a, a ;round 1: data bit
jr z, ap5
apnogetbit5: rl c
add a, a ;round 1: continuation bit
jr z, ap6
apnogetbit6: ret nc
add a, a ;round 2: data bit
jr z, ap7
apnogetbit7: rl c
add a, a ;round 2: continuation bit
jr z, ap8
apnogetbit8: ret nc
ap_getgammaloop:add a, a ;generic round: data bit, 16-bit shift
jr z, ap9
apnogetbit9: rl c
rl b
add a, a ;continuation bit
jr z, ap10
apnogetbit10: ret nc
jp ap_getgammaloop