Skip to content

Commit dcbeba1

Browse files
committed
Fixed scoring for compression codecs
1 parent 224b2d0 commit dcbeba1

5 files changed

Lines changed: 13 additions & 7 deletions

File tree

codext/__common__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1402,8 +1402,8 @@ def __score(prev_input, input, prev_encoding, encoding, codec, heuristic=False,
14021402
except TypeError:
14031403
entr = entr(obj.entropy)
14041404
if entr is not None:
1405-
# use a quadratic heuristic to compute a weight for the entropy delta, aligned on (100w,.1) and (200w,1)
1406-
d_entr = min(5.958194e-06 * obj.len**2 - .002381 * obj.len, 1) * abs(entr - entropy(new_input))
1405+
# use a quadratic heuristic to compute a weight for the entropy delta, aligned on (256,.3) and (512,1)
1406+
d_entr = min(3.04575e-06 * obj.len**2 + .000394 * obj.len, 1) * abs(entr - entropy(new_input))
14071407
if d_entr <= .5:
14081408
s += .5 - d_entr
14091409
# finally, if relevant, apply a custom bonus (e.g. when a regex pattern is matched)

codext/compressions/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,9 @@
44
from .lz78 import *
55
from .pkzip import *
66

7+
8+
for e in list_encodings("compression"):
9+
ci = lookup(e, False)
10+
ci.parameters['scoring']['entropy'] = 7.9
11+
ci.parameters['scoring']['expansion_factor'] = lambda f: f
12+

codext/compressions/gzipp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,5 +40,5 @@ def gzip_decompress(data, errors="strict"):
4040
return r, len(r)
4141

4242

43-
add("gzip", gzip_compress, gzip_decompress, entropy=7.9)
43+
add("gzip", gzip_compress, gzip_decompress)
4444

codext/compressions/lz77.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,5 +70,5 @@ def lz77_decompress(input, errors="strict"):
7070
return out, len(out)
7171

7272

73-
add("lz77", lz77_compress, lz77_decompress, entropy=7.9)
73+
add("lz77", lz77_compress, lz77_decompress)
7474

codext/compressions/pkzip.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,11 @@ def _decode(data, errors="strict"):
4646

4747

4848
add("pkzip_deflate", pkzip_encode(8), pkzip_decode(8, "deflate"), r"(?:(?:pk)?zip[-_])?deflate",
49-
entropy=7.9, examples=__examples1__, guess=["deflate"])
49+
examples=__examples1__, guess=["deflate"])
5050

5151
add("pkzip_bzip2", pkzip_encode(12), pkzip_decode(12, "bzip2"), r"(?:(?:pk)?zip[-_])?bz(?:ip)?2",
52-
entropy=7.9, examples=__examples2__, guess=["bz2"])
52+
examples=__examples2__, guess=["bz2"])
5353

5454
add("pkzip_lzma", pkzip_encode(14), pkzip_decode(14, "lzma"), r"(?:(?:pk)?zip[-_])?lzma",
55-
entropy=7.9, examples=__examples3__, guess=["lzma"])
55+
examples=__examples3__, guess=["lzma"])
5656

0 commit comments

Comments
 (0)