Skip to content

Commit 0f4f72c

Browse files
authored
Merge pull request #16 from oxinabox/ox/eq
Make equality checks faster and fix hash
2 parents f7d4f41 + 3296160 commit 0f4f72c

4 files changed

Lines changed: 133 additions & 99 deletions

File tree

src/base.jl

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ end
4040

4141
# Build a `String` from the first `sizeof(s)` bytes of `ntoh(s.size_content)`.
function String(s::ShortString)
    bytes = reinterpret(UInt8, [ntoh(s.size_content)])
    return String(bytes[1:sizeof(s)])
end
4242

43-
Base.codeunit(s::ShortString) = codeunit(String(s))
43+
# A ShortString reports `UInt8` as its code unit type; the instance itself is not inspected.
Base.codeunit(::ShortString) = UInt8
4444
# Untyped-index fallback: forwards to `codeunit` on the `String` conversion of `s`.
# (`Integer` indices dispatch to the more specific `i::Integer` method.)
# This must call `codeunit` (singular): Base defines no two-argument `codeunits`,
# so forwarding to it could only raise a `MethodError` naming the wrong function.
Base.codeunit(s::ShortString, i) = codeunit(String(s), i)
4545
# Code unit at index `i`, read from the `String` conversion of `s`.
function Base.codeunit(s::ShortString, i::Integer)
    str = String(s)
    return codeunit(str, i)
end
4646
# Code units of `s`, obtained from its `String` conversion.
function Base.codeunits(s::ShortString)
    str = String(s)
    return codeunits(str)
end
@@ -52,18 +52,19 @@ Base.isvalid(s::ShortString, i::Integer) = isvalid(String(s), i)
5252
# Start iteration by delegating to the `String` conversion of `s`.
function Base.iterate(s::ShortString)
    str = String(s)
    return iterate(str)
end
5353
# Continue iteration from state `i` by delegating to the `String` conversion of `s`.
function Base.iterate(s::ShortString, i::Integer)
    str = String(s)
    return iterate(str, i)
end
5454
# The last index is reported as the stored size of `s`.
function Base.lastindex(s::ShortString)
    return sizeof(s)
end
55-
Base.ncodeunits(s::ShortString) = ncodeunits(String(s))
55+
# Number of code units, taken directly from `sizeof(s)` without building a `String`.
function Base.ncodeunits(s::ShortString)
    return sizeof(s)
end
5656
# Print `s` via its `String` conversion (this method takes no `io` argument).
function Base.print(s::ShortString)
    return print(String(s))
end
5757
# Show `str` on `io` exactly as its `String` conversion would be shown.
function Base.show(io::IO, str::ShortString)
    return show(io, String(str))
end
58-
Base.sizeof(s::ShortString{T}) where T = Int(s.size_content & size_mask(T))
58+
# Stored size of `s`: `size_content` masked with `size_mask(s)` (taken `% UInt`), as an `Int`.
function Base.sizeof(s::ShortString{T}) where T
    mask = size_mask(s) % UInt
    return Int(s.size_content & mask)
end
5959

6060
size_nibbles(::Type{<:Union{UInt16, UInt32, UInt64, UInt128}}) = 1
6161
size_nibbles(::Type{<:Union{Int16, Int32, Int64, Int128}}) = 1
6262
size_nibbles(::Type{<:Union{UInt256, UInt512, UInt1024}}) = 2
6363
size_nibbles(::Type{<:Union{Int256, Int512, Int1024}}) = 2
6464
# Fallback for types without a dedicated method: ceil(log2(sizeof(T)) / 4).
# `ceil(Int, …)` returns an `Int`, consistent with the literal `1`/`2` returned by the
# methods for the fixed-width integer types (plain `ceil` would return a `Float64`).
size_nibbles(::Type{T}) where T = ceil(Int, log2(sizeof(T)) / 4)
6565

66-
size_mask(T) = UInt(exp2(4*size_nibbles(T)) - 1)
66+
# Value `2^(4 * size_nibbles(T)) - 1`, computed with `exp2` and converted to `T`.
function size_mask(T)
    nbits = 4 * size_nibbles(T)
    return T(exp2(nbits) - 1)
end
67+
# Instance form: forwards to `size_mask` for the type parameter `T` of `s`.
function size_mask(s::ShortString{T}) where T
    return size_mask(T)
end
6768

6869

6970
# function Base.getindex(s::ShortString, i::Integer)
@@ -76,9 +77,27 @@ size_mask(T) = UInt(exp2(4*size_nibbles(T)) - 1)
7677

7778
# Collect the elements of `s` by collecting its `String` conversion.
function Base.collect(s::ShortString)
    str = String(s)
    return collect(str)
end
7879

79-
==(s::ShortString, b::AbstractString) = begin
80-
String(s) == b
80+
# Equality between a ShortString and a `String` / `SubString{String}`.
# Returns false when the code unit counts differ; otherwise converts `b` to
# `ShortString{S}` and compares the two ShortStrings with `==`.
function ==(s::ShortString{S}, b::Union{String, SubString{String}}) where S
81+
# length guard: `b` is only converted when it has as many code units as `s`
ncodeunits(b) == ncodeunits(s) || return false
82+
return s == ShortString{S}(b)
8183
end
84+
# Generic fallback for any other `AbstractString`: compares the `String` conversion
# of `s` against `b`.
function ==(s::ShortString, b::AbstractString)
85+
# Could be a string type that might not use UTF8 encoding and that we don't have a
86+
# constructor for. Defer to equality that type probably has defined on `String`
87+
return String(s) == b
88+
end
89+
90+
# Reversed argument order: swap the operands and reuse the ShortString-first comparison.
function ==(a::AbstractString, b::ShortString)
    return b == a
end
91+
# Both operands share the type parameter `S`: equality is a direct comparison of
# their `size_content` fields.
function ==(a::ShortString{S}, b::ShortString{S}) where S
92+
return a.size_content == b.size_content
93+
end
94+
# Operands with different type parameters `A` and `B`.
# Returns false when the code unit counts differ; otherwise masks out the size bits
# of each `size_content` (`& ~size_mask`), applies `ntoh`, and compares the results.
function ==(a::ShortString{A}, b::ShortString{B}) where {A,B}
95+
ncodeunits(a) == ncodeunits(b) || return false
96+
# compare if equal after dropping size bits and
97+
# flipping so that the empty bytes are at the start
98+
ntoh(a.size_content & ~size_mask(A)) == ntoh(b.size_content & ~size_mask(B))
99+
end
100+
82101

83102
function Base.cmp(a::ShortString{S}, b::ShortString{S}) where S
84103
return cmp(a.size_content, b.size_content)

src/hash.jl

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,4 @@ export hash
22

33
import Base.hash
44

5-
Base.hash(x::ShortString, args...; kwargs...) = hash(x.size_content, args...; kwargs...)
6-
7-
Base.hash(x::ShortString, h::UInt) = hash(x.size_content, h)
5+
# Hash via the `String` conversion, so the result is the hash of that `String` seeded with `h`.
function Base.hash(x::ShortString, h::UInt)
    return hash(String(x), h)
end

test/hash.jl

Lines changed: 0 additions & 5 deletions
This file was deleted.

test/runtests.jl

Lines changed: 107 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,85 +1,107 @@
1-
using ShortStrings
2-
using BitIntegers: UInt256, UInt512, UInt1024, @define_integers
3-
using Test, Random
4-
5-
include("getindex.jl")
6-
include("hash.jl")
7-
8-
function basic_test(constructor, max_len)
9-
@testset "$constructor" begin
10-
for string_type in (String, SubString{String})
11-
@testset "$string_type" begin
12-
basic_test(string_type, constructor, max_len)
13-
end
14-
end
15-
end
16-
end
17-
18-
function basic_test(string_type, constructor, max_len)
19-
r = string_type.(randstring.(1:max_len))
20-
@test all(constructor.(r) .== r)
21-
a = constructor.(r)
22-
@test fsort(a) |> issorted
23-
24-
@test collect(constructor("z"^max_len)) == fill('z', max_len)
25-
@test_throws ErrorException constructor("a"^(max_len+1))
26-
end
27-
28-
29-
basic_test(ShortString3, 3)
30-
basic_test(ShortString7, 7)
31-
basic_test(ShortString15, 15)
32-
basic_test(ShortString30, 30)
33-
basic_test(ShortString62, 62)
34-
basic_test(ShortString126, 126)
35-
36-
basic_test(ShortString{UInt16}, 1)
37-
basic_test(ShortString{UInt32}, 3)
38-
basic_test(ShortString{UInt64}, 7)
39-
basic_test(ShortString{UInt128}, 15)
40-
basic_test(ShortString{UInt256}, 30)
41-
basic_test(ShortString{UInt512}, 62)
42-
basic_test(ShortString{UInt1024}, 126)
43-
44-
@define_integers 2048 MyInt2048 MyUInt2048
45-
basic_test(ShortString{MyUInt2048}, 254)
46-
47-
@test ss126"Be honest, do you actually need a string longer than this. Seriously. C'mon this is pretty long." === ShortString126("Be honest, do you actually need a string longer than this. Seriously. C'mon this is pretty long.")
48-
@test ss62"Basically a failly long string really" === ShortString62("Basically a failly long string really")
49-
@test ss30"A Longer String!!!" === ShortString30("A Longer String!!!")
50-
51-
@test ss15"Short String!!!" === ShortString15("Short String!!!")
52-
@test ss7"ShrtStr" === ShortString7("ShrtStr")
53-
@test ss3"ss3" === ShortString3("ss3")
54-
55-
56-
@testset "cmp" begin
57-
@test cmp(ShortString3("abc"), ShortString3("abc")) == 0
58-
@test cmp(ShortString3("ab"), ShortString3("abc")) == -1
59-
@test cmp(ShortString3("abc"), ShortString3("ab")) == 1
60-
@test cmp(ShortString3("ab"), ShortString3("ac")) == -1
61-
@test cmp(ShortString3("ac"), ShortString3("ab")) == 1
62-
@test cmp(ShortString3("α"), ShortString3("a")) == 1
63-
@test cmp(ShortString3("b"), ShortString3("β")) == -1
64-
65-
@test cmp(ShortString3("abc"), "abc") == 0
66-
@test cmp(ShortString3("ab"), "abc") == -1
67-
@test cmp(ShortString3("abc"), "ab") == 1
68-
@test cmp(ShortString3("ab"), "ac") == -1
69-
@test cmp(ShortString3("ac"), "ab") == 1
70-
@test cmp(ShortString3("α"), "a") == 1
71-
@test cmp(ShortString3("b"), "β") == -1
72-
end
73-
74-
@testset "Construction from other ShortStrings" begin
75-
@test ShortString7(ShortString3("ab")) == "ab"
76-
@test ShortString7(ShortString3("ab")) isa ShortString7
77-
78-
@test ShortString3(ShortString7("ab")) == "ab"
79-
@test ShortString3(ShortString7("ab")) isa ShortString3
80-
81-
@test ShortString7(ShortString7("ab")) == "ab"
82-
@test ShortString7(ShortString7("ab")) isa ShortString7
83-
84-
@test_throws ErrorException ShortString3(ShortString7("123456"))
85-
end
1+
using ShortStrings
2+
using BitIntegers: UInt256, UInt512, UInt1024, @define_integers
3+
using Test, Random
4+
5+
include("getindex.jl")
6+
7+
function basic_test(constructor, max_len)
8+
@testset "$constructor" begin
9+
for string_type in (String, SubString{String})
10+
@testset "$string_type" begin
11+
basic_test(string_type, constructor, max_len)
12+
end
13+
end
14+
end
15+
end
16+
17+
function basic_test(string_type, constructor, max_len)
18+
r = string_type.(randstring.(1:max_len))
19+
@test all(constructor.(r) .== r)
20+
# hash each string individually so every element is compared with its source string's hash
@test all(hash.(constructor.(r)) .== hash.(r))
21+
a = constructor.(r)
22+
@test fsort(a) |> issorted
23+
24+
@test collect(constructor("z"^max_len)) == fill('z', max_len)
25+
@test_throws ErrorException constructor("a"^(max_len+1))
26+
27+
# equality
28+
@test constructor("c"^max_len) == "c"^max_len
29+
@test "c"^max_len == constructor("c"^max_len)
30+
@test constructor("c"^max_len) == constructor("c"^max_len)
31+
@test constructor("c"^max_len) != constructor("d"^max_len)
32+
@test constructor("c"^max_len) != constructor("c"^(max_len-1))
33+
@test constructor("c"^(max_len-1)) != constructor("c"^max_len)
34+
@test constructor("c"^max_len) != "c"^(max_len-1)
35+
@test constructor("c"^(max_len-1)) != "c"^max_len
36+
end
37+
38+
39+
basic_test(ShortString3, 3)
40+
basic_test(ShortString7, 7)
41+
basic_test(ShortString15, 15)
42+
basic_test(ShortString30, 30)
43+
basic_test(ShortString62, 62)
44+
basic_test(ShortString126, 126)
45+
46+
basic_test(ShortString{UInt16}, 1)
47+
basic_test(ShortString{UInt32}, 3)
48+
basic_test(ShortString{UInt64}, 7)
49+
basic_test(ShortString{UInt128}, 15)
50+
basic_test(ShortString{UInt256}, 30)
51+
basic_test(ShortString{UInt512}, 62)
52+
basic_test(ShortString{UInt1024}, 126)
53+
54+
@define_integers 2048 MyInt2048 MyUInt2048
55+
basic_test(ShortString{MyUInt2048}, 254)
56+
57+
@test ss126"Be honest, do you actually need a string longer than this. Seriously. C'mon this is pretty long." === ShortString126("Be honest, do you actually need a string longer than this. Seriously. C'mon this is pretty long.")
58+
@test ss62"Basically a failly long string really" === ShortString62("Basically a failly long string really")
59+
@test ss30"A Longer String!!!" === ShortString30("A Longer String!!!")
60+
61+
@test ss15"Short String!!!" === ShortString15("Short String!!!")
62+
@test ss7"ShrtStr" === ShortString7("ShrtStr")
63+
@test ss3"ss3" === ShortString3("ss3")
64+
65+
66+
@testset "equality of different sized ShortStrings" begin
67+
@test ShortString15("ab") == ShortString3("ab")
68+
@test ShortString3("ab") == ShortString15("ab")
69+
70+
@test ShortString30("x") != ShortString3("y")
71+
@test ShortString30("y") != ShortString3("x")
72+
73+
# this one is too big to fit in the other
74+
@test ShortString15("abcd") != ShortString3("ab")
75+
@test ShortString3("ab") != ShortString15("abcd")
76+
end
77+
78+
@testset "cmp" begin
79+
@test cmp(ShortString3("abc"), ShortString3("abc")) == 0
80+
@test cmp(ShortString3("ab"), ShortString3("abc")) == -1
81+
@test cmp(ShortString3("abc"), ShortString3("ab")) == 1
82+
@test cmp(ShortString3("ab"), ShortString3("ac")) == -1
83+
@test cmp(ShortString3("ac"), ShortString3("ab")) == 1
84+
@test cmp(ShortString3("α"), ShortString3("a")) == 1
85+
@test cmp(ShortString3("b"), ShortString3("β")) == -1
86+
87+
@test cmp(ShortString3("abc"), "abc") == 0
88+
@test cmp(ShortString3("ab"), "abc") == -1
89+
@test cmp(ShortString3("abc"), "ab") == 1
90+
@test cmp(ShortString3("ab"), "ac") == -1
91+
@test cmp(ShortString3("ac"), "ab") == 1
92+
@test cmp(ShortString3("α"), "a") == 1
93+
@test cmp(ShortString3("b"), "β") == -1
94+
end
95+
96+
@testset "Construction from other ShortStrings" begin
97+
@test ShortString7(ShortString3("ab")) == "ab"
98+
@test ShortString7(ShortString3("ab")) isa ShortString7
99+
100+
@test ShortString3(ShortString7("ab")) == "ab"
101+
@test ShortString3(ShortString7("ab")) isa ShortString3
102+
103+
@test ShortString7(ShortString7("ab")) == "ab"
104+
@test ShortString7(ShortString7("ab")) isa ShortString7
105+
106+
@test_throws ErrorException ShortString3(ShortString7("123456"))
107+
end

0 commit comments

Comments
 (0)