Skip to content

Commit ef2cfb1

Browse files
Update docstrings and Python version
Signed-off-by: Ayan Sinha Mahapatra <ayansmahapatra@gmail.com>
1 parent d04df2c commit ef2cfb1

2 files changed

Lines changed: 63 additions & 55 deletions

File tree

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ zip_safe = false
3737

3838
setup_requires = setuptools_scm[toml] >= 4
3939

40-
python_requires = >=3.8
40+
python_requires = >=3.9
4141

4242
install_requires =
4343
attrs >= 18.1, !=20.1.0

src/commoncode/hash.py

Lines changed: 62 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -26,42 +26,8 @@
2626
Checksums are operating on files.
2727
"""
2828

29-
30-
class Hashable:
31-
"""
32-
A mixin for hashers that provides the base methods.
33-
"""
34-
35-
# digest_size = length of binary digest for this hash
36-
# binh = binary hasher module
37-
# msg_len = length in bytes of the messages hashed
38-
# total_length = total length in bytes of the messages hashed
39-
40-
def digest(self):
41-
"""
42-
Return a bytes string digest for this hash.
43-
"""
44-
if not self.msg_len:
45-
return
46-
return self.binh.digest()[: self.digest_size]
47-
48-
def hexdigest(self):
49-
"""
50-
Return a string hex digest for this hash.
51-
"""
52-
return self.msg_len and binascii.hexlify(self.digest()).decode("utf-8")
53-
54-
def b64digest(self):
55-
"""
56-
Return a string base64 digest for this hash.
57-
"""
58-
return self.msg_len and urlsafe_b64encode(self.digest()).decode("utf-8")
59-
60-
def intdigest(self):
61-
"""
62-
Return a int digest for this hash.
63-
"""
64-
return self.msg_len and int(bin_to_num(self.digest()))
29+
# This is ~16 MB
30+
FILE_CHUNK_SIZE = 2**24
6531

6632

6733
def _hash_mod(bitsize, hmodule):
@@ -78,9 +44,15 @@ def __init__(self, msg=None, **kwargs):
7844
Return a hasher, populated with an initial ``msg`` bytes string.
7945
Close on the bitsize and hmodule
8046
"""
47+
# length of binary digest for this hash
8148
self.digest_size = bitsize // 8
49+
50+
# binh = binary hasher module
8251
self.binh = hmodule()
52+
53+
# msg_len = length in bytes of the message hashed
8354
self.msg_len = 0
55+
8456
if msg:
8557
self.update(msg)
8658

@@ -95,13 +67,43 @@ def update(self, msg=None):
9567
return hasher
9668

9769

70+
class Hashable:
71+
"""
72+
A mixin for hashers that provides the base methods.
73+
"""
74+
75+
def digest(self):
76+
"""
77+
Return a bytes string digest for this hash.
78+
"""
79+
if not self.msg_len:
80+
return
81+
return self.binh.digest()[: self.digest_size]
82+
83+
def hexdigest(self):
84+
"""
85+
Return a string hex digest for this hash.
86+
"""
87+
return self.msg_len and binascii.hexlify(self.digest()).decode("utf-8")
88+
89+
def b64digest(self):
90+
"""
91+
Return a string base64 digest for this hash.
92+
"""
93+
return self.msg_len and urlsafe_b64encode(self.digest()).decode("utf-8")
94+
95+
def intdigest(self):
96+
"""
97+
Return an int digest for this hash.
98+
"""
99+
return self.msg_len and int(bin_to_num(self.digest()))
100+
101+
98102
# for FIPS support, we declare that "usedforsecurity" is False
99103
sys_v0 = sys.version_info[0]
100104
sys_v1 = sys.version_info[1]
101-
if sys_v0 == 3 and sys_v1 >= 9:
102-
md5_hasher = partial(hashlib.md5, usedforsecurity=False)
103-
else:
104-
md5_hasher = hashlib.md5
105+
md5_hasher = partial(hashlib.md5, usedforsecurity=False)
106+
105107

106108
# Base hashers for each bit size
107109
_hashmodules_by_bitsize = {
@@ -135,6 +137,9 @@ def __init__(self, msg=None, total_length=0, **kwargs):
135137
Initialize a sha1_git_hasher with an optional ``msg`` byte string. The ``total_length`` of
136138
all content that will be hashed, combining the ``msg`` length plus any later call to
137139
update() with additional messages.
140+
141+
Here ``total_length`` is the total length in bytes of all the messages (chunks) hashed,
142+
in contrast to ``msg_len``, which is the length in bytes of the optional message.
138143
"""
139144
self.digest_size = 160 // 8
140145
self.msg_len = 0
@@ -235,6 +240,19 @@ def checksum_from_chunks(chunks, name, total_length=0, base64=False):
235240
return hasher.hexdigest()
236241

237242

243+
def binary_chunks(location, size=FILE_CHUNK_SIZE):
244+
"""
245+
Read file at ``location`` as binary and yield bytes of up to ``size`` length in bytes,
246+
defaulting to 2**24 bytes, i.e., about 16 MB.
247+
"""
248+
with open(location, "rb") as f:
249+
while True:
250+
chunk = f.read(size)
251+
if not chunk:
252+
break
253+
yield chunk
254+
255+
238256
def md5(location):
239257
return checksum(location, name="md5", base64=False)
240258

@@ -259,26 +277,16 @@ def sha1_git(location):
259277
return checksum(location, name="sha1_git", base64=False)
260278

261279

262-
def binary_chunks(location, size=2**24):
263-
"""
264-
Read file at ``location`` as binary and yield bytes of up to ``size`` length in bytes,
265-
defaulting to 2**24 bytes, e.g., about 16 MB.
266-
"""
267-
with open(location, "rb") as f:
268-
while True:
269-
chunk = f.read(size)
270-
if not chunk:
271-
break
272-
yield chunk
273-
274-
275280
def multi_checksums(location, checksum_names=("md5", "sha1", "sha256", "sha512", "sha1_git")):
276281
"""
277282
Return a mapping of hexdigest checksum strings keyed by checksum algorithm name from hashing the
278283
content of the file at ``location``. Use the ``checksum_names`` list of checksum names. The
279284
mapping is guaranteed to contain all the requested names as keys. If the location is not a file,
280285
or if the file is empty, the values are None.
281-
The purpose of this function is
286+
287+
The purpose of this function is to return a set of checksums for a supported set of checksum
288+
algorithms for a given location. This is an API function used in ScanCode --info plugin to get
289+
checksum values.
282290
"""
283291
if not filetype.is_file(location):
284292
return {name: None for name in checksum_names}

0 commit comments

Comments
 (0)