2626Checksums are operating on files.
2727"""
2828
29-
30- class Hashable :
31- """
32- A mixin for hashers that provides the base methods.
33- """
34-
35- # digest_size = length of binary digest for this hash
36- # binh = binary hasher module
37- # msg_len = length in bytes of the messages hashed
38- # total_length = total length in bytes of the messages hashed
39-
40- def digest (self ):
41- """
42- Return a bytes string digest for this hash.
43- """
44- if not self .msg_len :
45- return
46- return self .binh .digest ()[: self .digest_size ]
47-
48- def hexdigest (self ):
49- """
50- Return a string hex digest for this hash.
51- """
52- return self .msg_len and binascii .hexlify (self .digest ()).decode ("utf-8" )
53-
54- def b64digest (self ):
55- """
56- Return a string base64 digest for this hash.
57- """
58- return self .msg_len and urlsafe_b64encode (self .digest ()).decode ("utf-8" )
59-
60- def intdigest (self ):
61- """
62- Return a int digest for this hash.
63- """
64- return self .msg_len and int (bin_to_num (self .digest ()))
# Default size in bytes of the chunks read from a file when hashing: 2**24 bytes, about 16 MB.
FILE_CHUNK_SIZE = 16 * 1024 * 1024
6531
6632
6733def _hash_mod (bitsize , hmodule ):
@@ -78,9 +44,15 @@ def __init__(self, msg=None, **kwargs):
7844 Return a hasher, populated with an initial ``msg`` bytes string.
7945 Close on the bitsize and hmodule
8046 """
47+ # length of binary digest for this hash
8148 self .digest_size = bitsize // 8
49+
50+ # binh = binary hasher module
8251 self .binh = hmodule ()
52+
53+ # msg_len = length in bytes of the message hashed
8354 self .msg_len = 0
55+
8456 if msg :
8557 self .update (msg )
8658
@@ -95,13 +67,43 @@ def update(self, msg=None):
9567 return hasher
9668
9769
class Hashable:
    """
    A mixin that provides the digest accessor methods shared by hashers.

    The host class is expected to provide these attributes:
    - ``msg_len``: length in bytes of the message hashed. When it is falsy, no digest is computed.
    - ``binh``: the underlying binary hasher, exposing a ``digest()`` method.
    - ``digest_size``: length in bytes of the binary digest to keep.
    """

    def digest(self):
        """
        Return the digest as a bytes string truncated to ``digest_size`` bytes, or None if
        ``msg_len`` is falsy, e.g., nothing was hashed.
        """
        if self.msg_len:
            raw_digest = self.binh.digest()
            return raw_digest[: self.digest_size]
        return None

    def hexdigest(self):
        """
        Return the digest as a hexadecimal string. If ``msg_len`` is falsy, return ``msg_len``
        itself unchanged instead of a string.
        """
        length = self.msg_len
        if not length:
            return length
        hex_bytes = binascii.hexlify(self.digest())
        return hex_bytes.decode("utf-8")

    def b64digest(self):
        """
        Return the digest as a URL-safe base64 string. If ``msg_len`` is falsy, return
        ``msg_len`` itself unchanged instead of a string.
        """
        length = self.msg_len
        if not length:
            return length
        b64_bytes = urlsafe_b64encode(self.digest())
        return b64_bytes.decode("utf-8")

    def intdigest(self):
        """
        Return the digest as an int. If ``msg_len`` is falsy, return ``msg_len`` itself
        unchanged.
        """
        length = self.msg_len
        if not length:
            return length
        number = bin_to_num(self.digest())
        return int(number)
100+
101+
# Major and minor version numbers of the running Python interpreter
sys_v0, sys_v1 = sys.version_info[:2]

# for FIPS support, we declare that "usedforsecurity" is False
md5_hasher = partial(hashlib.md5, usedforsecurity=False)
106+
105107
106108# Base hashers for each bit size
107109_hashmodules_by_bitsize = {
@@ -135,6 +137,9 @@ def __init__(self, msg=None, total_length=0, **kwargs):
135137 Initialize a sha1_git_hasher with an optional ``msg`` byte string. The ``total_length`` of
136138 all content that will be hashed, combining the ``msg`` length plus any later call to
137139 update() with additional messages.
140+
141+ Here ``total_length`` is the total length in bytes of all the messages (chunks) hashed,
142+ in contrast to ``msg_len``, which is the length in bytes of the optional message.
138143 """
139144 self .digest_size = 160 // 8
140145 self .msg_len = 0
@@ -235,6 +240,19 @@ def checksum_from_chunks(chunks, name, total_length=0, base64=False):
235240 return hasher .hexdigest ()
236241
237242
def binary_chunks(location, size=FILE_CHUNK_SIZE):
    """
    Yield the content of the file at ``location`` as successive bytes strings of at most ``size``
    bytes each. The file is opened in binary mode and ``size`` defaults to FILE_CHUNK_SIZE.
    """
    with open(location, "rb") as stream:
        # The two-argument form of iter() calls read() repeatedly and stops at the first empty
        # bytes string, which is what read() returns at the end of the file.
        yield from iter(lambda: stream.read(size), b"")
254+
255+
def md5(location):
    """
    Return the "md5" checksum of the file at ``location``, as computed by checksum() with
    base64 encoding disabled.
    """
    return checksum(location, base64=False, name="md5")
240258
@@ -259,26 +277,16 @@ def sha1_git(location):
259277 return checksum (location , name = "sha1_git" , base64 = False )
260278
261279
262- def binary_chunks (location , size = 2 ** 24 ):
263- """
264- Read file at ``location`` as binary and yield bytes of up to ``size`` length in bytes,
265- defaulting to 2**24 bytes, e.g., about 16 MB.
266- """
267- with open (location , "rb" ) as f :
268- while True :
269- chunk = f .read (size )
270- if not chunk :
271- break
272- yield chunk
273-
274-
275280def multi_checksums (location , checksum_names = ("md5" , "sha1" , "sha256" , "sha512" , "sha1_git" )):
276281 """
277282 Return a mapping of hexdigest checksum strings keyed by checksum algorithm name from hashing the
278283 content of the file at ``location``. Use the ``checksum_names`` list of checksum names. The
279284 mapping is guaranteed to contain all the requested names as keys. If the location is not a file,
280285 or if the file is empty, the values are None.
281- The purpose of this function is
286+
287+ The purpose of this function is to return a set of checksums for a supported set of checksum
288+ algorithms for a given location. This is an API function used in ScanCode --info plugin to get
289+ checksum values.
282290 """
283291 if not filetype .is_file (location ):
284292 return {name : None for name in checksum_names }
0 commit comments