Skip to content

Commit 9060b4a

Browse files
gh-125346: Deprecate accepting standard Base64 alphabet when alternative alphabet is used (GH-141128)
Emit a warning in base64.urlsafe_b64decode() and base64.b64decode() when the "+" or "/" characters occur in Base64 data decoded with an alternative alphabet and they are not part of that alternative alphabet. It is a DeprecationWarning in strict mode (this will become an error) and a FutureWarning in non-strict mode (these characters will be discarded).
1 parent 48795b6 commit 9060b4a

File tree

5 files changed

+86
-17
lines changed

5 files changed

+86
-17
lines changed

Doc/library/base64.rst

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,20 @@ POST request.
8484
A :exc:`binascii.Error` exception is raised
8585
if *s* is incorrectly padded.
8686

87-
If *validate* is ``False`` (the default), characters that are neither
87+
If *validate* is false (the default), characters that are neither
8888
in the normal base-64 alphabet nor the alternative alphabet are
89-
discarded prior to the padding check. If *validate* is ``True``,
90-
these non-alphabet characters in the input result in a
91-
:exc:`binascii.Error`.
89+
discarded prior to the padding check, but the ``+`` and ``/`` characters
90+
keep their meaning if they are not in *altchars* (they will be discarded
91+
in future Python versions).
92+
If *validate* is true, these non-alphabet characters in the input
93+
result in a :exc:`binascii.Error`.
9294

9395
For more information about the strict base64 check, see :func:`binascii.a2b_base64`
9496

95-
May assert or raise a :exc:`ValueError` if the length of *altchars* is not 2.
97+
.. deprecated:: next
98+
Accepting the ``+`` and ``/`` characters with an alternative alphabet
99+
is now deprecated.
100+
96101

97102
.. function:: standard_b64encode(s)
98103

@@ -123,6 +128,9 @@ POST request.
123128
``/`` in the standard Base64 alphabet, and return the decoded
124129
:class:`bytes`.
125130

131+
.. deprecated:: next
132+
Accepting the ``+`` and ``/`` characters is now deprecated.
133+
126134

127135
.. function:: b32encode(s)
128136

Doc/whatsnew/3.15.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,6 +1149,15 @@ Deprecated
11491149
New deprecations
11501150
----------------
11511151

1152+
* :mod:`base64`:
1153+
1154+
* Accepting the ``+`` and ``/`` characters with an alternative alphabet in
1155+
:func:`~base64.b64decode` and :func:`~base64.urlsafe_b64decode` is now
1156+
deprecated.
1157+
In future Python versions they will be errors in the strict mode and
1158+
discarded in the non-strict mode.
1159+
(Contributed by Serhiy Storchaka in :gh:`125346`.)
1160+
11521161
* CLI:
11531162

11541163
* Deprecate :option:`-b` and :option:`!-bb` command-line options

Lib/base64.py

Lines changed: 35 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,20 +72,39 @@ def b64decode(s, altchars=None, validate=False):
7272
The result is returned as a bytes object. A binascii.Error is raised if
7373
s is incorrectly padded.
7474
75-
If validate is False (the default), characters that are neither in the
75+
If validate is false (the default), characters that are neither in the
7676
normal base-64 alphabet nor the alternative alphabet are discarded prior
77-
to the padding check. If validate is True, these non-alphabet characters
77+
to the padding check. If validate is true, these non-alphabet characters
7878
in the input result in a binascii.Error.
7979
For more information about the strict base64 check, see:
8080
8181
https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64
8282
"""
8383
s = _bytes_from_decode_data(s)
84+
badchar = None
8485
if altchars is not None:
8586
altchars = _bytes_from_decode_data(altchars)
86-
assert len(altchars) == 2, repr(altchars)
87+
if len(altchars) != 2:
88+
raise ValueError(f'invalid altchars: {altchars!r}')
89+
for b in b'+/':
90+
if b not in altchars and b in s:
91+
badchar = b
92+
break
8793
s = s.translate(bytes.maketrans(altchars, b'+/'))
88-
return binascii.a2b_base64(s, strict_mode=validate)
94+
result = binascii.a2b_base64(s, strict_mode=validate)
95+
if badchar is not None:
96+
import warnings
97+
if validate:
98+
warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
99+
f'with altchars={altchars!r} and validate=True '
100+
f'will be an error in future Python versions',
101+
DeprecationWarning, stacklevel=2)
102+
else:
103+
warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
104+
f'with altchars={altchars!r} and validate=False '
105+
f'will be discarded in future Python versions',
106+
FutureWarning, stacklevel=2)
107+
return result
89108

90109

91110
def standard_b64encode(s):
@@ -130,8 +149,19 @@ def urlsafe_b64decode(s):
130149
The alphabet uses '-' instead of '+' and '_' instead of '/'.
131150
"""
132151
s = _bytes_from_decode_data(s)
152+
badchar = None
153+
for b in b'+/':
154+
if b in s:
155+
badchar = b
156+
break
133157
s = s.translate(_urlsafe_decode_translation)
134-
return b64decode(s)
158+
result = binascii.a2b_base64(s, strict_mode=False)
159+
if badchar is not None:
160+
import warnings
161+
warnings.warn(f'invalid character {chr(badchar)!a} in URL-safe Base64 data '
162+
f'will be discarded in future Python versions',
163+
FutureWarning, stacklevel=2)
164+
return result
135165

136166

137167

Lib/test/test_base64.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,11 @@ def test_b64decode_altchars(self):
292292
eq(base64.b64decode(data, altchars=altchars_str), res)
293293
eq(base64.b64decode(data_str, altchars=altchars_str), res)
294294

295+
self.assertRaises(ValueError, base64.b64decode, b'', altchars=b'+')
296+
self.assertRaises(ValueError, base64.b64decode, b'', altchars=b'+/-')
297+
self.assertRaises(ValueError, base64.b64decode, '', altchars='+')
298+
self.assertRaises(ValueError, base64.b64decode, '', altchars='+/-')
299+
295300
def test_b64decode_padding_error(self):
296301
self.assertRaises(binascii.Error, base64.b64decode, b'abc')
297302
self.assertRaises(binascii.Error, base64.b64decode, 'abc')
@@ -323,13 +328,25 @@ def test_b64decode_invalid_chars(self):
323328
with self.assertRaises(binascii.Error):
324329
base64.b64decode(bstr.decode('ascii'), validate=True)
325330

326-
# Normal alphabet characters not discarded when alternative given
327-
res = b'\xfb\xef\xff'
328-
self.assertEqual(base64.b64decode(b'++//', validate=True), res)
329-
self.assertEqual(base64.b64decode(b'++//', '-_', validate=True), res)
330-
self.assertEqual(base64.b64decode(b'--__', '-_', validate=True), res)
331-
self.assertEqual(base64.urlsafe_b64decode(b'++//'), res)
332-
self.assertEqual(base64.urlsafe_b64decode(b'--__'), res)
331+
# Normal alphabet characters will be discarded when alternative given
332+
with self.assertWarns(FutureWarning):
333+
self.assertEqual(base64.b64decode(b'++++', altchars=b'-_'),
334+
b'\xfb\xef\xbe')
335+
with self.assertWarns(FutureWarning):
336+
self.assertEqual(base64.b64decode(b'////', altchars=b'-_'),
337+
b'\xff\xff\xff')
338+
with self.assertWarns(DeprecationWarning):
339+
self.assertEqual(base64.b64decode(b'++++', altchars=b'-_', validate=True),
340+
b'\xfb\xef\xbe')
341+
with self.assertWarns(DeprecationWarning):
342+
self.assertEqual(base64.b64decode(b'////', altchars=b'-_', validate=True),
343+
b'\xff\xff\xff')
344+
with self.assertWarns(FutureWarning):
345+
self.assertEqual(base64.urlsafe_b64decode(b'++++'), b'\xfb\xef\xbe')
346+
with self.assertWarns(FutureWarning):
347+
self.assertEqual(base64.urlsafe_b64decode(b'////'), b'\xff\xff\xff')
348+
with self.assertRaises(binascii.Error):
349+
base64.b64decode(b'+/!', altchars=b'-_')
333350

334351
def _altchars_strategy():
335352
"""Generate 'altchars' for base64 encoding."""
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Accepting ``+`` and ``/`` characters with an alternative alphabet in
2+
:func:`base64.b64decode` and :func:`base64.urlsafe_b64decode` is now
3+
deprecated.
4+
In future Python versions they will be errors in the strict mode and
5+
discarded in the non-strict mode.

0 commit comments

Comments
 (0)