https://github.com/bleachbit/bleachbit/commit/73e6c69dce3893396026d55bf86b54e1c4f6fb26 From 73e6c69dce3893396026d55bf86b54e1c4f6fb26 Mon Sep 17 00:00:00 2001 From: Andrew Ziem Date: Sun, 15 Mar 2026 15:59:13 -0600 Subject: [PATCH] Fix test_detect_encoding() with chardet 6.0.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old version was tested with chardet versions including 5.2.0, which worked. The test failed on OpenSUSE Tumbleweed with python313-chardet-6.0.0-1.1 [ 86s] FAIL: test_detect_encoding (tests.TestFileUtilities.FileUtilitiesTestCase.test_detect_encoding) (encoding='EUC-KR') [ 86s] Unit test for detect_encoding [ 86s] ---------------------------------------------------------------------- [ 86s] Traceback (most recent call last): [ 86s] File "/srv/hal9000/rpmbuild/BUILD/bleachbit-5.1.1-build/bleachbit-5.1.1/tests/TestFileUtilities.py", line 1003, in test_detect_encoding [ 86s] self.assertEqual( [ 86s] ~~~~~~~~~~~~~~~~^ [ 86s] det, expected_encoding, [ 86s] ^^^^^^^^^^^^^^^^^^^^^^^ [ 86s] f"{file_contents} -> {det}, check that chardet is available") [ 86s] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ [ 86s] AssertionError: 'CP949' != 'EUC-KR' [ 86s] - CP949 [ 86s] + EUC-KR [ 86s] : 나는 유리를 먹을 수 있어요. 그래도 아프지 않아요 -> CP949, check that chardet is available --- a/tests/TestFileUtilities.py +++ b/tests/TestFileUtilities.py @@ -989,20 +989,25 @@ def test_detect_encoding(self): """Unit test for detect_encoding""" eat_glass = '나는 유리를 먹을 수 있어요. 
그래도 아프지 않아요' bom = '\ufeff' + eat_glass # Add BOM for utf-8-sig - tests = (('This is just an ASCII file', 'ascii'), - (eat_glass, 'utf-8'), - (eat_glass, 'EUC-KR'), - (bom, 'UTF-8-SIG')) - for file_contents, expected_encoding in tests: - with self.subTest(encoding=expected_encoding): + tests = (('This is just an ASCII file', ['ascii']), + (eat_glass, ['utf-8']), + # Accept both EUC-KR and CP949 for Korean + (eat_glass, ['EUC-KR', 'CP949']), + (bom, ['UTF-8-SIG'])) + for file_contents, expected_encodings in tests: + with self.subTest(encoding=expected_encodings): + # Use first encoding for writing + write_encoding = expected_encodings[0] + with tempfile.NamedTemporaryFile(mode='w', delete=False, - encoding=expected_encoding) as temp: + encoding=write_encoding) as temp: temp.write(file_contents) temp.flush() det = detect_encoding(temp.name) - self.assertEqual( - det, expected_encoding, - f"{file_contents} -> {det}, check that chardet is available") + + self.assertIn( + det, expected_encodings, + f"{file_contents} -> {det}, expected one of {expected_encodings}") @common.skipIfWindows def test_ego_owner(self):