From 5f253e3e04e57d72f470eead8591a2606f98d396 Mon Sep 17 00:00:00 2001 From: "Yann E. MORIN" Date: Thu, 29 Feb 2024 18:12:57 +0100 Subject: [PATCH] support/scripts/cve: fix running on older ijson versions Commit 22b69455526f (support/scripts/cve.py: switch from NVD to FKIE for the JSON files) had to change the decompressor from gz to xz, as the new location is using xz compression. That commit mentioned that it was spawning an external xz process to do the decompression, on the pretence that "there is no xz decompressor in Python stdlib." Before version 3.1, ijson.items() only accepted a file-like object as input (that file-like object could yield bytes() or str(), both were supported). Starting with version 3.1, ijson.items() also accepts being passed bytes() or str() directly. subprocess.check_output() means we are now passing bytes() to ijson.items(), so it fails on ijson versions before 3.1, with failures such as: [...] File "/usr/lib/python3/dist-packages/ijson/backends/python.py", line 25, in Lexer if type(f.read(0)) == bytetype: AttributeError: 'bytes' object has no attribute 'read' Ubuntu 20.04, on which the pkg-stats run to generate the daily report, only has ijson 2.3. More recent distros have more recent versions of ijson, like Fedora 39 that has 3.2.3, recent enough to support being fed bytes(). Commit 22b69455526f was tested on Fedora 39, so did not catch the issue. However, the reasoning in 22b69455526f is wrong: there *is* the lzma module, at least since python 3.3 (that is, aeons ago), which is able to read xz-compressed files; it also has an API similar to the gzip module, and can provide a file-like object that exposes the decompressed data. So, do just that: provide an lzma-wrapped file-like object to ijson, so that we can eventually recover our daily reports that everything is broken! :-] Note that this construct still works on recent versions! Reported-by: Thomas Petazzoni Signed-off-by: Yann E. 
MORIN Cc: Arnout Vandecappelle (Essensium/Mind) Signed-off-by: Peter Korsgaard --- support/scripts/cve.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/support/scripts/cve.py b/support/scripts/cve.py index 1a3c307e12..7167ecbc6a 100755 --- a/support/scripts/cve.py +++ b/support/scripts/cve.py @@ -21,8 +21,8 @@ import datetime import os import requests # URL checking import distutils.version +import lzma import time -import subprocess import sys import operator @@ -134,8 +134,7 @@ class CVE: for year in range(NVD_START_YEAR, datetime.datetime.now().year + 1): filename = CVE.download_nvd_year(nvd_dir, year) try: - uncompressed = subprocess.check_output(["xz", "-d", "-c", filename]) - content = ijson.items(uncompressed, 'cve_items.item') + content = ijson.items(lzma.LZMAFile(filename), 'cve_items.item') except: # noqa: E722 print("ERROR: cannot read %s. Please remove the file then rerun this script" % filename) raise