support/scripts/cve.py: switch from NVD to FKIE for the JSON files

NVD will deprecate the v1.1 API which allows us to download the full
database as individual JSON files. Instead, there's a horribly crappy
API that is extremely slow and subject to race conditions.

Fortunately, there is a project, Fraunhofer FKIE - Cyber Analysis and
Defense [1], that goes through the effort of adapting to this new API
and regenerating the convenient JSON files. The JSON files and meta
files are re-generated daily.

Instead of implementing the NVD v2 API, we decided to just use the JSON
files generated by fkie-cad. That saves us the effort of solving the race
conditions, devising a cache mechanism that works, handling the frequent
gateway timeouts on the NVD servers, dealing with the rate limiting, and
keeping up with changes in the API.

Switch to this repository on github as NVD_BASE_URL. The file name is
also slightly different (CVE-20XX.json instead of nvdcve-1.1-20XX.json).

The fkie-cad repository compresses with xz instead of gz. Therefore:
 - rename the filename variables to _xz instead of _gz;
 - use xz as a subprocess because there is no xz decompressor in Python
   stdlib.

[1] https://www.fkie.fraunhofer.de/en/departments/cad.html

Cc: Daniel Lang <dalang@gmx.at>
Signed-off-by: Arnout Vandecappelle <arnout@mind.be>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
This commit is contained in:
Arnout Vandecappelle 2024-02-07 16:35:18 +01:00 committed by Thomas Petazzoni
parent 2bf75464d6
commit 22b6945552

View File

@ -22,7 +22,7 @@ import os
import requests # URL checking import requests # URL checking
import distutils.version import distutils.version
import time import time
import gzip import subprocess
import sys import sys
import operator import operator
@ -41,8 +41,7 @@ except ImportError:
sys.path.append('utils/') sys.path.append('utils/')
NVD_START_YEAR = 2002 NVD_START_YEAR = 2002
NVD_JSON_VERSION = "1.1" NVD_BASE_URL = "https://github.com/fkie-cad/nvd-json-data-feeds/releases/latest/download"
NVD_BASE_URL = "https://nvd.nist.gov/feeds/json/cve/" + NVD_JSON_VERSION
ops = { ops = {
'>=': operator.ge, '>=': operator.ge,
@ -83,15 +82,15 @@ class CVE:
@staticmethod @staticmethod
def download_nvd_year(nvd_path, year): def download_nvd_year(nvd_path, year):
metaf = "nvdcve-%s-%s.meta" % (NVD_JSON_VERSION, year) metaf = "CVE-%s.meta" % year
path_metaf = os.path.join(nvd_path, metaf) path_metaf = os.path.join(nvd_path, metaf)
jsonf_gz = "nvdcve-%s-%s.json.gz" % (NVD_JSON_VERSION, year) jsonf_xz = "CVE-%s.json.xz" % year
path_jsonf_gz = os.path.join(nvd_path, jsonf_gz) path_jsonf_xz = os.path.join(nvd_path, jsonf_xz)
# If the database file is less than a day old, we assume the NVD data # If the database file is less than a day old, we assume the NVD data
# locally available is recent enough. # locally available is recent enough.
if os.path.exists(path_jsonf_gz) and os.stat(path_jsonf_gz).st_mtime >= time.time() - 86400: if os.path.exists(path_jsonf_xz) and os.stat(path_jsonf_xz).st_mtime >= time.time() - 86400:
return path_jsonf_gz return path_jsonf_xz
# If not, we download the meta file # If not, we download the meta file
url = "%s/%s" % (NVD_BASE_URL, metaf) url = "%s/%s" % (NVD_BASE_URL, metaf)
@ -104,19 +103,19 @@ class CVE:
# we need to re-download the database. # we need to re-download the database.
# If the database does not exist locally, we need to redownload it in # If the database does not exist locally, we need to redownload it in
# any case. # any case.
if os.path.exists(path_metaf) and os.path.exists(path_jsonf_gz): if os.path.exists(path_metaf) and os.path.exists(path_jsonf_xz):
meta_known = open(path_metaf, "r").read() meta_known = open(path_metaf, "r").read()
if page_meta.text == meta_known: if page_meta.text == meta_known:
return path_jsonf_gz return path_jsonf_xz
# Grab the compressed JSON NVD, and write files to disk # Grab the compressed JSON NVD, and write files to disk
url = "%s/%s" % (NVD_BASE_URL, jsonf_gz) url = "%s/%s" % (NVD_BASE_URL, jsonf_xz)
print("Getting %s" % url) print("Getting %s" % url)
page_json = requests.get(url) page_json = requests.get(url)
page_json.raise_for_status() page_json.raise_for_status()
open(path_jsonf_gz, "wb").write(page_json.content) open(path_jsonf_xz, "wb").write(page_json.content)
open(path_metaf, "w").write(page_meta.text) open(path_metaf, "w").write(page_meta.text)
return path_jsonf_gz return path_jsonf_xz
@classmethod @classmethod
def read_nvd_dir(cls, nvd_dir): def read_nvd_dir(cls, nvd_dir):
@ -128,7 +127,8 @@ class CVE:
for year in range(NVD_START_YEAR, datetime.datetime.now().year + 1): for year in range(NVD_START_YEAR, datetime.datetime.now().year + 1):
filename = CVE.download_nvd_year(nvd_dir, year) filename = CVE.download_nvd_year(nvd_dir, year)
try: try:
content = ijson.items(gzip.GzipFile(filename), 'CVE_Items.item') uncompressed = subprocess.check_output(["xz", "-d", "-c", filename])
content = ijson.items(uncompressed, 'CVE_Items.item')
except: # noqa: E722 except: # noqa: E722
print("ERROR: cannot read %s. Please remove the file then rerun this script" % filename) print("ERROR: cannot read %s. Please remove the file then rerun this script" % filename)
raise raise