support/scripts/cve.py: switch from NVD to FKIE for the JSON files
NVD will deprecate the v1.1 API which allows us to download the full database as individual JSON files. Instead, there's a horribly crappy API that is extremely slow and subject to race conditions. Fortunately, there is a project, Fraunhofer FKIE - Cyber Analysis and Defense [1], that goes through the effort of adapting to this new API and regenerating the convenient JSON files. The JSON files and meta files are re-generated daily. Instead of implementing the NVD v2 API, we decided to just use the JSON files generated by fkie-cad. That saves us the effort of solving the race conditions, devising a cache mechanism that works, handling the frequent gateway timeouts on the NVD servers, dealing with the rate limiting, and keeping up with changes in the API. Switch to this repository on GitHub as NVD_BASE_URL. The file name is also slightly different (CVE-20XX.json instead of nvdcve-1.1-20XX.json). The fkie-cad repository compresses with xz instead of gz. Therefore: - rename the filename variables to _xz instead of _gz; - use xz as a subprocess because there is no xz decompressor in Python stdlib. [1] https://www.fkie.fraunhofer.de/en/departments/cad.html Cc: Daniel Lang <dalang@gmx.at> Signed-off-by: Arnout Vandecappelle <arnout@mind.be> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
This commit is contained in:
parent
2bf75464d6
commit
22b6945552
@ -22,7 +22,7 @@ import os
|
|||||||
import requests # URL checking
|
import requests # URL checking
|
||||||
import distutils.version
|
import distutils.version
|
||||||
import time
|
import time
|
||||||
import gzip
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import operator
|
import operator
|
||||||
|
|
||||||
@ -41,8 +41,7 @@ except ImportError:
|
|||||||
sys.path.append('utils/')
|
sys.path.append('utils/')
|
||||||
|
|
||||||
NVD_START_YEAR = 2002
|
NVD_START_YEAR = 2002
|
||||||
NVD_JSON_VERSION = "1.1"
|
NVD_BASE_URL = "https://github.com/fkie-cad/nvd-json-data-feeds/releases/latest/download"
|
||||||
NVD_BASE_URL = "https://nvd.nist.gov/feeds/json/cve/" + NVD_JSON_VERSION
|
|
||||||
|
|
||||||
ops = {
|
ops = {
|
||||||
'>=': operator.ge,
|
'>=': operator.ge,
|
||||||
@ -83,15 +82,15 @@ class CVE:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def download_nvd_year(nvd_path, year):
|
def download_nvd_year(nvd_path, year):
|
||||||
metaf = "nvdcve-%s-%s.meta" % (NVD_JSON_VERSION, year)
|
metaf = "CVE-%s.meta" % year
|
||||||
path_metaf = os.path.join(nvd_path, metaf)
|
path_metaf = os.path.join(nvd_path, metaf)
|
||||||
jsonf_gz = "nvdcve-%s-%s.json.gz" % (NVD_JSON_VERSION, year)
|
jsonf_xz = "CVE-%s.json.xz" % year
|
||||||
path_jsonf_gz = os.path.join(nvd_path, jsonf_gz)
|
path_jsonf_xz = os.path.join(nvd_path, jsonf_xz)
|
||||||
|
|
||||||
# If the database file is less than a day old, we assume the NVD data
|
# If the database file is less than a day old, we assume the NVD data
|
||||||
# locally available is recent enough.
|
# locally available is recent enough.
|
||||||
if os.path.exists(path_jsonf_gz) and os.stat(path_jsonf_gz).st_mtime >= time.time() - 86400:
|
if os.path.exists(path_jsonf_xz) and os.stat(path_jsonf_xz).st_mtime >= time.time() - 86400:
|
||||||
return path_jsonf_gz
|
return path_jsonf_xz
|
||||||
|
|
||||||
# If not, we download the meta file
|
# If not, we download the meta file
|
||||||
url = "%s/%s" % (NVD_BASE_URL, metaf)
|
url = "%s/%s" % (NVD_BASE_URL, metaf)
|
||||||
@ -104,19 +103,19 @@ class CVE:
|
|||||||
# we need to re-download the database.
|
# we need to re-download the database.
|
||||||
# If the database does not exist locally, we need to redownload it in
|
# If the database does not exist locally, we need to redownload it in
|
||||||
# any case.
|
# any case.
|
||||||
if os.path.exists(path_metaf) and os.path.exists(path_jsonf_gz):
|
if os.path.exists(path_metaf) and os.path.exists(path_jsonf_xz):
|
||||||
meta_known = open(path_metaf, "r").read()
|
meta_known = open(path_metaf, "r").read()
|
||||||
if page_meta.text == meta_known:
|
if page_meta.text == meta_known:
|
||||||
return path_jsonf_gz
|
return path_jsonf_xz
|
||||||
|
|
||||||
# Grab the compressed JSON NVD, and write files to disk
|
# Grab the compressed JSON NVD, and write files to disk
|
||||||
url = "%s/%s" % (NVD_BASE_URL, jsonf_gz)
|
url = "%s/%s" % (NVD_BASE_URL, jsonf_xz)
|
||||||
print("Getting %s" % url)
|
print("Getting %s" % url)
|
||||||
page_json = requests.get(url)
|
page_json = requests.get(url)
|
||||||
page_json.raise_for_status()
|
page_json.raise_for_status()
|
||||||
open(path_jsonf_gz, "wb").write(page_json.content)
|
open(path_jsonf_xz, "wb").write(page_json.content)
|
||||||
open(path_metaf, "w").write(page_meta.text)
|
open(path_metaf, "w").write(page_meta.text)
|
||||||
return path_jsonf_gz
|
return path_jsonf_xz
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def read_nvd_dir(cls, nvd_dir):
|
def read_nvd_dir(cls, nvd_dir):
|
||||||
@ -128,7 +127,8 @@ class CVE:
|
|||||||
for year in range(NVD_START_YEAR, datetime.datetime.now().year + 1):
|
for year in range(NVD_START_YEAR, datetime.datetime.now().year + 1):
|
||||||
filename = CVE.download_nvd_year(nvd_dir, year)
|
filename = CVE.download_nvd_year(nvd_dir, year)
|
||||||
try:
|
try:
|
||||||
content = ijson.items(gzip.GzipFile(filename), 'CVE_Items.item')
|
uncompressed = subprocess.check_output(["xz", "-d", "-c", filename])
|
||||||
|
content = ijson.items(uncompressed, 'CVE_Items.item')
|
||||||
except: # noqa: E722
|
except: # noqa: E722
|
||||||
print("ERROR: cannot read %s. Please remove the file then rerun this script" % filename)
|
print("ERROR: cannot read %s. Please remove the file then rerun this script" % filename)
|
||||||
raise
|
raise
|
||||||
|
Loading…
Reference in New Issue
Block a user