kumquat-buildroot/support/scripts/pkg-stats
Victor Huesca 500e1d6241 support/scripts/pkg-stats: add support for json output
Pkg-stats is a great script that get a lot of interesting info from
buildroot packages. Unfortunately it is currently designed to output a
static HTML page only. While this is great to include on the
buildroot's website, the HTML is not designed to be easily parsable and
thus it is difficult to reuse it in other scripts.

This patch provide a new option to output a JSON file in addition to the
HTML one.

The old 'output' option has been renamed to 'html' to distinguish from
the new 'json' option.

Signed-off-by: Victor Huesca <victor.huesca@bootlin.com>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
2019-08-01 11:10:41 +02:00

780 lines
25 KiB
Python
Executable File

#!/usr/bin/env python
# Copyright (C) 2009 by Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
import argparse
import datetime
import fnmatch
import os
from collections import defaultdict
import re
import subprocess
import requests # URL checking
import json
import certifi
from urllib3 import HTTPSConnectionPool
from urllib3.exceptions import HTTPError
from multiprocessing import Pool
INFRA_RE = re.compile(r"\$\(eval \$\(([a-z-]*)-package\)\)")
URL_RE = re.compile(r"\s*https?://\S*\s*$")
RM_API_STATUS_ERROR = 1
RM_API_STATUS_FOUND_BY_DISTRO = 2
RM_API_STATUS_FOUND_BY_PATTERN = 3
RM_API_STATUS_NOT_FOUND = 4
class Package:
all_licenses = list()
all_license_files = list()
all_versions = dict()
def __init__(self, name, path):
self.name = name
self.path = path
self.infras = None
self.has_license = False
self.has_license_files = False
self.has_hash = False
self.patch_count = 0
self.warnings = 0
self.current_version = None
self.url = None
self.url_status = None
self.url_worker = None
self.latest_version = (RM_API_STATUS_ERROR, None, None)
def pkgvar(self):
return self.name.upper().replace("-", "_")
def set_url(self):
"""
Fills in the .url field
"""
self.url_status = "No Config.in"
for filename in os.listdir(os.path.dirname(self.path)):
if fnmatch.fnmatch(filename, 'Config.*'):
fp = open(os.path.join(os.path.dirname(self.path), filename), "r")
for config_line in fp:
if URL_RE.match(config_line):
self.url = config_line.strip()
self.url_status = "Found"
fp.close()
return
self.url_status = "Missing"
fp.close()
def set_infra(self):
"""
Fills in the .infras field
"""
self.infras = list()
with open(self.path, 'r') as f:
lines = f.readlines()
for l in lines:
match = INFRA_RE.match(l)
if not match:
continue
infra = match.group(1)
if infra.startswith("host-"):
self.infras.append(("host", infra[5:]))
else:
self.infras.append(("target", infra))
def set_license(self):
"""
Fills in the .has_license and .has_license_files fields
"""
var = self.pkgvar()
if var in self.all_licenses:
self.has_license = True
if var in self.all_license_files:
self.has_license_files = True
def set_hash_info(self):
"""
Fills in the .has_hash field
"""
hashpath = self.path.replace(".mk", ".hash")
self.has_hash = os.path.exists(hashpath)
def set_patch_count(self):
"""
Fills in the .patch_count field
"""
self.patch_count = 0
pkgdir = os.path.dirname(self.path)
for subdir, _, _ in os.walk(pkgdir):
self.patch_count += len(fnmatch.filter(os.listdir(subdir), '*.patch'))
def set_current_version(self):
"""
Fills in the .current_version field
"""
var = self.pkgvar()
if var in self.all_versions:
self.current_version = self.all_versions[var]
def set_check_package_warnings(self):
"""
Fills in the .warnings field
"""
cmd = ["./utils/check-package"]
pkgdir = os.path.dirname(self.path)
for root, dirs, files in os.walk(pkgdir):
for f in files:
if f.endswith(".mk") or f.endswith(".hash") or f == "Config.in" or f == "Config.in.host":
cmd.append(os.path.join(root, f))
o = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[1]
lines = o.splitlines()
for line in lines:
m = re.match("^([0-9]*) warnings generated", line)
if m:
self.warnings = int(m.group(1))
return
def __eq__(self, other):
return self.path == other.path
def __lt__(self, other):
return self.path < other.path
def __str__(self):
return "%s (path='%s', license='%s', license_files='%s', hash='%s', patches=%d)" % \
(self.name, self.path, self.has_license, self.has_license_files, self.has_hash, self.patch_count)
def get_pkglist(npackages, package_list):
"""
Builds the list of Buildroot packages, returning a list of Package
objects. Only the .name and .path fields of the Package object are
initialized.
npackages: limit to N packages
package_list: limit to those packages in this list
"""
WALK_USEFUL_SUBDIRS = ["boot", "linux", "package", "toolchain"]
WALK_EXCLUDES = ["boot/common.mk",
"linux/linux-ext-.*.mk",
"package/freescale-imx/freescale-imx.mk",
"package/gcc/gcc.mk",
"package/gstreamer/gstreamer.mk",
"package/gstreamer1/gstreamer1.mk",
"package/gtk2-themes/gtk2-themes.mk",
"package/matchbox/matchbox.mk",
"package/opengl/opengl.mk",
"package/qt5/qt5.mk",
"package/x11r7/x11r7.mk",
"package/doc-asciidoc.mk",
"package/pkg-.*.mk",
"package/nvidia-tegra23/nvidia-tegra23.mk",
"toolchain/toolchain-external/pkg-toolchain-external.mk",
"toolchain/toolchain-external/toolchain-external.mk",
"toolchain/toolchain.mk",
"toolchain/helpers.mk",
"toolchain/toolchain-wrapper.mk"]
packages = list()
count = 0
for root, dirs, files in os.walk("."):
rootdir = root.split("/")
if len(rootdir) < 2:
continue
if rootdir[1] not in WALK_USEFUL_SUBDIRS:
continue
for f in files:
if not f.endswith(".mk"):
continue
# Strip ending ".mk"
pkgname = f[:-3]
if package_list and pkgname not in package_list:
continue
pkgpath = os.path.join(root, f)
skip = False
for exclude in WALK_EXCLUDES:
# pkgpath[2:] strips the initial './'
if re.match(exclude, pkgpath[2:]):
skip = True
continue
if skip:
continue
p = Package(pkgname, pkgpath)
packages.append(p)
count += 1
if npackages and count == npackages:
return packages
return packages
def package_init_make_info():
# Licenses
o = subprocess.check_output(["make", "BR2_HAVE_DOT_CONFIG=y",
"-s", "printvars", "VARS=%_LICENSE"])
for l in o.splitlines():
# Get variable name and value
pkgvar, value = l.split("=")
# If present, strip HOST_ from variable name
if pkgvar.startswith("HOST_"):
pkgvar = pkgvar[5:]
# Strip _LICENSE
pkgvar = pkgvar[:-8]
# If value is "unknown", no license details available
if value == "unknown":
continue
Package.all_licenses.append(pkgvar)
# License files
o = subprocess.check_output(["make", "BR2_HAVE_DOT_CONFIG=y",
"-s", "printvars", "VARS=%_LICENSE_FILES"])
for l in o.splitlines():
# Get variable name and value
pkgvar, value = l.split("=")
# If present, strip HOST_ from variable name
if pkgvar.startswith("HOST_"):
pkgvar = pkgvar[5:]
if pkgvar.endswith("_MANIFEST_LICENSE_FILES"):
continue
# Strip _LICENSE_FILES
pkgvar = pkgvar[:-14]
Package.all_license_files.append(pkgvar)
# Version
o = subprocess.check_output(["make", "BR2_HAVE_DOT_CONFIG=y",
"-s", "printvars", "VARS=%_VERSION"])
# We process first the host package VERSION, and then the target
# package VERSION. This means that if a package exists in both
# target and host variants, with different version numbers
# (unlikely), we'll report the target version number.
version_list = o.splitlines()
version_list = [x for x in version_list if x.startswith("HOST_")] + \
[x for x in version_list if not x.startswith("HOST_")]
for l in version_list:
# Get variable name and value
pkgvar, value = l.split("=")
# If present, strip HOST_ from variable name
if pkgvar.startswith("HOST_"):
pkgvar = pkgvar[5:]
if pkgvar.endswith("_DL_VERSION"):
continue
# Strip _VERSION
pkgvar = pkgvar[:-8]
Package.all_versions[pkgvar] = value
def check_url_status_worker(url, url_status):
if url_status != "Missing" and url_status != "No Config.in":
try:
url_status_code = requests.head(url, timeout=30).status_code
if url_status_code >= 400:
return "Invalid(%s)" % str(url_status_code)
except requests.exceptions.RequestException:
return "Invalid(Err)"
return "Ok"
return url_status
def check_package_urls(packages):
Package.pool = Pool(processes=64)
for pkg in packages:
pkg.url_worker = pkg.pool.apply_async(check_url_status_worker, (pkg.url, pkg.url_status))
for pkg in packages:
pkg.url_status = pkg.url_worker.get(timeout=3600)
def release_monitoring_get_latest_version_by_distro(pool, name):
try:
req = pool.request('GET', "/api/project/Buildroot/%s" % name)
except HTTPError:
return (RM_API_STATUS_ERROR, None, None)
if req.status != 200:
return (RM_API_STATUS_NOT_FOUND, None, None)
data = json.loads(req.data)
if 'version' in data:
return (RM_API_STATUS_FOUND_BY_DISTRO, data['version'], data['id'])
else:
return (RM_API_STATUS_FOUND_BY_DISTRO, None, data['id'])
def release_monitoring_get_latest_version_by_guess(pool, name):
try:
req = pool.request('GET', "/api/projects/?pattern=%s" % name)
except HTTPError:
return (RM_API_STATUS_ERROR, None, None)
if req.status != 200:
return (RM_API_STATUS_NOT_FOUND, None, None)
data = json.loads(req.data)
projects = data['projects']
projects.sort(key=lambda x: x['id'])
for p in projects:
if p['name'] == name and 'version' in p:
return (RM_API_STATUS_FOUND_BY_PATTERN, p['version'], p['id'])
return (RM_API_STATUS_NOT_FOUND, None, None)
def check_package_latest_version(packages):
"""
Fills in the .latest_version field of all Package objects
This field has a special format:
(status, version, id)
with:
- status: one of RM_API_STATUS_ERROR,
RM_API_STATUS_FOUND_BY_DISTRO, RM_API_STATUS_FOUND_BY_PATTERN,
RM_API_STATUS_NOT_FOUND
- version: string containing the latest version known by
release-monitoring.org for this package
- id: string containing the id of the project corresponding to this
package, as known by release-monitoring.org
"""
pool = HTTPSConnectionPool('release-monitoring.org', port=443,
cert_reqs='CERT_REQUIRED', ca_certs=certifi.where(),
timeout=30)
count = 0
for pkg in packages:
v = release_monitoring_get_latest_version_by_distro(pool, pkg.name)
if v[0] == RM_API_STATUS_NOT_FOUND:
v = release_monitoring_get_latest_version_by_guess(pool, pkg.name)
pkg.latest_version = v
print("[%d/%d] Package %s" % (count, len(packages), pkg.name))
count += 1
def calculate_stats(packages):
stats = defaultdict(int)
for pkg in packages:
# If packages have multiple infra, take the first one. For the
# vast majority of packages, the target and host infra are the
# same. There are very few packages that use a different infra
# for the host and target variants.
if len(pkg.infras) > 0:
infra = pkg.infras[0][1]
stats["infra-%s" % infra] += 1
else:
stats["infra-unknown"] += 1
if pkg.has_license:
stats["license"] += 1
else:
stats["no-license"] += 1
if pkg.has_license_files:
stats["license-files"] += 1
else:
stats["no-license-files"] += 1
if pkg.has_hash:
stats["hash"] += 1
else:
stats["no-hash"] += 1
if pkg.latest_version[0] == RM_API_STATUS_FOUND_BY_DISTRO:
stats["rmo-mapping"] += 1
else:
stats["rmo-no-mapping"] += 1
if not pkg.latest_version[1]:
stats["version-unknown"] += 1
elif pkg.latest_version[1] == pkg.current_version:
stats["version-uptodate"] += 1
else:
stats["version-not-uptodate"] += 1
stats["patches"] += pkg.patch_count
return stats
html_header = """
<head>
<script src=\"https://www.kryogenix.org/code/browser/sorttable/sorttable.js\"></script>
<style type=\"text/css\">
table {
width: 100%;
}
td {
border: 1px solid black;
}
td.centered {
text-align: center;
}
td.wrong {
background: #ff9a69;
}
td.correct {
background: #d2ffc4;
}
td.nopatches {
background: #d2ffc4;
}
td.somepatches {
background: #ffd870;
}
td.lotsofpatches {
background: #ff9a69;
}
td.good_url {
background: #d2ffc4;
}
td.missing_url {
background: #ffd870;
}
td.invalid_url {
background: #ff9a69;
}
td.version-good {
background: #d2ffc4;
}
td.version-needs-update {
background: #ff9a69;
}
td.version-unknown {
background: #ffd870;
}
td.version-error {
background: #ccc;
}
</style>
<title>Statistics of Buildroot packages</title>
</head>
<a href=\"#results\">Results</a><br/>
<p id=\"sortable_hint\"></p>
"""
html_footer = """
</body>
<script>
if (typeof sorttable === \"object\") {
document.getElementById(\"sortable_hint\").innerHTML =
\"hint: the table can be sorted by clicking the column headers\"
}
</script>
</html>
"""
def infra_str(infra_list):
if not infra_list:
return "Unknown"
elif len(infra_list) == 1:
return "<b>%s</b><br/>%s" % (infra_list[0][1], infra_list[0][0])
elif infra_list[0][1] == infra_list[1][1]:
return "<b>%s</b><br/>%s + %s" % \
(infra_list[0][1], infra_list[0][0], infra_list[1][0])
else:
return "<b>%s</b> (%s)<br/><b>%s</b> (%s)" % \
(infra_list[0][1], infra_list[0][0],
infra_list[1][1], infra_list[1][0])
def boolean_str(b):
if b:
return "Yes"
else:
return "No"
def dump_html_pkg(f, pkg):
f.write(" <tr>\n")
f.write(" <td>%s</td>\n" % pkg.path[2:])
# Patch count
td_class = ["centered"]
if pkg.patch_count == 0:
td_class.append("nopatches")
elif pkg.patch_count < 5:
td_class.append("somepatches")
else:
td_class.append("lotsofpatches")
f.write(" <td class=\"%s\">%s</td>\n" %
(" ".join(td_class), str(pkg.patch_count)))
# Infrastructure
infra = infra_str(pkg.infras)
td_class = ["centered"]
if infra == "Unknown":
td_class.append("wrong")
else:
td_class.append("correct")
f.write(" <td class=\"%s\">%s</td>\n" %
(" ".join(td_class), infra_str(pkg.infras)))
# License
td_class = ["centered"]
if pkg.has_license:
td_class.append("correct")
else:
td_class.append("wrong")
f.write(" <td class=\"%s\">%s</td>\n" %
(" ".join(td_class), boolean_str(pkg.has_license)))
# License files
td_class = ["centered"]
if pkg.has_license_files:
td_class.append("correct")
else:
td_class.append("wrong")
f.write(" <td class=\"%s\">%s</td>\n" %
(" ".join(td_class), boolean_str(pkg.has_license_files)))
# Hash
td_class = ["centered"]
if pkg.has_hash:
td_class.append("correct")
else:
td_class.append("wrong")
f.write(" <td class=\"%s\">%s</td>\n" %
(" ".join(td_class), boolean_str(pkg.has_hash)))
# Current version
if len(pkg.current_version) > 20:
current_version = pkg.current_version[:20] + "..."
else:
current_version = pkg.current_version
f.write(" <td class=\"centered\">%s</td>\n" % current_version)
# Latest version
if pkg.latest_version[0] == RM_API_STATUS_ERROR:
td_class.append("version-error")
if pkg.latest_version[1] is None:
td_class.append("version-unknown")
elif pkg.latest_version[1] != pkg.current_version:
td_class.append("version-needs-update")
else:
td_class.append("version-good")
if pkg.latest_version[0] == RM_API_STATUS_ERROR:
latest_version_text = "<b>Error</b>"
elif pkg.latest_version[0] == RM_API_STATUS_NOT_FOUND:
latest_version_text = "<b>Not found</b>"
else:
if pkg.latest_version[1] is None:
latest_version_text = "<b>Found, but no version</b>"
else:
latest_version_text = "<a href=\"https://release-monitoring.org/project/%s\"><b>%s</b></a>" % \
(pkg.latest_version[2], str(pkg.latest_version[1]))
latest_version_text += "<br/>"
if pkg.latest_version[0] == RM_API_STATUS_FOUND_BY_DISTRO:
latest_version_text += "found by <a href=\"https://release-monitoring.org/distro/Buildroot/\">distro</a>"
else:
latest_version_text += "found by guess"
f.write(" <td class=\"%s\">%s</td>\n" %
(" ".join(td_class), latest_version_text))
# Warnings
td_class = ["centered"]
if pkg.warnings == 0:
td_class.append("correct")
else:
td_class.append("wrong")
f.write(" <td class=\"%s\">%d</td>\n" %
(" ".join(td_class), pkg.warnings))
# URL status
td_class = ["centered"]
url_str = pkg.url_status
if pkg.url_status == "Missing" or pkg.url_status == "No Config.in":
td_class.append("missing_url")
elif pkg.url_status.startswith("Invalid"):
td_class.append("invalid_url")
url_str = "<a href=%s>%s</a>" % (pkg.url, pkg.url_status)
else:
td_class.append("good_url")
url_str = "<a href=%s>Link</a>" % pkg.url
f.write(" <td class=\"%s\">%s</td>\n" %
(" ".join(td_class), url_str))
f.write(" </tr>\n")
def dump_html_all_pkgs(f, packages):
f.write("""
<table class=\"sortable\">
<tr>
<td>Package</td>
<td class=\"centered\">Patch count</td>
<td class=\"centered\">Infrastructure</td>
<td class=\"centered\">License</td>
<td class=\"centered\">License files</td>
<td class=\"centered\">Hash file</td>
<td class=\"centered\">Current version</td>
<td class=\"centered\">Latest version</td>
<td class=\"centered\">Warnings</td>
<td class=\"centered\">Upstream URL</td>
</tr>
""")
for pkg in sorted(packages):
dump_html_pkg(f, pkg)
f.write("</table>")
def dump_html_stats(f, stats):
f.write("<a id=\"results\"></a>\n")
f.write("<table>\n")
infras = [infra[6:] for infra in stats.keys() if infra.startswith("infra-")]
for infra in infras:
f.write(" <tr><td>Packages using the <i>%s</i> infrastructure</td><td>%s</td></tr>\n" %
(infra, stats["infra-%s" % infra]))
f.write(" <tr><td>Packages having license information</td><td>%s</td></tr>\n" %
stats["license"])
f.write(" <tr><td>Packages not having license information</td><td>%s</td></tr>\n" %
stats["no-license"])
f.write(" <tr><td>Packages having license files information</td><td>%s</td></tr>\n" %
stats["license-files"])
f.write(" <tr><td>Packages not having license files information</td><td>%s</td></tr>\n" %
stats["no-license-files"])
f.write(" <tr><td>Packages having a hash file</td><td>%s</td></tr>\n" %
stats["hash"])
f.write(" <tr><td>Packages not having a hash file</td><td>%s</td></tr>\n" %
stats["no-hash"])
f.write(" <tr><td>Total number of patches</td><td>%s</td></tr>\n" %
stats["patches"])
f.write("<tr><td>Packages having a mapping on <i>release-monitoring.org</i></td><td>%s</td></tr>\n" %
stats["rmo-mapping"])
f.write("<tr><td>Packages lacking a mapping on <i>release-monitoring.org</i></td><td>%s</td></tr>\n" %
stats["rmo-no-mapping"])
f.write("<tr><td>Packages that are up-to-date</td><td>%s</td></tr>\n" %
stats["version-uptodate"])
f.write("<tr><td>Packages that are not up-to-date</td><td>%s</td></tr>\n" %
stats["version-not-uptodate"])
f.write("<tr><td>Packages with no known upstream version</td><td>%s</td></tr>\n" %
stats["version-unknown"])
f.write("</table>\n")
def dump_gen_info(f):
# Updated on Mon Feb 19 08:12:08 CET 2018, Git commit aa77030b8f5e41f1c53eb1c1ad664b8c814ba032
o = subprocess.check_output(["git", "log", "master", "-n", "1", "--pretty=format:%H"])
git_commit = o.splitlines()[0]
f.write("<p><i>Updated on %s, git commit %s</i></p>\n" %
(str(datetime.datetime.utcnow()), git_commit))
def dump_html(packages, stats, output):
with open(output, 'w') as f:
f.write(html_header)
dump_html_all_pkgs(f, packages)
dump_html_stats(f, stats)
dump_gen_info(f)
f.write(html_footer)
def dump_json(packages, stats, output):
# Format packages as a dictionnary instead of a list
# Exclude local field that does not contains real date
excluded_fields = ['url_worker', 'name']
pkgs = {
pkg.name: {
k: v
for k, v in pkg.__dict__.items()
if k not in excluded_fields
} for pkg in packages
}
# Aggregate infrastructures into a single dict entry
statistics = {
k: v
for k, v in stats.items()
if not k.startswith('infra-')
}
statistics['infra'] = {k[6:]: v for k, v in stats.items() if k.startswith('infra-')}
# The actual structure to dump, add commit and date to it
o = subprocess.check_output(["git", "log", "master", "-n", "1", "--pretty=format:%H"])
final = {'packages': pkgs,
'stats': statistics,
'commit': o.splitlines()[0],
'date': str(datetime.datetime.utcnow())}
with open(output, 'w') as f:
json.dump(final, f, indent=2, separators=(',', ': '))
f.write('\n')
def parse_args():
parser = argparse.ArgumentParser()
output = parser.add_argument_group('output', 'Output file(s)')
output.add_argument('--html', dest='html', action='store',
help='HTML output file')
output.add_argument('--json', dest='json', action='store',
help='JSON output file')
packages = parser.add_mutually_exclusive_group()
packages.add_argument('-n', dest='npackages', type=int, action='store',
help='Number of packages')
packages.add_argument('-p', dest='packages', action='store',
help='List of packages (comma separated)')
args = parser.parse_args()
if not args.html and not args.json:
parser.error('at least one of --html or --json (or both) is required')
return args
def __main__():
args = parse_args()
if args.packages:
package_list = args.packages.split(",")
else:
package_list = None
print("Build package list ...")
packages = get_pkglist(args.npackages, package_list)
print("Getting package make info ...")
package_init_make_info()
print("Getting package details ...")
for pkg in packages:
pkg.set_infra()
pkg.set_license()
pkg.set_hash_info()
pkg.set_patch_count()
pkg.set_check_package_warnings()
pkg.set_current_version()
pkg.set_url()
print("Checking URL status")
check_package_urls(packages)
print("Getting latest versions ...")
check_package_latest_version(packages)
print("Calculate stats")
stats = calculate_stats(packages)
if args.html:
print("Write HTML")
dump_html(packages, stats, args.html)
if args.json:
print("Write JSON")
dump_json(packages, stats, args.json)
__main__()