support/scripts/pkg-stats: retrieve packages latest version using processes
The major bottleneck in pkg-stats is the time spent waiting for answers from remote servers. Two functions involve such communication with remote servers: - 'check_package_urls', which checks that each package's upstream website is up; it is efficient due to the use of process-pools, thanks to Matt Weber. - 'check_package_latest_version', which fetches the latest package version from release-monitoring; it uses an http-pool but runs sequentially. This patch extends the use of process-pools to 'check_package_latest_version'. Due to some limitations of multiprocess callbacks, this patch loses the overall progress of packages in favour of just the current package name. Runtimes for this function are ~3m vs ~25m for the linear version. Tested on an i7 7500U (2/4 cores/threads @3.5GHz) with 15ms ping. Note: There has already been work trying to parallelize this function using threads, but there was a failure on some configurations [1]. This implementation relies on a dedicated module already in use in this script, so it's unlikely to see failures with this version. [1] http://lists.busybox.net/pipermail/buildroot/2018-March/215368.html Signed-off-by: Victor Huesca <victor.huesca@bootlin.com> Signed-off-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
This commit is contained in:
parent
3938afe1b5
commit
294fc3218c
@ -38,6 +38,10 @@ RM_API_STATUS_FOUND_BY_DISTRO = 2
|
|||||||
RM_API_STATUS_FOUND_BY_PATTERN = 3
|
RM_API_STATUS_FOUND_BY_PATTERN = 3
|
||||||
RM_API_STATUS_NOT_FOUND = 4
|
RM_API_STATUS_NOT_FOUND = 4
|
||||||
|
|
||||||
|
# Used to make multiple requests to the same host. It is global
|
||||||
|
# because it's used by sub-processes.
|
||||||
|
http_pool = None
|
||||||
|
|
||||||
|
|
||||||
class Package:
|
class Package:
|
||||||
all_licenses = list()
|
all_licenses = list()
|
||||||
@ -316,6 +320,15 @@ def release_monitoring_get_latest_version_by_guess(pool, name):
|
|||||||
return (RM_API_STATUS_NOT_FOUND, None, None)
|
return (RM_API_STATUS_NOT_FOUND, None, None)
|
||||||
|
|
||||||
|
|
||||||
|
def check_package_latest_version_worker(name):
|
||||||
|
"""Wrapper to try both by name then by guess"""
|
||||||
|
print(name)
|
||||||
|
res = release_monitoring_get_latest_version_by_distro(http_pool, name)
|
||||||
|
if res[0] == RM_API_STATUS_NOT_FOUND:
|
||||||
|
res = release_monitoring_get_latest_version_by_guess(http_pool, name)
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
def check_package_latest_version(packages):
|
def check_package_latest_version(packages):
|
||||||
"""
|
"""
|
||||||
Fills in the .latest_version field of all Package objects
|
Fills in the .latest_version field of all Package objects
|
||||||
@ -331,18 +344,15 @@ def check_package_latest_version(packages):
|
|||||||
- id: string containing the id of the project corresponding to this
|
- id: string containing the id of the project corresponding to this
|
||||||
package, as known by release-monitoring.org
|
package, as known by release-monitoring.org
|
||||||
"""
|
"""
|
||||||
pool = HTTPSConnectionPool('release-monitoring.org', port=443,
|
global http_pool
|
||||||
cert_reqs='CERT_REQUIRED', ca_certs=certifi.where(),
|
http_pool = HTTPSConnectionPool('release-monitoring.org', port=443,
|
||||||
timeout=30)
|
cert_reqs='CERT_REQUIRED', ca_certs=certifi.where(),
|
||||||
count = 0
|
timeout=30)
|
||||||
for pkg in packages:
|
worker_pool = Pool(processes=64)
|
||||||
v = release_monitoring_get_latest_version_by_distro(pool, pkg.name)
|
results = worker_pool.map(check_package_latest_version_worker, (pkg.name for pkg in packages))
|
||||||
if v[0] == RM_API_STATUS_NOT_FOUND:
|
for pkg, r in zip(packages, results):
|
||||||
v = release_monitoring_get_latest_version_by_guess(pool, pkg.name)
|
pkg.latest_version = r
|
||||||
|
del http_pool
|
||||||
pkg.latest_version = v
|
|
||||||
print("[%d/%d] Package %s" % (count, len(packages), pkg.name))
|
|
||||||
count += 1
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_stats(packages):
|
def calculate_stats(packages):
|
||||||
|
Loading…
Reference in New Issue
Block a user