scanpypi: get license names from SPDX database

Use the spdx_lookup package to compare packages' license file texts
with the SPDX database.

This feature is optional.

Bonus: fix wrong indentation.

Signed-off-by: Yegor Yefremov <yegorslists@googlemail.com>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
This commit is contained in:
Yegor Yefremov 2018-01-10 09:45:52 +01:00 committed by Thomas Petazzoni
parent 3cd1908292
commit d2ac1ec6f4

View File

@ -24,6 +24,15 @@ import tempfile
import imp import imp
from functools import wraps from functools import wraps
try:
import spdx_lookup as liclookup
except ImportError:
# spdx_lookup is not installed
print('spdx_lookup module is not installed. This can lead to an '
'inaccurate licence detection. Please install it via\n'
'pip install spdx_lookup')
liclookup = None
def setup_decorator(func, method): def setup_decorator(func, method):
""" """
Decorator for distutils.core.setup and setuptools.setup. Decorator for distutils.core.setup and setuptools.setup.
@ -354,29 +363,17 @@ class BuildrootPackage():
lines.append(setup_type_line) lines.append(setup_type_line)
return lines return lines
def __create_mk_license(self): def __get_license_names(self, license_files):
""" """
Create the lines referring to the package's license informations of the Try to determine the related license name.
<package_name>.mk file
The license is found using the metadata from pypi. There are two possibilities. Either the scripts tries to
In the metadata, the license can be found either with standard names in get license name from package's metadata or, if spdx_lookup
the classifiers part or with naming from the packager in the "License" package is available, the script compares license files with
part. SPDX database.
From the classifiers, the license is "translated" according to
buildroot standards if need be (i.e. from Apache Software License to
Apache-2.0).
From the License part, we cannot guess what formatting the packager
used. Hence, it is likely to be incorrect. (i.e. Apache License 2.0
instead of Apache-2.0).
The license's files are found by searching the package for files named
license or license.txt (case insensitive).
If more than one license file is found, the user is asked to select
which ones he wants to use.
""" """
license_line = ''
if liclookup is None:
license_dict = { license_dict = {
'Apache Software License': 'Apache-2.0', 'Apache Software License': 'Apache-2.0',
'BSD License': 'BSD', 'BSD License': 'BSD',
@ -405,7 +402,6 @@ class BuildrootPackage():
if regexp.match(lic)] if regexp.match(lic)]
licenses = map(lambda x: license_dict[x] if x in license_dict else x, licenses = map(lambda x: license_dict[x] if x in license_dict else x,
classifiers_licenses) classifiers_licenses)
lines = []
if not len(licenses): if not len(licenses):
print('WARNING: License has been set to "{license}". It is most' print('WARNING: License has been set to "{license}". It is most'
' likely wrong, please change it if need be'.format( ' likely wrong, please change it if need be'.format(
@ -414,11 +410,39 @@ class BuildrootPackage():
license_line = '{name}_LICENSE = {license}\n'.format( license_line = '{name}_LICENSE = {license}\n'.format(
name=self.mk_name, name=self.mk_name,
license=', '.join(licenses)) license=', '.join(licenses))
lines.append(license_line) else:
license_names = []
for license_file in license_files:
with open(license_file) as lic_file:
match = liclookup.match(lic_file.read())
if match.confidence >= 90.0:
license_names.append(match.license.id)
if len(license_names) > 0:
license_line = ('{name}_LICENSE ='
' {names}\n'.format(
name=self.mk_name,
names=', '.join(license_names)))
return license_line
def __create_mk_license(self):
"""
Create the lines referring to the package's license informations of the
<package_name>.mk file
The license's files are found by searching the package (case insensitive)
for files named license, license.txt etc. If more than one license file
is found, the user is asked to select which ones he wants to use.
"""
lines = []
filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT', filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT',
'COPYING', 'COPYING.TXT'] 'COPYING', 'COPYING.TXT']
license_files = list(find_file_upper_case(filenames, self.tmp_extract)) license_files = list(find_file_upper_case(filenames, self.tmp_extract))
lines.append(self.__get_license_names(license_files))
license_files = [license.replace(self.tmp_extract, '')[1:] license_files = [license.replace(self.tmp_extract, '')[1:]
for license in license_files] for license in license_files]
if len(license_files) > 0: if len(license_files) > 0: