scanpypi: get license names from SPDX database

Use the spdx_lookup package to compare the text of a package's license
files against the SPDX database.

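This feature is optional: if spdx_lookup is not installed, the script
falls back to taking the license name from the package's metadata.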

Bonus: fix wrong indentation.

Signed-off-by: Yegor Yefremov <yegorslists@googlemail.com>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
This commit is contained in:
Yegor Yefremov 2018-01-10 09:45:52 +01:00 committed by Thomas Petazzoni
parent 3cd1908292
commit d2ac1ec6f4

View File

@ -24,6 +24,15 @@ import tempfile
import imp
from functools import wraps
try:
import spdx_lookup as liclookup
except ImportError:
# spdx_lookup is an optional dependency. When it is not installed, warn
# the user and bind liclookup to None; __get_license_names() tests for
# None and then derives the license name from the package's metadata
# instead of comparing license files with the SPDX database.
print('spdx_lookup module is not installed. This can lead to an '
'inaccurate licence detection. Please install it via\n'
'pip install spdx_lookup')
liclookup = None
def setup_decorator(func, method):
"""
Decorator for distutils.core.setup and setuptools.setup.
@ -354,29 +363,17 @@ class BuildrootPackage():
lines.append(setup_type_line)
return lines
def __create_mk_license(self):
def __get_license_names(self, license_files):
"""
Create the lines referring to the package's license informations of the
<package_name>.mk file
Try to determine the related license name.
The license is found using the metadata from pypi.
In the metadata, the license can be found either with standard names in
the classifiers part or with naming from the packager in the "License"
part.
From the classifiers, the license is "translated" according to
buildroot standards if need be (i.e. from Apache Software License to
Apache-2.0).
From the License part, we cannot guess what formatting the packager
used. Hence, it is likely to be incorrect. (i.e. Apache License 2.0
instead of Apache-2.0).
The license's files are found by searching the package for files named
license or license.txt (case insensitive).
If more than one license file is found, the user is asked to select
which ones he wants to use.
There are two possibilities. Either the script tries to
get the license name from the package's metadata or, if the
spdx_lookup package is available, the script compares the license
files with the SPDX database.
"""
license_line = ''
if liclookup is None:
license_dict = {
'Apache Software License': 'Apache-2.0',
'BSD License': 'BSD',
@ -405,7 +402,6 @@ class BuildrootPackage():
if regexp.match(lic)]
licenses = map(lambda x: license_dict[x] if x in license_dict else x,
classifiers_licenses)
lines = []
if not len(licenses):
print('WARNING: License has been set to "{license}". It is most'
' likely wrong, please change it if need be'.format(
@ -414,11 +410,39 @@ class BuildrootPackage():
license_line = '{name}_LICENSE = {license}\n'.format(
name=self.mk_name,
license=', '.join(licenses))
lines.append(license_line)
else:
license_names = []
for license_file in license_files:
with open(license_file) as lic_file:
match = liclookup.match(lic_file.read())
if match.confidence >= 90.0:
license_names.append(match.license.id)
if len(license_names) > 0:
license_line = ('{name}_LICENSE ='
' {names}\n'.format(
name=self.mk_name,
names=', '.join(license_names)))
return license_line
def __create_mk_license(self):
"""
Create the lines referring to the package's license informations of the
<package_name>.mk file
The license's files are found by searching the package (case insensitive)
for files named license, license.txt etc. If more than one license file
is found, the user is asked to select which ones he wants to use.
"""
lines = []
filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT',
'COPYING', 'COPYING.TXT']
license_files = list(find_file_upper_case(filenames, self.tmp_extract))
lines.append(self.__get_license_names(license_files))
license_files = [license.replace(self.tmp_extract, '')[1:]
for license in license_files]
if len(license_files) > 0: