scanpypi: get license names from SPDX database

Use the spdx_lookup package to compare the text of each package's license
files against the SPDX database.

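This feature is optional: when spdx_lookup is not installed, license names
are still derived from the package's PyPI metadata.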

Bonus: fix wrong indentation.

Signed-off-by: Yegor Yefremov <yegorslists@googlemail.com>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
This commit is contained in:
Yegor Yefremov 2018-01-10 09:45:52 +01:00 committed by Thomas Petazzoni
parent 3cd1908292
commit d2ac1ec6f4

View File

@ -24,6 +24,15 @@ import tempfile
import imp
from functools import wraps
try:
import spdx_lookup as liclookup
except ImportError:
# spdx_lookup is not installed
print('spdx_lookup module is not installed. This can lead to an '
'inaccurate licence detection. Please install it via\n'
'pip install spdx_lookup')
liclookup = None
def setup_decorator(func, method):
"""
Decorator for distutils.core.setup and setuptools.setup.
@ -354,71 +363,86 @@ class BuildrootPackage():
lines.append(setup_type_line)
return lines
def __get_license_names(self, license_files):
    """
    Try to determine the related license name.

    There are two possibilities. Either the script tries to get the
    license name from the package's metadata or, if the spdx_lookup
    package is available, the script compares the license files with
    the SPDX database.

    license_files is an iterable of paths to the license files found in
    the extracted package; it is only read when spdx_lookup is available.

    Returns the newline-terminated '<NAME>_LICENSE = ...' line. When
    spdx_lookup is available but no license file is matched with enough
    confidence, an empty string is returned.
    """
    license_line = ''
    if liclookup is None:
        # Translation table from PyPI trove classifier names to the
        # license identifiers used in the generated .mk file.
        license_dict = {
            'Apache Software License': 'Apache-2.0',
            'BSD License': 'BSD',
            'European Union Public Licence 1.0': 'EUPL-1.0',
            'European Union Public Licence 1.1': 'EUPL-1.1',
            "GNU General Public License": "GPL",
            "GNU General Public License v2": "GPL-2.0",
            "GNU General Public License v2 or later": "GPL-2.0+",
            "GNU General Public License v3": "GPL-3.0",
            "GNU General Public License v3 or later": "GPL-3.0+",
            "GNU Lesser General Public License v2": "LGPL-2.1",
            "GNU Lesser General Public License v2 or later": "LGPL-2.1+",
            "GNU Lesser General Public License v3": "LGPL-3.0",
            "GNU Lesser General Public License v3 or later": "LGPL-3.0+",
            "GNU Library or Lesser General Public License": "LGPL-2.0",
            "ISC License": "ISC",
            "MIT License": "MIT",
            "Mozilla Public License 1.0": "MPL-1.0",
            "Mozilla Public License 1.1": "MPL-1.1",
            "Mozilla Public License 2.0": "MPL-2.0",
            "Zope Public License": "ZPL"
        }
        # Raw string: '\(' in a normal string literal is an invalid escape.
        regexp = re.compile(r'^License :* *.* *:+ (.*)( \(.*\))?$')
        classifiers_licenses = [regexp.sub(r"\1", lic)
                                for lic in self.metadata['info']['classifiers']
                                if regexp.match(lic)]
        # Build a real list: on Python 3 map() returns an iterator, which
        # has no len() and would be exhausted by the first join().
        licenses = [license_dict.get(lic, lic)
                    for lic in classifiers_licenses]
        if not licenses:
            # No license classifier: fall back to the free-form "license"
            # metadata field. Assign it before printing so the warning
            # shows the value that is actually written out.
            licenses = [self.metadata['info']['license']]
            print('WARNING: License has been set to "{license}". It is most'
                  ' likely wrong, please change it if need be'.format(
                      license=', '.join(licenses)))
        license_line = '{name}_LICENSE = {license}\n'.format(
            name=self.mk_name,
            license=', '.join(licenses))
    else:
        license_names = []
        for license_file in license_files:
            with open(license_file) as lic_file:
                match = liclookup.match(lic_file.read())
            # liclookup.match() yields None when no SPDX license matches,
            # so guard before reading the confidence.
            if match is not None and match.confidence >= 90.0:
                license_names.append(match.license.id)

        if license_names:
            license_line = ('{name}_LICENSE ='
                            ' {names}\n'.format(
                                name=self.mk_name,
                                names=', '.join(license_names)))

    return license_line
def __create_mk_license(self):
"""
Create the lines referring to the package's license informations of the
<package_name>.mk file
The license is found using the metadata from pypi.
In the metadata, the license can be found either with standard names in
the classifiers part or with naming from the packager in the "License"
part.
From the classifiers, the license is "translated" according to
buildroot standards if need be (i.e. from Apache Software License to
Apache-2.0).
From the License part, we cannot guess what formatting the packager
used. Hence, it is likely to be incorrect. (i.e. Apache License 2.0
instead of Apache-2.0).
The license's files are found by searching the package for files named
license or license.txt (case insensitive).
If more than one license file is found, the user is asked to select
which ones he wants to use.
The license's files are found by searching the package (case insensitive)
for files named license, license.txt etc. If more than one license file
is found, the user is asked to select which ones he wants to use.
"""
license_dict = {
'Apache Software License': 'Apache-2.0',
'BSD License': 'BSD',
'European Union Public Licence 1.0': 'EUPL-1.0',
'European Union Public Licence 1.1': 'EUPL-1.1',
"GNU General Public License": "GPL",
"GNU General Public License v2": "GPL-2.0",
"GNU General Public License v2 or later": "GPL-2.0+",
"GNU General Public License v3": "GPL-3.0",
"GNU General Public License v3 or later": "GPL-3.0+",
"GNU Lesser General Public License v2": "LGPL-2.1",
"GNU Lesser General Public License v2 or later": "LGPL-2.1+",
"GNU Lesser General Public License v3": "LGPL-3.0",
"GNU Lesser General Public License v3 or later": "LGPL-3.0+",
"GNU Library or Lesser General Public License": "LGPL-2.0",
"ISC License": "ISC",
"MIT License": "MIT",
"Mozilla Public License 1.0": "MPL-1.0",
"Mozilla Public License 1.1": "MPL-1.1",
"Mozilla Public License 2.0": "MPL-2.0",
"Zope Public License": "ZPL"
}
regexp = re.compile('^License :* *.* *:+ (.*)( \(.*\))?$')
classifiers_licenses = [regexp.sub(r"\1", lic)
for lic in self.metadata['info']['classifiers']
if regexp.match(lic)]
licenses = map(lambda x: license_dict[x] if x in license_dict else x,
classifiers_licenses)
lines = []
if not len(licenses):
print('WARNING: License has been set to "{license}". It is most'
' likely wrong, please change it if need be'.format(
license=', '.join(licenses)))
licenses = [self.metadata['info']['license']]
license_line = '{name}_LICENSE = {license}\n'.format(
name=self.mk_name,
license=', '.join(licenses))
lines.append(license_line)
filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT',
'COPYING', 'COPYING.TXT']
'COPYING', 'COPYING.TXT']
license_files = list(find_file_upper_case(filenames, self.tmp_extract))
lines.append(self.__get_license_names(license_files))
license_files = [license.replace(self.tmp_extract, '')[1:]
for license in license_files]
if len(license_files) > 0: