386 lines
12 KiB
Diff
386 lines
12 KiB
Diff
|
From 576b64d6a4253f900d4846503df55dba051063ab Mon Sep 17 00:00:00 2001
|
||
|
From: gxw <guxiwei-hf@loongson.cn>
|
||
|
Date: Tue, 22 Oct 2019 19:20:19 +0800
|
||
|
Subject: [PATCH] Adjust the mmi/msa detection mode for mips platform.
|
||
|
|
||
|
Use mips-simd-check.sh to test whether the current compiler supports
|
||
|
mmi/msa before running make. If it does, enable mmi/msa.
|
||
|
For runtime detection, use the model name in /proc/cpuinfo to test
|
||
|
whether the current CPU supports mmi/msa.
|
||
|
Now we can use the following make commands on the mips platform:
|
||
|
1. make (automatic detection of mmi/msa)
|
||
|
2. make ENABLE_MMI=No (disable mmi)
|
||
|
3. make ENABLE_MSA=No (disable msa)
|
||
|
4. make ENABLE_MMI=No ENABLE_MSA=No (disable mmi and msa)
|
||
|
|
||
|
Change-Id: Ibd348ebc11912d7fca1b548c76838675d69b7c40
|
||
|
|
||
|
Downloaded from upstream PR:
|
||
|
https://github.com/cisco/openh264/pull/3175
|
||
|
|
||
|
Signed-off-by: gxw <guxiwei-hf@loongson.cn>
|
||
|
[Bernd: rebased on top of patch 0001]
|
||
|
Signed-off-by: Bernd Kuhls <bernd.kuhls@t-online.de>
|
||
|
---
|
||
|
Makefile | 2 ++
|
||
|
build/arch.mk | 18 ++++++++++++----
|
||
|
build/mips-simd-check.sh | 32 +++++++++++++++++++++++++++
|
||
|
build/mktargets.py | 43 ++++++++++++++++++++++++++-----------
|
||
|
codec/common/inc/cpu_core.h | 1 +
|
||
|
codec/common/src/cpu.cpp | 37 +++++++++++++++++++++++++------
|
||
|
codec/common/targets.mk | 18 ++++++++++++----
|
||
|
codec/decoder/targets.mk | 18 ++++++++++++----
|
||
|
codec/encoder/targets.mk | 18 ++++++++++++----
|
||
|
codec/processing/targets.mk | 18 ++++++++++++----
|
||
|
10 files changed, 166 insertions(+), 39 deletions(-)
|
||
|
create mode 100755 build/mips-simd-check.sh
|
||
|
|
||
|
diff --git a/Makefile b/Makefile
|
||
|
index 74ff029d9..65d13630b 100644
|
||
|
--- a/Makefile
|
||
|
+++ b/Makefile
|
||
|
@@ -35,6 +35,8 @@ GTEST_VER=release-1.8.1
|
||
|
STATIC_LDFLAGS=-lstdc++
|
||
|
STRIP ?= strip
|
||
|
USE_STACK_PROTECTOR = Yes
|
||
|
+ENABLE_MMI=Yes
|
||
|
+ENABLE_MSA=Yes
|
||
|
|
||
|
SHAREDLIB_MAJORVERSION=5
|
||
|
FULL_VERSION := 2.0.0
|
||
|
diff --git a/build/arch.mk b/build/arch.mk
|
||
|
index 8ac3e70a5..555e4afec 100644
|
||
|
--- a/build/arch.mk
|
||
|
+++ b/build/arch.mk
|
||
|
@@ -30,14 +30,24 @@ CFLAGS += -DHAVE_NEON_AARCH64
|
||
|
endif
|
||
|
endif
|
||
|
|
||
|
-#for loongson
|
||
|
+#for mips
|
||
|
ifneq ($(filter mips mips64, $(ARCH)),)
|
||
|
ifeq ($(USE_ASM), Yes)
|
||
|
ASM_ARCH = mips
|
||
|
ASMFLAGS += -I$(SRC_PATH)codec/common/mips/
|
||
|
-LOONGSON3A = $(shell g++ -dM -E - < /dev/null | grep '_MIPS_TUNE ' | cut -f 3 -d " ")
|
||
|
-ifeq ($(LOONGSON3A), "loongson3a")
|
||
|
-CFLAGS += -DHAVE_MMI
|
||
|
+#mmi
|
||
|
+ifeq ($(ENABLE_MMI), Yes)
|
||
|
+ENABLE_MMI = $(shell $(SRC_PATH)build/mips-simd-check.sh $(CC) mmi)
|
||
|
+ifeq ($(ENABLE_MMI), Yes)
|
||
|
+CFLAGS += -march=loongson3a -DHAVE_MMI
|
||
|
+endif
|
||
|
+endif
|
||
|
+#msa
|
||
|
+ifeq ($(ENABLE_MSA), Yes)
|
||
|
+ENABLE_MSA = $(shell $(SRC_PATH)build/mips-simd-check.sh $(CC) msa)
|
||
|
+ifeq ($(ENABLE_MSA), Yes)
|
||
|
+CFLAGS += -mmsa -DHAVE_MSA
|
||
|
+endif
|
||
|
endif
|
||
|
endif
|
||
|
endif
|
||
|
diff --git a/build/mips-simd-check.sh b/build/mips-simd-check.sh
|
||
|
new file mode 100755
|
||
|
index 000000000..cbc29e3d3
|
||
|
--- /dev/null
|
||
|
+++ b/build/mips-simd-check.sh
|
||
|
@@ -0,0 +1,32 @@
|
||
|
+#!/bin/bash
|
||
|
+#**********************************************************************************
|
||
|
+# This script is used in build/arch.mk for mips to detect the simd instructions:
|
||
|
+# mmi, msa (maybe more in the future).
|
||
|
+#
|
||
|
+# --usage:
|
||
|
+# ./mips-simd-check.sh $(CC) mmi
|
||
|
+# or ./mips-simd-check.sh $(CC) msa
|
||
|
+#
|
||
|
+# date: 10/17/2019 Created
|
||
|
+#**********************************************************************************
|
||
|
+
|
||
|
+TMPC=$(mktemp test.XXXXXX.c)
|
||
|
+TMPO=$(mktemp test.XXXXXX.o)
|
||
|
+if [ $2 == "mmi" ]
|
||
|
+then
|
||
|
+ echo "void main(void){ __asm__ volatile(\"punpcklhw \$f0, \$f0, \$f0\"); }" > $TMPC
|
||
|
+ $1 -march=loongson3a $TMPC -o $TMPO &> /dev/null
|
||
|
+ if test -s $TMPO
|
||
|
+ then
|
||
|
+ echo "Yes"
|
||
|
+ fi
|
||
|
+elif [ $2 == "msa" ]
|
||
|
+then
|
||
|
+ echo "void main(void){ __asm__ volatile(\"addvi.b \$w0, \$w1, 1\"); }" > $TMPC
|
||
|
+ $1 -mmsa $TMPC -o $TMPO &> /dev/null
|
||
|
+ if test -s $TMPO
|
||
|
+ then
|
||
|
+ echo "Yes"
|
||
|
+ fi
|
||
|
+fi
|
||
|
+rm -f $TMPC $TMPO
|
||
|
diff --git a/build/mktargets.py b/build/mktargets.py
|
||
|
index 593280c09..518909d3d 100755
|
||
|
--- a/build/mktargets.py
|
||
|
+++ b/build/mktargets.py
|
||
|
@@ -119,9 +119,9 @@ def find_sources():
|
||
|
armfiles.append(file)
|
||
|
mipsfiles = []
|
||
|
for file in cfiles:
|
||
|
- c = file.split('/')
|
||
|
- if 'mips' in c:
|
||
|
- mipsfiles.append(file)
|
||
|
+ c = file.split('/')
|
||
|
+ if 'mips' in c:
|
||
|
+ mipsfiles.append(file)
|
||
|
cfiles = [x for x in cfiles if x not in mipsfiles]
|
||
|
|
||
|
|
||
|
@@ -181,15 +181,34 @@ def find_sources():
|
||
|
f.write("OBJS += $(%s_OBJSARM64)\n\n"%(PREFIX))
|
||
|
|
||
|
if len(mipsfiles) > 0:
|
||
|
- f.write("%s_ASM_MIPS_SRCS=\\\n"%(PREFIX))
|
||
|
- for c in mipsfiles:
|
||
|
- f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
|
||
|
- f.write("\n")
|
||
|
- f.write("%s_OBJSMIPS += $(%s_ASM_MIPS_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX))
|
||
|
- f.write("ifeq ($(ASM_ARCH), mips)\n")
|
||
|
- f.write("%s_OBJS += $(%s_OBJSMIPS)\n"%(PREFIX,PREFIX))
|
||
|
- f.write("endif\n")
|
||
|
- f.write("OBJS += $(%s_OBJSMIPS)\n\n"%(PREFIX))
|
||
|
+ mmifiles = []
|
||
|
+ for file in mipsfiles:
|
||
|
+ if '_mmi' in file:
|
||
|
+ mmifiles.append(file)
|
||
|
+ f.write("%s_ASM_MIPS_MMI_SRCS=\\\n"%(PREFIX))
|
||
|
+ for c in mmifiles:
|
||
|
+ f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
|
||
|
+ f.write("\n")
|
||
|
+ f.write("%s_OBJSMIPS_MMI += $(%s_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))\n\n"%(PREFIX, PREFIX))
|
||
|
+ msafiles = []
|
||
|
+ for file in mipsfiles:
|
||
|
+ if '_msa' in file:
|
||
|
+ msafiles.append(file)
|
||
|
+ f.write("%s_ASM_MIPS_MSA_SRCS=\\\n"%(PREFIX))
|
||
|
+ for c in msafiles:
|
||
|
+ f.write("\t$(%s_SRCDIR)/%s\\\n"%(PREFIX, c))
|
||
|
+ f.write("\n")
|
||
|
+ f.write("%s_OBJSMIPS_MSA += $(%s_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))\n"%(PREFIX, PREFIX))
|
||
|
+ f.write("ifeq ($(ASM_ARCH), mips)\n")
|
||
|
+ f.write("ifeq ($(ENABLE_MMI), Yes)\n")
|
||
|
+ f.write("%s_OBJS += $(%s_OBJSMIPS_MMI)\n"%(PREFIX,PREFIX))
|
||
|
+ f.write("endif\n")
|
||
|
+ f.write("ifeq ($(ENABLE_MSA), Yes)\n")
|
||
|
+ f.write("%s_OBJS += $(%s_OBJSMIPS_MSA)\n"%(PREFIX,PREFIX))
|
||
|
+ f.write("endif\n")
|
||
|
+ f.write("endif\n")
|
||
|
+ f.write("OBJS += $(%s_OBJSMIPS_MMI)\n"%(PREFIX))
|
||
|
+ f.write("OBJS += $(%s_OBJSMIPS_MSA)\n\n"%(PREFIX))
|
||
|
|
||
|
f.write("OBJS += $(%s_OBJS)\n\n"%(PREFIX))
|
||
|
write_cpp_rule_pattern(f)
|
||
|
diff --git a/codec/common/inc/cpu_core.h b/codec/common/inc/cpu_core.h
|
||
|
index e5906c62b..f25787b04 100644
|
||
|
--- a/codec/common/inc/cpu_core.h
|
||
|
+++ b/codec/common/inc/cpu_core.h
|
||
|
@@ -86,6 +86,7 @@
|
||
|
|
||
|
/* For loongson */
|
||
|
#define WELS_CPU_MMI 0x00000001 /* mmi */
|
||
|
+#define WELS_CPU_MSA 0x00000002 /* msa */
|
||
|
|
||
|
/*
|
||
|
* Interfaces for CPU core feature detection as below
|
||
|
diff --git a/codec/common/src/cpu.cpp b/codec/common/src/cpu.cpp
|
||
|
index a39fd0645..94bb2d5d3 100644
|
||
|
--- a/codec/common/src/cpu.cpp
|
||
|
+++ b/codec/common/src/cpu.cpp
|
||
|
@@ -309,12 +309,37 @@ uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
|
||
|
|
||
|
#elif defined(mips)
|
||
|
/* for loongson */
|
||
|
+static uint32_t get_cpu_flags_from_cpuinfo(void)
|
||
|
+{
|
||
|
+ uint32_t flags = 0;
|
||
|
+
|
||
|
+# ifdef __linux__
|
||
|
+ FILE* fp = fopen("/proc/cpuinfo", "r");
|
||
|
+ if (!fp)
|
||
|
+ return flags;
|
||
|
+
|
||
|
+ char buf[200];
|
||
|
+ memset(buf, 0, sizeof(buf));
|
||
|
+ while (fgets(buf, sizeof(buf), fp)) {
|
||
|
+ if (!strncmp(buf, "model name", strlen("model name"))) {
|
||
|
+ if (strstr(buf, "3A4000")) {
|
||
|
+ flags |= WELS_CPU_MSA | WELS_CPU_MMI;
|
||
|
+ } else if (strstr(buf, "2K1000")) {
|
||
|
+ flags |= WELS_CPU_MSA | WELS_CPU_MMI;
|
||
|
+ } else if (strstr(buf, "Loongson-3A") || strstr(buf, "Loongson-3B")) {
|
||
|
+ flags |= WELS_CPU_MMI;
|
||
|
+ }
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ }
|
||
|
+ fclose(fp);
|
||
|
+# endif
|
||
|
+
|
||
|
+ return flags;
|
||
|
+}
|
||
|
+
|
||
|
uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
|
||
|
-#if defined(HAVE_MMI)
|
||
|
- return WELS_CPU_MMI;
|
||
|
-#else
|
||
|
- return 0;
|
||
|
-#endif
|
||
|
+ return get_cpu_flags_from_cpuinfo();
|
||
|
}
|
||
|
|
||
|
#else /* Neither X86_ASM, HAVE_NEON, HAVE_NEON_AARCH64 nor mips */
|
||
|
@@ -324,5 +349,3 @@ uint32_t WelsCPUFeatureDetect (int32_t* pNumberOfLogicProcessors) {
|
||
|
}
|
||
|
|
||
|
#endif
|
||
|
-
|
||
|
-
|
||
|
diff --git a/codec/common/targets.mk b/codec/common/targets.mk
|
||
|
index 96843cd9d..f2cd192fd 100644
|
||
|
--- a/codec/common/targets.mk
|
||
|
+++ b/codec/common/targets.mk
|
||
|
@@ -66,18 +66,28 @@ COMMON_OBJS += $(COMMON_OBJSARM64)
|
||
|
endif
|
||
|
OBJS += $(COMMON_OBJSARM64)
|
||
|
|
||
|
-COMMON_ASM_MIPS_SRCS=\
|
||
|
+COMMON_ASM_MIPS_MMI_SRCS=\
|
||
|
$(COMMON_SRCDIR)/mips/copy_mb_mmi.c\
|
||
|
$(COMMON_SRCDIR)/mips/deblock_mmi.c\
|
||
|
$(COMMON_SRCDIR)/mips/expand_picture_mmi.c\
|
||
|
$(COMMON_SRCDIR)/mips/intra_pred_com_mmi.c\
|
||
|
$(COMMON_SRCDIR)/mips/satd_sad_mmi.c\
|
||
|
|
||
|
-COMMON_OBJSMIPS += $(COMMON_ASM_MIPS_SRCS:.c=.$(OBJ))
|
||
|
+COMMON_OBJSMIPS_MMI += $(COMMON_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
|
||
|
+
|
||
|
+COMMON_ASM_MIPS_MSA_SRCS=\
|
||
|
+
|
||
|
+COMMON_OBJSMIPS_MSA += $(COMMON_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
|
||
|
ifeq ($(ASM_ARCH), mips)
|
||
|
-COMMON_OBJS += $(COMMON_OBJSMIPS)
|
||
|
+ifeq ($(ENABLE_MMI), Yes)
|
||
|
+COMMON_OBJS += $(COMMON_OBJSMIPS_MMI)
|
||
|
+endif
|
||
|
+ifeq ($(ENABLE_MSA), Yes)
|
||
|
+COMMON_OBJS += $(COMMON_OBJSMIPS_MSA)
|
||
|
+endif
|
||
|
endif
|
||
|
-OBJS += $(COMMON_OBJSMIPS)
|
||
|
+OBJS += $(COMMON_OBJSMIPS_MMI)
|
||
|
+OBJS += $(COMMON_OBJSMIPS_MSA)
|
||
|
|
||
|
OBJS += $(COMMON_OBJS)
|
||
|
|
||
|
diff --git a/codec/decoder/targets.mk b/codec/decoder/targets.mk
|
||
|
index c01618411..88dc5afb1 100644
|
||
|
--- a/codec/decoder/targets.mk
|
||
|
+++ b/codec/decoder/targets.mk
|
||
|
@@ -57,14 +57,24 @@ DECODER_OBJS += $(DECODER_OBJSARM64)
|
||
|
endif
|
||
|
OBJS += $(DECODER_OBJSARM64)
|
||
|
|
||
|
-DECODER_ASM_MIPS_SRCS=\
|
||
|
+DECODER_ASM_MIPS_MMI_SRCS=\
|
||
|
$(DECODER_SRCDIR)/core/mips/dct_mmi.c\
|
||
|
|
||
|
-DECODER_OBJSMIPS += $(DECODER_ASM_MIPS_SRCS:.c=.$(OBJ))
|
||
|
+DECODER_OBJSMIPS_MMI += $(DECODER_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
|
||
|
+
|
||
|
+DECODER_ASM_MIPS_MSA_SRCS=\
|
||
|
+
|
||
|
+DECODER_OBJSMIPS_MSA += $(DECODER_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
|
||
|
ifeq ($(ASM_ARCH), mips)
|
||
|
-DECODER_OBJS += $(DECODER_OBJSMIPS)
|
||
|
+ifeq ($(ENABLE_MMI), Yes)
|
||
|
+DECODER_OBJS += $(DECODER_OBJSMIPS_MMI)
|
||
|
+endif
|
||
|
+ifeq ($(ENABLE_MSA), Yes)
|
||
|
+DECODER_OBJS += $(DECODER_OBJSMIPS_MSA)
|
||
|
+endif
|
||
|
endif
|
||
|
-OBJS += $(DECODER_OBJSMIPS)
|
||
|
+OBJS += $(DECODER_OBJSMIPS_MMI)
|
||
|
+OBJS += $(DECODER_OBJSMIPS_MSA)
|
||
|
|
||
|
OBJS += $(DECODER_OBJS)
|
||
|
|
||
|
diff --git a/codec/encoder/targets.mk b/codec/encoder/targets.mk
|
||
|
index 1f053280e..4fb2e690e 100644
|
||
|
--- a/codec/encoder/targets.mk
|
||
|
+++ b/codec/encoder/targets.mk
|
||
|
@@ -82,16 +82,26 @@ ENCODER_OBJS += $(ENCODER_OBJSARM64)
|
||
|
endif
|
||
|
OBJS += $(ENCODER_OBJSARM64)
|
||
|
|
||
|
-ENCODER_ASM_MIPS_SRCS=\
|
||
|
+ENCODER_ASM_MIPS_MMI_SRCS=\
|
||
|
$(ENCODER_SRCDIR)/core/mips/dct_mmi.c\
|
||
|
$(ENCODER_SRCDIR)/core/mips/quant_mmi.c\
|
||
|
$(ENCODER_SRCDIR)/core/mips/score_mmi.c\
|
||
|
|
||
|
-ENCODER_OBJSMIPS += $(ENCODER_ASM_MIPS_SRCS:.c=.$(OBJ))
|
||
|
+ENCODER_OBJSMIPS_MMI += $(ENCODER_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
|
||
|
+
|
||
|
+ENCODER_ASM_MIPS_MSA_SRCS=\
|
||
|
+
|
||
|
+ENCODER_OBJSMIPS_MSA += $(ENCODER_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
|
||
|
ifeq ($(ASM_ARCH), mips)
|
||
|
-ENCODER_OBJS += $(ENCODER_OBJSMIPS)
|
||
|
+ifeq ($(ENABLE_MMI), Yes)
|
||
|
+ENCODER_OBJS += $(ENCODER_OBJSMIPS_MMI)
|
||
|
+endif
|
||
|
+ifeq ($(ENABLE_MSA), Yes)
|
||
|
+ENCODER_OBJS += $(ENCODER_OBJSMIPS_MSA)
|
||
|
+endif
|
||
|
endif
|
||
|
-OBJS += $(ENCODER_OBJSMIPS)
|
||
|
+OBJS += $(ENCODER_OBJSMIPS_MMI)
|
||
|
+OBJS += $(ENCODER_OBJSMIPS_MSA)
|
||
|
|
||
|
OBJS += $(ENCODER_OBJS)
|
||
|
|
||
|
diff --git a/codec/processing/targets.mk b/codec/processing/targets.mk
|
||
|
index 300de2d80..0f8873335 100644
|
||
|
--- a/codec/processing/targets.mk
|
||
|
+++ b/codec/processing/targets.mk
|
||
|
@@ -58,14 +58,24 @@ PROCESSING_OBJS += $(PROCESSING_OBJSARM64)
|
||
|
endif
|
||
|
OBJS += $(PROCESSING_OBJSARM64)
|
||
|
|
||
|
-PROCESSING_ASM_MIPS_SRCS=\
|
||
|
+PROCESSING_ASM_MIPS_MMI_SRCS=\
|
||
|
$(PROCESSING_SRCDIR)/src/mips/vaa_mmi.c\
|
||
|
|
||
|
-PROCESSING_OBJSMIPS += $(PROCESSING_ASM_MIPS_SRCS:.c=.$(OBJ))
|
||
|
+PROCESSING_OBJSMIPS_MMI += $(PROCESSING_ASM_MIPS_MMI_SRCS:.c=.$(OBJ))
|
||
|
+
|
||
|
+PROCESSING_ASM_MIPS_MSA_SRCS=\
|
||
|
+
|
||
|
+PROCESSING_OBJSMIPS_MSA += $(PROCESSING_ASM_MIPS_MSA_SRCS:.c=.$(OBJ))
|
||
|
ifeq ($(ASM_ARCH), mips)
|
||
|
-PROCESSING_OBJS += $(PROCESSING_OBJSMIPS)
|
||
|
+ifeq ($(ENABLE_MMI), Yes)
|
||
|
+PROCESSING_OBJS += $(PROCESSING_OBJSMIPS_MMI)
|
||
|
+endif
|
||
|
+ifeq ($(ENABLE_MSA), Yes)
|
||
|
+PROCESSING_OBJS += $(PROCESSING_OBJSMIPS_MSA)
|
||
|
+endif
|
||
|
endif
|
||
|
-OBJS += $(PROCESSING_OBJSMIPS)
|
||
|
+OBJS += $(PROCESSING_OBJSMIPS_MMI)
|
||
|
+OBJS += $(PROCESSING_OBJSMIPS_MSA)
|
||
|
|
||
|
OBJS += $(PROCESSING_OBJS)
|
||
|
|