1/****************************************************************************
2**
3** Copyright (C) 2017 The Qt Company Ltd.
4** Contact: https://www.qt.io/licensing/
5**
6** This file is part of the QtCore module of the Qt Toolkit.
7**
8** $QT_BEGIN_LICENSE:LGPL$
9** Commercial License Usage
10** Licensees holding valid commercial Qt licenses may use this file in
11** accordance with the commercial license agreement provided with the
12** Software or, alternatively, in accordance with the terms contained in
13** a written agreement between you and The Qt Company. For licensing terms
14** and conditions see https://www.qt.io/terms-conditions. For further
15** information use the contact form at https://www.qt.io/contact-us.
16**
17** GNU Lesser General Public License Usage
18** Alternatively, this file may be used under the terms of the GNU Lesser
19** General Public License version 3 as published by the Free Software
20** Foundation and appearing in the file LICENSE.LGPL3 included in the
21** packaging of this file. Please review the following information to
22** ensure the GNU Lesser General Public License version 3 requirements
23** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24**
25** GNU General Public License Usage
26** Alternatively, this file may be used under the terms of the GNU
27** General Public License version 2.0 or (at your option) the GNU General
28** Public license version 3 or any later version approved by the KDE Free
29** Qt Foundation. The licenses are as published by the Free Software
30** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31** included in the packaging of this file. Please review the following
32** information to ensure the GNU General Public License requirements will
33** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34** https://www.gnu.org/licenses/gpl-3.0.html.
35**
36** $QT_END_LICENSE$
37**
38****************************************************************************/
39
40#include "private/qsimd_p.h"
41
// The x86 F16C instructions operate on AVX registers, so AVX support is
// required. We don't need to check for __F16C__ because this file wouldn't
// have been compiled if the support was missing in the first place, and not
// all compilers define it. Technically, we don't need to check for __AVX__
// either.
47#if !QT_COMPILER_SUPPORTS_HERE(AVX)
48# error "AVX support required"
49#endif
50
51#ifdef __cplusplus
52QT_BEGIN_NAMESPACE
53extern "C" {
54#endif
55
56QT_FUNCTION_TARGET(F16C)
57void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) Q_DECL_NOEXCEPT
58{
59 qsizetype i = 0;
60 int epilog_i;
61 for (; i < len - 7; i += 8)
62 _mm_storeu_si128((__m128i *)(out + i), _mm256_cvtps_ph(_mm256_loadu_ps(in + i), 0));
63 if (i < len - 3) {
64 _mm_storel_epi64((__m128i *)(out + i), _mm_cvtps_ph(_mm_loadu_ps(in + i), 0));
65 i += 4;
66 }
67 // Inlining "qfloat16::qfloat16(float f)":
68 for (epilog_i = 0; i < len && epilog_i < 3; ++i, ++epilog_i)
69 out[i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(in[i]), 0), 0);
70}
71
72QT_FUNCTION_TARGET(F16C)
73void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) Q_DECL_NOEXCEPT
74{
75 qsizetype i = 0;
76 int epilog_i;
77 for (; i < len - 7; i += 8)
78 _mm256_storeu_ps(out + i, _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(in + i))));
79 if (i < len - 3) {
80 _mm_storeu_ps(out + i, _mm_cvtph_ps(_mm_loadl_epi64((const __m128i *)(in + i))));
81 i += 4;
82 }
83 // Inlining "qfloat16::operator float()":
84 for (epilog_i = 0; i < len && epilog_i < 3; ++i, ++epilog_i)
85 out[i] = _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(in[i])));
86}
87
88#ifdef __cplusplus
89} // extern "C"
90QT_END_NAMESPACE
91#endif
92