OpenJPH
Open-source implementation of JPEG2000 Part-15
Loading...
Searching...
No Matches
ojph_codeblock_fun.cpp
Go to the documentation of this file.
1//***************************************************************************/
2// This software is released under the 2-Clause BSD license, included
3// below.
4//
5// Copyright (c) 2019, Aous Naman
6// Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7// Copyright (c) 2019, The University of New South Wales, Australia
8//
9// Redistribution and use in source and binary forms, with or without
10// modification, are permitted provided that the following conditions are
11// met:
12//
13// 1. Redistributions of source code must retain the above copyright
14// notice, this list of conditions and the following disclaimer.
15//
16// 2. Redistributions in binary form must reproduce the above copyright
17// notice, this list of conditions and the following disclaimer in the
18// documentation and/or other materials provided with the distribution.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23// PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31//***************************************************************************/
32// This file is part of the OpenJPH software implementation.
33// File: ojph_codeblock_fun.cpp
34// Author: Aous Naman
35// Date: 28 August 2019
36//***************************************************************************/
37
38
39#include <climits>
40#include <cmath>
41
42#include "ojph_file.h"
43#include "ojph_mem.h"
44#include "ojph_params.h"
45#include "ojph_codestream.h"
47#include "ojph_codeblock_fun.h"
48
53
54namespace ojph {
55
56 namespace local
57 {
58
60 void gen_mem_clear(void* addr, size_t count);
61 void sse_mem_clear(void* addr, size_t count);
62 void avx_mem_clear(void* addr, size_t count);
63 void wasm_mem_clear(void* addr, size_t count);
64 void vsx_mem_clear(void* addr, size_t count);
65
77
78
80 void gen_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
81 float delta_inv, ui32 count, ui32* max_val);
82 void sse2_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
83 float delta_inv, ui32 count, ui32* max_val);
84 void avx2_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
85 float delta_inv, ui32 count, ui32* max_val);
86 void gen_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
87 float delta_inv, ui32 count, ui32* max_val);
88 void sse2_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
89 float delta_inv, ui32 count, ui32* max_val);
90 void avx2_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
91 float delta_inv, ui32 count, ui32* max_val);
92 void wasm_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
93 float delta_inv, ui32 count, ui32* max_val);
94 void vsx_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
95 float delta_inv, ui32 count, ui32* max_val);
96 void wasm_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
97 float delta_inv, ui32 count, ui32* max_val);
98 void vsx_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max,
99 float delta_inv, ui32 count, ui32* max_val);
100
101 void gen_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max,
102 float delta_inv, ui32 count, ui64* max_val);
103 void sse2_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max,
104 float delta_inv, ui32 count, ui64* max_val);
105 void avx2_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max,
106 float delta_inv, ui32 count, ui64* max_val);
107 void wasm_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max,
108 float delta_inv, ui32 count, ui64* max_val);
109 void vsx_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max,
110 float delta_inv, ui32 count, ui64* max_val);
111
113 void gen_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
114 float delta, ui32 count);
115 void sse2_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
116 float delta, ui32 count);
117 void avx2_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
118 float delta, ui32 count);
119 void gen_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
120 float delta, ui32 count);
121 void sse2_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
122 float delta, ui32 count);
123 void avx2_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
124 float delta, ui32 count);
125 void wasm_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
126 float delta, ui32 count);
127 void vsx_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
128 float delta, ui32 count);
129 void wasm_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
130 float delta, ui32 count);
131 void vsx_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max,
132 float delta, ui32 count);
133
134 void gen_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
135 float delta, ui32 count);
136 void sse2_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
137 float delta, ui32 count);
138 void avx2_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
139 float delta, ui32 count);
140 void gen_irv_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
141 float delta, ui32 count);
142 void wasm_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
143 float delta, ui32 count);
144 void vsx_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max,
145 float delta, ui32 count);
146
// codeblock_fun::init(bool reversible)
//
// Configures this codeblock_fun object for either reversible
// (reversible == true) or irreversible (reversible == false) coding.
//
// Structure visible in this listing:
//  * When OJPH_ENABLE_WASM_SIMD and OJPH_EMSCRIPTEN are not both defined,
//    a default, non-accelerated setup runs first and the block encoder
//    tables are initialized. Unless OJPH_DISABLE_SIMD is defined, this is
//    followed by architecture-specific sections selected at compile time
//    (x86/x86-64 with per-extension OJPH_DISABLE_* guards, ARM, PPC64LE).
//  * Otherwise (both macros defined) the WASM SIMD section is compiled
//    instead, and it also initializes the block encoder tables.
//  * In every visible section, the non-reversible branch sets tx_to_cb64
//    to NULL.
//
// NOTE(review): this listing is an extract in which the embedded listing
// numbers skip (e.g. 151 -> 155, 186 -> 189); the statements on the
// skipped lines, including the conditions that open some of the closing
// braces below, are not visible here. Consult the original source file
// before changing any logic.
147 void codeblock_fun::init(bool reversible) {
148
149#if !defined(OJPH_ENABLE_WASM_SIMD) || !defined(OJPH_EMSCRIPTEN)
150
151 // Default path, no acceleration. We may change this later
155 if (reversible) {
158 }
159 else
160 {
163 }
165
168 if (reversible) {
171 }
172 else
173 {
174 tx_to_cb64 = NULL;
176 }
// The boolean returned by initialize_block_encoder_tables() is checked
// only through assert(); ojph_unused(result) is presumably there to keep
// the variable referenced when assert() compiles to nothing -- TODO confirm
// against the ojph_unused definition in ojph_defs.h.
178 bool result = initialize_block_encoder_tables();
179 assert(result); ojph_unused(result);
180
181 #ifndef OJPH_DISABLE_SIMD
182
183 #if (defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
184
185 // Accelerated functions for INTEL/AMD CPUs
186 #ifndef OJPH_DISABLE_SSE
189 #endif // !OJPH_DISABLE_SSE
190
191 #ifndef OJPH_DISABLE_SSE2
194 if (reversible) {
197 }
198 else {
201 }
203 if (reversible) {
206 }
207 else
208 {
209 tx_to_cb64 = NULL;
211 }
212 }
213 #endif // !OJPH_DISABLE_SSE2
214
215 #ifndef OJPH_DISABLE_SSSE3
218 #endif // !OJPH_DISABLE_SSSE3
219
220 #ifndef OJPH_DISABLE_AVX
223 #endif // !OJPH_DISABLE_AVX
224
225 #ifndef OJPH_DISABLE_AVX2
229 if (reversible) {
232 }
233 else {
236 }
239 assert(result); ojph_unused(result);
240
242 if (reversible) {
245 }
246 else
247 {
248 tx_to_cb64 = NULL;
250 }
251 }
252 #endif // !OJPH_DISABLE_AVX2
253
// The AVX-512 section is compiled only for x86-64 builds (not I386).
254 #if (defined(OJPH_ARCH_X86_64) && !defined(OJPH_DISABLE_AVX512))
258 assert(result); ojph_unused(result);
259 }
260 #endif // !OJPH_DISABLE_AVX512
261
262 #elif defined(OJPH_ARCH_ARM)
// The ARM branch contains no statements.
263
264 #elif defined(OJPH_ARCH_PPC64LE)
265
266 // 128-bit VSX kernels; see ojph_simd_vsx.h.
267 // The SIMD block decoder is used everywhere on POWER10 (ISA 3.1),
268 // where it beats the scalar decoder on all measured content. On
269 // POWER9 it wins for irreversible content (more magnitude bits
270 // per sample) but trails the scalar decoder slightly on
271 // reversible content, so it is dispatched only for the former.
273 (!reversible &&
279 if (reversible) {
282 }
283 else {
286 }
288 if (reversible) {
291 }
292 else {
293 tx_to_cb64 = NULL;
295 }
296 }
297
298 #endif // !(defined(OJPH_ARCH_X86_64) || defined(OJPH_ARCH_I386))
299
300 #endif // !OJPH_DISABLE_SIMD
301
302#else // OJPH_ENABLE_WASM_SIMD && OJPH_EMSCRIPTEN
303
304 // Accelerated functions for WASM SIMD.
308 if (reversible) {
311 }
312 else {
315 }
317
320 if (reversible) {
323 }
324 else
325 {
326 tx_to_cb64 = NULL;
328 }
330 bool result = initialize_block_encoder_tables();
331 assert(result); ojph_unused(result);
332
333#endif // !OJPH_ENABLE_WASM_SIMD
334
335 }
336 } // local
337} // ojph
void gen_irv_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max, float delta, ui32 count)
bool ojph_decode_codeblock_wasm(ui8 *coded_data, ui32 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
Decodes one codeblock, processing the cleanup, significance propagation, and magnitude refinement passes.
void gen_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max, float delta_inv, ui32 count, ui64 *max_val)
ui64 avx2_find_max_val64(ui64 *address)
ui32 vsx_find_max_val32(ui32 *address)
void wasm_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max, float delta_inv, ui32 count, ui64 *max_val)
void vsx_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max, float delta_inv, ui32 count, ui64 *max_val)
void avx2_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
bool initialize_block_encoder_tables_avx512()
void vsx_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void sse2_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
ui64 vsx_find_max_val64(ui64 *address)
void avx2_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max, float delta, ui32 count)
ui32 sse2_find_max_val32(ui32 *address)
ui32 wasm_find_max_val32(ui32 *address)
ui64 sse2_find_max_val64(ui64 *address)
bool ojph_decode_codeblock_ssse3(ui8 *coded_data, ui32 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
void wasm_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void avx_mem_clear(void *addr, size_t count)
void vsx_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
ui32 avx2_find_max_val32(ui32 *address)
void avx2_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
void wasm_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void vsx_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void ojph_encode_codeblock32(ui32 *buf, ui32 missing_msbs, ui32 num_passes, ui32 width, ui32 height, ui32 stride, ui32 *lengths, ojph::mem_elastic_allocator *elastic, ojph::coded_lists *&coded)
void ojph_encode_codeblock64(ui64 *buf, ui32 missing_msbs, ui32 num_passes, ui32 width, ui32 height, ui32 stride, ui32 *lengths, ojph::mem_elastic_allocator *elastic, ojph::coded_lists *&coded)
void sse2_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
void gen_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void vsx_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void gen_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
void ojph_encode_codeblock_avx2(ui32 *buf, ui32 missing_msbs, ui32 num_passes, ui32 width, ui32 height, ui32 stride, ui32 *lengths, ojph::mem_elastic_allocator *elastic, ojph::coded_lists *&coded)
void wasm_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
ui64 wasm_find_max_val64(ui64 *address)
void sse2_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max, float delta_inv, ui32 count, ui64 *max_val)
void gen_mem_clear(void *addr, size_t count)
void gen_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void sse_mem_clear(void *addr, size_t count)
bool ojph_decode_codeblock32(ui8 *coded_data, ui32 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
Decodes one codeblock, processing the cleanup, significance propagation, and magnitude refinement passes.
void avx2_rev_tx_to_cb64(const void *sp, ui64 *dp, ui32 K_max, float delta_inv, ui32 count, ui64 *max_val)
ui64 gen_find_max_val64(ui64 *address)
bool ojph_decode_codeblock_vsx(ui8 *coded_data, ui32 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
Decodes one codeblock, processing the cleanup, significance propagation, and magnitude refinement passes.
void sse2_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void wasm_mem_clear(void *addr, size_t count)
void gen_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void wasm_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
void avx2_rev_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
bool ojph_decode_codeblock_avx2(ui8 *coded_data, ui32 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
void sse2_rev_tx_from_cb64(const ui64 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void vsx_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
bool initialize_block_encoder_tables()
void gen_rev_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
bool initialize_block_encoder_tables_avx2()
ui32 gen_find_max_val32(ui32 *address)
void wasm_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void sse2_irv_tx_to_cb32(const void *sp, ui32 *dp, ui32 K_max, float delta_inv, ui32 count, ui32 *max_val)
void ojph_encode_codeblock_avx512(ui32 *buf, ui32 missing_msbs, ui32 num_passes, ui32 width, ui32 height, ui32 stride, ui32 *lengths, ojph::mem_elastic_allocator *elastic, ojph::coded_lists *&coded)
void avx2_irv_tx_from_cb32(const ui32 *sp, void *dp, ui32 K_max, float delta, ui32 count)
void vsx_mem_clear(void *addr, size_t count)
bool ojph_decode_codeblock64(ui8 *coded_data, ui64 *decoded_data, ui32 missing_msbs, ui32 num_passes, ui32 lengths1, ui32 lengths2, ui32 width, ui32 height, ui32 stride, bool stripe_causal)
Decodes one codeblock, processing the cleanup, significance propagation, and magnitude refinement passes.
uint64_t ui64
Definition ojph_defs.h:56
@ PPC_CPU_EXT_LEVEL_ARCH_3_1
Definition ojph_arch.h:181
@ PPC_CPU_EXT_LEVEL_ARCH_3_00
Definition ojph_arch.h:180
OJPH_EXPORT int get_cpu_ext_level()
uint32_t ui32
Definition ojph_defs.h:54
@ X86_CPU_EXT_LEVEL_AVX2
Definition ojph_arch.h:163
@ X86_CPU_EXT_LEVEL_AVX
Definition ojph_arch.h:162
@ X86_CPU_EXT_LEVEL_AVX512
Definition ojph_arch.h:165
@ X86_CPU_EXT_LEVEL_SSE2
Definition ojph_arch.h:157
@ X86_CPU_EXT_LEVEL_SSE
Definition ojph_arch.h:156
@ X86_CPU_EXT_LEVEL_SSSE3
Definition ojph_arch.h:159
#define ojph_unused(x)
Definition ojph_defs.h:78
find_max_val_fun32 find_max_val32
find_max_val_fun64 find_max_val64