1 | // Copyright 2014 Google Inc. All Rights Reserved. |
2 | // |
3 | // Use of this source code is governed by a BSD-style license |
4 | // that can be found in the COPYING file in the root of the source |
5 | // tree. An additional intellectual property rights grant can be found |
6 | // in the file PATENTS. All contributing project authors may |
7 | // be found in the AUTHORS file in the root of the source tree. |
8 | // ----------------------------------------------------------------------------- |
9 | // |
10 | // Author: Djordje Pesut (djordje.pesut@imgtec.com) |
11 | |
12 | #include "./dsp.h" |
13 | |
14 | #if defined(WEBP_USE_MIPS32) |
15 | |
16 | #include "../enc/cost_enc.h" |
17 | |
// MIPS32 inline-assembly version of the residual cost computation.
// Walks the coefficients res->coeffs[res->first .. res->last] and returns the
// summed cost. 'ctx0' selects the cost table (and probability) used for the
// first coefficient. If res->last is negative, only VP8BitCost(0, p0) is
// returned. Every coefficient before the last one is handled by the assembly
// loop; the last one is handled in C after the loop.
18 | static int GetResidualCost(int ctx0, const VP8Residual* const res) { |
// temp0/temp1 are scratch registers for the assembly; v_reg holds the absolute
// value of the current coefficient and ctx_reg the context derived from it.
19 | int temp0, temp1; |
20 | int v_reg, ctx_reg; |
21 | int n = res->first; |
22 | // should be prob[VP8EncBands[n]], but it's equivalent for n=0 or 1 |
23 | int p0 = res->prob[n][ctx0][0]; |
24 | CostArrayPtr const costs = res->costs; |
// t is the cost table for the current position/context; the assembly replaces
// it with costs[n + 1][ctx] after each coefficient.
25 | const uint16_t* t = costs[n][ctx0]; |
26 | // bit_cost(1, p0) is already incorporated in t[] tables, but only if ctx != 0 |
27 | // (as required by the syntax). For ctx0 == 0, we need to add it here or it'll |
28 | // be missing during the loop. |
29 | int cost = (ctx0 == 0) ? VP8BitCost(1, p0) : 0; |
30 | const int16_t* res_coeffs = res->coeffs; |
31 | const int res_last = res->last; |
// Constants are copied into locals so they can be passed to the assembly as
// register operands.
32 | const int const_max_level = MAX_VARIABLE_LEVEL; |
33 | const int const_2 = 2; |
// p_costs points at the row of per-context table pointers for position n;
// inc_p_costs is the size in bytes of one such row (NUM_CTX pointers).
34 | const uint16_t** p_costs = &costs[n][0]; |
35 | const size_t inc_p_costs = NUM_CTX * sizeof(*p_costs); |
36 | |
37 | if (res->last < 0) { |
38 | return VP8BitCost(0, p0); |
39 | } |
40 | |
// For each position n in [res->first, res->last), with v = |coeffs[n]|, the
// loop below performs:
//   cost += (halfword at VP8LevelFixedCosts + 2 * v)
//         + t[min(v, MAX_VARIABLE_LEVEL)];
//   t = costs[n + 1][min(v, 2)];
// ".set noreorder" is in effect, so the instruction written right after each
// branch (shown with a leading space) is its delay slot and executes whether
// or not the branch is taken.
41 | __asm__ volatile ( |
42 | ".set push \n\t" |
43 | ".set noreorder \n\t" |
// temp1 = res_last - n; temp0 = byte offset of coeffs[n] (2 bytes each).
44 | "subu %[temp1], %[res_last], %[n] \n\t" |
45 | "sll %[temp0], %[n], 1 \n\t" |
// Skip the loop when res_last <= n. The delay slot still advances res_coeffs
// to &coeffs[n].
46 | "blez %[temp1], 2f \n\t" |
47 | " addu %[res_coeffs], %[res_coeffs], %[temp0] \n\t" |
48 | "1: \n\t" |
// v_reg = coeffs[n] (sign-extended); n is incremented right away.
49 | "lh %[v_reg], 0(%[res_coeffs]) \n\t" |
50 | "addiu %[n], %[n], 1 \n\t" |
// v_reg = |v_reg|: replace it by its negation when it is negative.
51 | "negu %[temp0], %[v_reg] \n\t" |
52 | "slti %[temp1], %[v_reg], 0 \n\t" |
53 | "movn %[v_reg], %[temp0], %[temp1] \n\t" |
// ctx_reg = min(v_reg, 2): kept equal to v_reg when v_reg < 2, else set to 2.
54 | "sltiu %[temp0], %[v_reg], 2 \n\t" |
55 | "move %[ctx_reg], %[v_reg] \n\t" |
56 | "movz %[ctx_reg], %[const_2], %[temp0] \n\t" |
// temp1 = unsigned halfword at VP8LevelFixedCosts + 2 * v_reg.
57 | "sll %[temp1], %[v_reg], 1 \n\t" |
58 | "addu %[temp1], %[temp1], %[VP8LevelFixedCosts] \n\t" |
59 | "lhu %[temp1], 0(%[temp1]) \n\t" |
// Clamp v_reg to MAX_VARIABLE_LEVEL before using it as an index into t.
60 | "slt %[temp0], %[v_reg], %[const_max_level] \n\t" |
61 | "movz %[v_reg], %[const_max_level], %[temp0] \n\t" |
// cost += the fixed cost loaded above.
62 | "addu %[cost], %[cost], %[temp1] \n\t" |
// Turn v_reg and ctx_reg into byte offsets: 2 bytes per entry of t, 4 bytes
// per table pointer (pointers are loaded with lw below).
63 | "sll %[v_reg], %[v_reg], 1 \n\t" |
64 | "sll %[ctx_reg], %[ctx_reg], 2 \n\t" |
// temp0 = t[clamped v].
65 | "addu %[v_reg], %[v_reg], %[t] \n\t" |
66 | "lhu %[temp0], 0(%[v_reg]) \n\t" |
// Step p_costs to the pointer row of the next position and form the address of
// its ctx_reg-th entry in t (the pointer itself is loaded in the delay slot).
67 | "addu %[p_costs], %[p_costs], %[inc_p_costs] \n\t" |
68 | "addu %[t], %[p_costs], %[ctx_reg] \n\t" |
// cost += the table cost; advance to the next coefficient.
69 | "addu %[cost], %[cost], %[temp0] \n\t" |
70 | "addiu %[res_coeffs], %[res_coeffs], 2 \n\t" |
// Loop until n reaches res_last. The delay slot loads the new table pointer
// into t on every iteration, including the last one.
71 | "bne %[n], %[res_last], 1b \n\t" |
72 | " lw %[t], 0(%[t]) \n\t" |
73 | "2: \n\t" |
74 | ".set pop \n\t" |
// Outputs: "+&r" operands are read and written by the assembly, "=&r" ones
// are written only. All are marked early-clobber so that none of them shares
// a register with an input operand.
75 | : [cost]"+&r" (cost), [t]"+&r" (t), [n]"+&r" (n), [v_reg]"=&r" (v_reg), |
76 | [ctx_reg]"=&r" (ctx_reg), [p_costs]"+&r" (p_costs), [temp0]"=&r" (temp0), |
77 | [temp1]"=&r" (temp1), [res_coeffs]"+&r" (res_coeffs) |
// Inputs: read-only values kept in registers for the whole loop.
78 | : [const_2]"r" (const_2), [const_max_level]"r" (const_max_level), |
79 | [VP8LevelFixedCosts]"r" (VP8LevelFixedCosts), [res_last]"r" (res_last), |
80 | [inc_p_costs]"r" (inc_p_costs) |
81 | : "memory" |
82 | ); |
83 | |
// When the loop ran, n now equals res->last and t is the table selected for
// that position; otherwise n and t still have their initial values.
84 | // Last coefficient is always non-zero |
85 | { |
86 | const int v = abs(res->coeffs[n]); |
87 | assert(v != 0); |
88 | cost += VP8LevelCost(t, v); |
// For n < 15, also add VP8BitCost(0, .) using the probability taken from the
// band of position n + 1 and the context derived from v.
89 | if (n < 15) { |
90 | const int b = VP8EncBands[n + 1]; |
91 | const int ctx = (v == 1) ? 1 : 2; |
92 | const int last_p0 = res->prob[b][ctx][0]; |
93 | cost += VP8BitCost(0, last_p0); |
94 | } |
95 | } |
96 | return cost; |
97 | } |
98 | |
// Stores 'coeffs' in 'res' and sets res->last to the index of the last
// non-zero coefficient, or to -1 when none is found.
// The assembly scans 16 coefficients (32 bytes) starting from the end, two at
// a time, by loading each pair as one unaligned 32-bit word.
99 | static void SetResidualCoeffs(const int16_t* const coeffs, |
100 | VP8Residual* const res) { |
// Working pointer moved by the assembly; 'coeffs' itself is stored unchanged
// into res at the end.
101 | const int16_t* p_coeffs = (int16_t*)coeffs; |
// temp2 receives the result; n is the index of the higher coefficient of the
// pair currently examined and n1 is n - 1.
102 | int temp0, temp1, temp2, n, n1; |
103 | assert(res->first == 0 || coeffs[0] == 0); |
104 | |
// ".set noreorder" is in effect: the instruction written right after each
// branch or jump (shown with a leading space) is its delay slot and always
// executes.
105 | __asm__ volatile ( |
106 | ".set push \n\t" |
107 | ".set noreorder \n\t" |
// Start at the last pair: p_coeffs -> coeffs[14], n = 15, result defaults
// to -1.
108 | "addiu %[p_coeffs], %[p_coeffs], 28 \n\t" |
109 | "li %[n], 15 \n\t" |
110 | "li %[temp2], -1 \n\t" |
111 | "0: \n\t" |
// Load coeffs[n - 1] and coeffs[n] as one word; if the whole word is zero,
// move on to the previous pair.
112 | "ulw %[temp0], 0(%[p_coeffs]) \n\t" |
113 | "beqz %[temp0], 1f \n\t" |
// Delay slot: isolate the half of the word that holds coeffs[n] (the half at
// the higher address). It is the low half on big-endian, where it is shifted
// up, and the high half on little-endian, where it is shifted down. Only its
// zero/non-zero state is used afterwards.
114 | #if defined(WORDS_BIGENDIAN) |
115 | " sll %[temp1], %[temp0], 16 \n\t" |
116 | #else |
117 | " srl %[temp1], %[temp0], 16 \n\t" |
118 | #endif |
// The pair is not all zero: choose n when coeffs[n] != 0, otherwise n - 1.
119 | "addiu %[n1], %[n], -1 \n\t" |
120 | "movz %[temp0], %[n1], %[temp1] \n\t" |
121 | "movn %[temp0], %[n], %[temp1] \n\t" |
// Leave the loop; the delay slot copies the chosen index into temp2.
122 | "j 2f \n\t" |
123 | " addiu %[temp2], %[temp0], 0 \n\t" |
124 | "1: \n\t" |
// Previous pair: n -= 2 and, in the delay slot, p_coeffs -= 4 bytes. The loop
// continues while n > 0, i.e. for n = 15, 13, ..., 1.
125 | "addiu %[n], %[n], -2 \n\t" |
126 | "bgtz %[n], 0b \n\t" |
127 | " addiu %[p_coeffs], %[p_coeffs], -4 \n\t" |
128 | "2: \n\t" |
129 | ".set pop \n\t" |
// All operands are outputs: p_coeffs is read and written, the others are
// initialized inside the assembly itself.
130 | : [p_coeffs]"+&r" (p_coeffs), [temp0]"=&r" (temp0), |
131 | [temp1]"=&r" (temp1), [temp2]"=&r" (temp2), |
132 | [n]"=&r" (n), [n1]"=&r" (n1) |
133 | : |
134 | : "memory" |
135 | ); |
// temp2 is -1 if every examined word was zero, otherwise the index found.
136 | res->last = temp2; |
137 | res->coeffs = coeffs; |
138 | } |
139 | |
140 | //------------------------------------------------------------------------------ |
141 | // Entry point |
142 | |
143 | extern void VP8EncDspCostInitMIPS32(void); |
144 | |
145 | WEBP_TSAN_IGNORE_FUNCTION void VP8EncDspCostInitMIPS32(void) { |
146 | VP8GetResidualCost = GetResidualCost; |
147 | VP8SetResidualCoeffs = SetResidualCoeffs; |
148 | } |
149 | |
150 | #else // !WEBP_USE_MIPS32 |
151 | |
// Built without WEBP_USE_MIPS32: the init entry point is supplied through
// WEBP_DSP_INIT_STUB instead of the implementation above. The macro is defined
// outside this file; presumably it expands to an empty function with the given
// name -- TODO confirm against its definition.
152 | WEBP_DSP_INIT_STUB(VP8EncDspCostInitMIPS32) |
153 | |
154 | #endif // WEBP_USE_MIPS32 |
155 | |