David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 2 | /* Linux driver for Philips webcam |
| 3 | Decompression for chipset version 2 et 3 |
| 4 | (C) 2004-2006 Luc Saillard (luc@saillard.org) |
| 5 | |
| 6 | NOTE: this version of pwc is an unofficial (modified) release of pwc & pcwx |
| 7 | driver and thus may have bugs that are not present in the original version. |
| 8 | Please send bug reports and support requests to <luc@saillard.org>. |
| 9 | The decompression routines have been implemented by reverse-engineering the |
| 10 | Nemosoft binary pwcx module. Caveat emptor. |
| 11 | |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 12 | |
| 13 | */ |
| 14 | |
| 15 | #include "pwc-timon.h" |
| 16 | #include "pwc-kiara.h" |
| 17 | #include "pwc-dec23.h" |
| 18 | |
| 19 | #include <linux/string.h> |
| 20 | #include <linux/slab.h> |
| 21 | |
/*
 * USE_LOOKUP_TABLE_TO_CLAMP
 *   0: clamp with a plain C test: { a<0?0:(a>255?255:a) }
 *   1: clamp with a lookup table, which is faster on CPUs with a big cache (Intel)
 */
| 27 | #define USE_LOOKUP_TABLE_TO_CLAMP 1 |
/*
 * UNROLL_LOOP_FOR_COPY
 *   0: use a loop, for smaller (but slightly slower) code
 *   1: unroll the loop; gcc then produces somewhat faster code (perhaps only
 *      valid for Intel-class processors). Activating this option automatically
 *      activates USE_LOOKUP_TABLE_TO_CLAMP
 */
| 35 | #define UNROLL_LOOP_FOR_COPY 1 |
| 36 | #if UNROLL_LOOP_FOR_COPY |
| 37 | # undef USE_LOOKUP_TABLE_TO_CLAMP |
| 38 | # define USE_LOOKUP_TABLE_TO_CLAMP 1 |
| 39 | #endif |
| 40 | |
| 41 | static void build_subblock_pattern(struct pwc_dec23_private *pdec) |
| 42 | { |
| 43 | static const unsigned int initial_values[12] = { |
| 44 | -0x526500, -0x221200, 0x221200, 0x526500, |
| 45 | -0x3de200, 0x3de200, |
| 46 | -0x6db480, -0x2d5d00, 0x2d5d00, 0x6db480, |
| 47 | -0x12c200, 0x12c200 |
| 48 | |
| 49 | }; |
| 50 | static const unsigned int values_derivated[12] = { |
| 51 | 0xa4ca, 0x4424, -0x4424, -0xa4ca, |
| 52 | 0x7bc4, -0x7bc4, |
| 53 | 0xdb69, 0x5aba, -0x5aba, -0xdb69, |
| 54 | 0x2584, -0x2584 |
| 55 | }; |
| 56 | unsigned int temp_values[12]; |
| 57 | int i, j; |
| 58 | |
| 59 | memcpy(temp_values, initial_values, sizeof(initial_values)); |
| 60 | for (i = 0; i < 256; i++) { |
| 61 | for (j = 0; j < 12; j++) { |
| 62 | pdec->table_subblock[i][j] = temp_values[j]; |
| 63 | temp_values[j] += values_derivated[j]; |
| 64 | } |
| 65 | } |
| 66 | } |
| 67 | |
| 68 | static void build_bit_powermask_table(struct pwc_dec23_private *pdec) |
| 69 | { |
| 70 | unsigned char *p; |
| 71 | unsigned int bit, byte, mask, val; |
| 72 | unsigned int bitpower = 1; |
| 73 | |
| 74 | for (bit = 0; bit < 8; bit++) { |
| 75 | mask = bitpower - 1; |
| 76 | p = pdec->table_bitpowermask[bit]; |
| 77 | for (byte = 0; byte < 256; byte++) { |
| 78 | val = (byte & mask); |
| 79 | if (byte & bitpower) |
| 80 | val = -val; |
| 81 | *p++ = val; |
| 82 | } |
| 83 | bitpower<<=1; |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | |
/*
 * Expand a ROM table into the two lookup tables used while decoding.
 *
 * @romtable: 16 compression modes x 8 words; each word packs six 3-bit
 *            fields at bit offsets 15, 12, 9, 6, 3 and 0.
 * @p0004:    output, 16 modes x (8 rows x 128 bytes).
 * @p8004:    output, 16 modes x (8 rows x 16 byte pairs).
 *
 * For every mode, ROM word (row) and k = 0..15 a value `bit` is chosen:
 * 1 for k == 0, otherwise the 3-bit field selected by k. Then
 *   - p8004 receives the pair { k == 0 ? 8 : row - bit, bit };
 *   - p0004 receives, at offsets k + 0x00 .. k + 0x70 in steps of 0x10,
 *     the values (+1, +2, +3, +4, -1, -2, -3, -4) * (1 << bit) + 0x80.
 * All stores are truncated to unsigned char.
 */
static void build_table_color(const unsigned int romtable[16][8],
			      unsigned char p0004[16][1024],
			      unsigned char p8004[16][256])
{
	/* Right shift that brings the 3-bit field for index k down to bit 0
	 * (entry 0 is unused: k == 0 always takes bit = 1). */
	static const unsigned char field_shift[16] = {
		0, 15, 15, 12, 12, 12, 9, 9, 9, 9, 6, 6, 6, 3, 3, 0
	};
	int mode, row, k, mult;

	for (mode = 0; mode < 16; mode++) {
		for (row = 0; row < 8; row++) {
			const unsigned int word = romtable[mode][row];
			unsigned char *out0 = &p0004[mode][row * 128];
			unsigned char *out8 = &p8004[mode][row * 32];

			for (k = 0; k < 16; k++) {
				int bit, pw;

				if (k == 0) {
					bit = 1;
					out8[2 * k] = 8;
				} else {
					bit = (word >> field_shift[k]) & 7;
					out8[2 * k] = row - bit;
				}
				out8[2 * k + 1] = bit;

				pw = 1 << bit;
				for (mult = 1; mult <= 4; mult++) {
					/* positive multiples at 0x00..0x30 */
					out0[k + 0x10 * (mult - 1)] = (mult * pw) + 0x80;
					/* negative multiples at 0x40..0x70 */
					out0[k + 0x40 + 0x10 * (mult - 1)] = (-mult * pw) + 0x80;
				}
			}
		}
	}
}
| 138 | |
| 139 | /* |
| 140 | * |
| 141 | */ |
| 142 | static void fill_table_dc00_d800(struct pwc_dec23_private *pdec) |
| 143 | { |
| 144 | #define SCALEBITS 15 |
| 145 | #define ONE_HALF (1UL << (SCALEBITS - 1)) |
| 146 | int i; |
| 147 | unsigned int offset1 = ONE_HALF; |
| 148 | unsigned int offset2 = 0x0000; |
| 149 | |
| 150 | for (i=0; i<256; i++) { |
| 151 | pdec->table_dc00[i] = offset1 & ~(ONE_HALF); |
| 152 | pdec->table_d800[i] = offset2; |
| 153 | |
| 154 | offset1 += 0x7bc4; |
| 155 | offset2 += 0x7bc4; |
| 156 | } |
| 157 | } |
| 158 | |
/*
 * Operation lookup table for the block bitstream.
 *
 * The stream is decoded as follows:
 *   if look_bits(2) == 0:	# op == 2 in the lookup table
 *      skip_bits(2)
 *      end of the stream
 *   elif look_bits(3) == 7:	# op == 1 in the lookup table
 *      skip_bits(3)
 *      yyyy = get_bits(4)
 *      xxxx = get_bits(8)
 *   else:			# op == 0 in the lookup table
 *      skip_bits(x)
 *
 * To speed this up, the next 6 bits of the stream index the table below.
 * Each entry is four bytes:
 *
 * struct {
 *   unsigned char op;	    // operation to execute
 *   unsigned char bits;    // number of bits used by the operation
 *   unsigned char offset1; // offset to add to access the table_0004 % 16
 *   unsigned char offset2; // offset to add to access the table_0004
 * }
 *
 * How the op column is laid out (i is the 6-bit index):
 *   op == 2 when (i%4)==0
 *   op == 1 when (i%8)==7
 *   op == 0 otherwise
 */
static const unsigned char hash_table_ops[64*4] = {
	/* op,  bits, off1, off2 */
	0x02, 0x00, 0x00, 0x00,		/* 0x00 */
	0x00, 0x03, 0x01, 0x00,		/* 0x01 */
	0x00, 0x04, 0x01, 0x10,		/* 0x02 */
	0x00, 0x06, 0x01, 0x30,		/* 0x03 */
	0x02, 0x00, 0x00, 0x00,		/* 0x04 */
	0x00, 0x03, 0x01, 0x40,		/* 0x05 */
	0x00, 0x05, 0x01, 0x20,		/* 0x06 */
	0x01, 0x00, 0x00, 0x00,		/* 0x07 */
	0x02, 0x00, 0x00, 0x00,		/* 0x08 */
	0x00, 0x03, 0x01, 0x00,		/* 0x09 */
	0x00, 0x04, 0x01, 0x50,		/* 0x0a */
	0x00, 0x05, 0x02, 0x00,		/* 0x0b */
	0x02, 0x00, 0x00, 0x00,		/* 0x0c */
	0x00, 0x03, 0x01, 0x40,		/* 0x0d */
	0x00, 0x05, 0x03, 0x00,		/* 0x0e */
	0x01, 0x00, 0x00, 0x00,		/* 0x0f */
	0x02, 0x00, 0x00, 0x00,		/* 0x10 */
	0x00, 0x03, 0x01, 0x00,		/* 0x11 */
	0x00, 0x04, 0x01, 0x10,		/* 0x12 */
	0x00, 0x06, 0x02, 0x10,		/* 0x13 */
	0x02, 0x00, 0x00, 0x00,		/* 0x14 */
	0x00, 0x03, 0x01, 0x40,		/* 0x15 */
	0x00, 0x05, 0x01, 0x60,		/* 0x16 */
	0x01, 0x00, 0x00, 0x00,		/* 0x17 */
	0x02, 0x00, 0x00, 0x00,		/* 0x18 */
	0x00, 0x03, 0x01, 0x00,		/* 0x19 */
	0x00, 0x04, 0x01, 0x50,		/* 0x1a */
	0x00, 0x05, 0x02, 0x40,		/* 0x1b */
	0x02, 0x00, 0x00, 0x00,		/* 0x1c */
	0x00, 0x03, 0x01, 0x40,		/* 0x1d */
	0x00, 0x05, 0x03, 0x40,		/* 0x1e */
	0x01, 0x00, 0x00, 0x00,		/* 0x1f */
	0x02, 0x00, 0x00, 0x00,		/* 0x20 */
	0x00, 0x03, 0x01, 0x00,		/* 0x21 */
	0x00, 0x04, 0x01, 0x10,		/* 0x22 */
	0x00, 0x06, 0x01, 0x70,		/* 0x23 */
	0x02, 0x00, 0x00, 0x00,		/* 0x24 */
	0x00, 0x03, 0x01, 0x40,		/* 0x25 */
	0x00, 0x05, 0x01, 0x20,		/* 0x26 */
	0x01, 0x00, 0x00, 0x00,		/* 0x27 */
	0x02, 0x00, 0x00, 0x00,		/* 0x28 */
	0x00, 0x03, 0x01, 0x00,		/* 0x29 */
	0x00, 0x04, 0x01, 0x50,		/* 0x2a */
	0x00, 0x05, 0x02, 0x00,		/* 0x2b */
	0x02, 0x00, 0x00, 0x00,		/* 0x2c */
	0x00, 0x03, 0x01, 0x40,		/* 0x2d */
	0x00, 0x05, 0x03, 0x00,		/* 0x2e */
	0x01, 0x00, 0x00, 0x00,		/* 0x2f */
	0x02, 0x00, 0x00, 0x00,		/* 0x30 */
	0x00, 0x03, 0x01, 0x00,		/* 0x31 */
	0x00, 0x04, 0x01, 0x10,		/* 0x32 */
	0x00, 0x06, 0x02, 0x50,		/* 0x33 */
	0x02, 0x00, 0x00, 0x00,		/* 0x34 */
	0x00, 0x03, 0x01, 0x40,		/* 0x35 */
	0x00, 0x05, 0x01, 0x60,		/* 0x36 */
	0x01, 0x00, 0x00, 0x00,		/* 0x37 */
	0x02, 0x00, 0x00, 0x00,		/* 0x38 */
	0x00, 0x03, 0x01, 0x00,		/* 0x39 */
	0x00, 0x04, 0x01, 0x50,		/* 0x3a */
	0x00, 0x05, 0x02, 0x40,		/* 0x3b */
	0x02, 0x00, 0x00, 0x00,		/* 0x3c */
	0x00, 0x03, 0x01, 0x40,		/* 0x3d */
	0x00, 0x05, 0x03, 0x40,		/* 0x3e */
	0x01, 0x00, 0x00, 0x00		/* 0x3f */
};
| 252 | |
/*
 * For each of the 16 possible offset1 values (row), the column of a
 * table_subblock row (0..11) to add to each of the 16 pixels of a 4x4 block.
 */
static const unsigned int MulIdx[16][16] = {
	/*  0 */ {  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0 },
	/*  1 */ {  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3 },
	/*  2 */ {  0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3 },
	/*  3 */ {  4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  4,  4,  4,  4 },
	/*  4 */ {  6,  7,  8,  9,  7, 10, 11,  8,  8, 11, 10,  7,  9,  8,  7,  6 },
	/*  5 */ {  4,  5,  5,  4,  4,  5,  5,  4,  4,  5,  5,  4,  4,  5,  5,  4 },
	/*  6 */ {  1,  3,  0,  2,  1,  3,  0,  2,  1,  3,  0,  2,  1,  3,  0,  2 },
	/*  7 */ {  0,  3,  3,  0,  1,  2,  2,  1,  2,  1,  1,  2,  3,  0,  0,  3 },
	/*  8 */ {  0,  1,  2,  3,  3,  2,  1,  0,  3,  2,  1,  0,  0,  1,  2,  3 },
	/*  9 */ {  1,  1,  1,  1,  3,  3,  3,  3,  0,  0,  0,  0,  2,  2,  2,  2 },
	/* 10 */ {  7, 10, 11,  8,  9,  8,  7,  6,  6,  7,  8,  9,  8, 11, 10,  7 },
	/* 11 */ {  4,  5,  5,  4,  5,  4,  4,  5,  5,  4,  4,  5,  4,  5,  5,  4 },
	/* 12 */ {  7,  9,  6,  8, 10,  8,  7, 11, 11,  7,  8, 10,  8,  6,  9,  7 },
	/* 13 */ {  1,  3,  0,  2,  2,  0,  3,  1,  2,  0,  3,  1,  1,  3,  0,  2 },
	/* 14 */ {  1,  2,  2,  1,  3,  0,  0,  3,  0,  3,  3,  0,  2,  1,  1,  2 },
	/* 15 */ { 10,  8,  7, 11,  8,  6,  9,  7,  7,  9,  6,  8, 11,  7,  8, 10 }
};
| 274 | |
/*
 * CLAMP(x): saturate x to the range 0..255.
 *
 * With USE_LOOKUP_TABLE_TO_CLAMP the result is read from pwc_crop_table,
 * which pwc_dec23_init() fills so that indices MAX_OUTER_CROP_VALUE + x
 * are valid for x in [-MAX_OUTER_CROP_VALUE, 255 + MAX_OUTER_CROP_VALUE).
 * Otherwise a plain comparison is used; note that this form evaluates its
 * argument more than once, so the argument must not have side effects
 * that differ between evaluations.
 */
#if USE_LOOKUP_TABLE_TO_CLAMP
#define MAX_OUTER_CROP_VALUE	(512)
static unsigned char pwc_crop_table[256 + 2*MAX_OUTER_CROP_VALUE];
#define CLAMP(x) (pwc_crop_table[MAX_OUTER_CROP_VALUE+(x)])
#else
/* every use of the argument is parenthesized so any expression can be passed */
#define CLAMP(x) ((x)>255?255:((x)<0?0:(x)))
#endif
| 282 | |
| 283 | |
| 284 | /* If the type or the command change, we rebuild the lookup table */ |
| 285 | void pwc_dec23_init(struct pwc_device *pdev, const unsigned char *cmd) |
| 286 | { |
| 287 | int flags, version, shift, i; |
| 288 | struct pwc_dec23_private *pdec = &pdev->dec23; |
| 289 | |
| 290 | mutex_init(&pdec->lock); |
| 291 | |
| 292 | if (pdec->last_cmd_valid && pdec->last_cmd == cmd[2]) |
| 293 | return; |
| 294 | |
| 295 | if (DEVICE_USE_CODEC3(pdev->type)) { |
| 296 | flags = cmd[2] & 0x18; |
| 297 | if (flags == 8) |
| 298 | pdec->nbits = 7; /* More bits, mean more bits to encode the stream, but better quality */ |
| 299 | else if (flags == 0x10) |
| 300 | pdec->nbits = 8; |
| 301 | else |
| 302 | pdec->nbits = 6; |
| 303 | |
| 304 | version = cmd[2] >> 5; |
| 305 | build_table_color(KiaraRomTable[version][0], pdec->table_0004_pass1, pdec->table_8004_pass1); |
| 306 | build_table_color(KiaraRomTable[version][1], pdec->table_0004_pass2, pdec->table_8004_pass2); |
| 307 | |
| 308 | } else { |
| 309 | |
| 310 | flags = cmd[2] & 6; |
| 311 | if (flags == 2) |
| 312 | pdec->nbits = 7; |
| 313 | else if (flags == 4) |
| 314 | pdec->nbits = 8; |
| 315 | else |
| 316 | pdec->nbits = 6; |
| 317 | |
| 318 | version = cmd[2] >> 3; |
| 319 | build_table_color(TimonRomTable[version][0], pdec->table_0004_pass1, pdec->table_8004_pass1); |
| 320 | build_table_color(TimonRomTable[version][1], pdec->table_0004_pass2, pdec->table_8004_pass2); |
| 321 | } |
| 322 | |
David Brazdil | 0f672f6 | 2019-12-10 10:32:29 +0000 | [diff] [blame^] | 323 | /* Information can be coded on a variable number of bits but never less than 8 */ |
Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 324 | shift = 8 - pdec->nbits; |
| 325 | pdec->scalebits = SCALEBITS - shift; |
| 326 | pdec->nbitsmask = 0xFF >> shift; |
| 327 | |
| 328 | fill_table_dc00_d800(pdec); |
| 329 | build_subblock_pattern(pdec); |
| 330 | build_bit_powermask_table(pdec); |
| 331 | |
| 332 | #if USE_LOOKUP_TABLE_TO_CLAMP |
| 333 | /* Build the static table to clamp value [0-255] */ |
| 334 | for (i=0;i<MAX_OUTER_CROP_VALUE;i++) |
| 335 | pwc_crop_table[i] = 0; |
| 336 | for (i=0; i<256; i++) |
| 337 | pwc_crop_table[MAX_OUTER_CROP_VALUE+i] = i; |
| 338 | for (i=0; i<MAX_OUTER_CROP_VALUE; i++) |
| 339 | pwc_crop_table[MAX_OUTER_CROP_VALUE+256+i] = 255; |
| 340 | #endif |
| 341 | |
| 342 | pdec->last_cmd = cmd[2]; |
| 343 | pdec->last_cmd_valid = 1; |
| 344 | } |
| 345 | |
/*
 * Copy a decoded 4x4 block into the Y plane.
 *
 * @src:            16 values in row-major order (4 per output line)
 * @dst:            top-left byte of the block in the destination plane
 * @bytes_per_line: distance in bytes between two destination lines
 * @scalebits:      right shift applied to each value before clamping
 */
static void copy_image_block_Y(const int *src, unsigned char *dst, unsigned int bytes_per_line, unsigned int scalebits)
{
#if UNROLL_LOOP_FOR_COPY
	const unsigned char *clamp = pwc_crop_table+MAX_OUTER_CROP_VALUE;
	unsigned char *line0 = dst;
	unsigned char *line1 = dst + bytes_per_line;
	unsigned char *line2 = dst + bytes_per_line*2;
	unsigned char *line3 = dst + bytes_per_line*3;

	line0[0] = clamp[src[0] >> scalebits];
	line0[1] = clamp[src[1] >> scalebits];
	line0[2] = clamp[src[2] >> scalebits];
	line0[3] = clamp[src[3] >> scalebits];

	line1[0] = clamp[src[4] >> scalebits];
	line1[1] = clamp[src[5] >> scalebits];
	line1[2] = clamp[src[6] >> scalebits];
	line1[3] = clamp[src[7] >> scalebits];

	line2[0] = clamp[src[8] >> scalebits];
	line2[1] = clamp[src[9] >> scalebits];
	line2[2] = clamp[src[10] >> scalebits];
	line2[3] = clamp[src[11] >> scalebits];

	line3[0] = clamp[src[12] >> scalebits];
	line3[1] = clamp[src[13] >> scalebits];
	line3[2] = clamp[src[14] >> scalebits];
	line3[3] = clamp[src[15] >> scalebits];
#else
	unsigned int line, col;

	for (line = 0; line < 4; line++) {
		unsigned char *out = dst + bytes_per_line*line;

		for (col = 0; col < 4; col++) {
			int scaled = src[line * 4 + col] >> scalebits;

			out[col] = CLAMP(scaled);
		}
	}
#endif
}
| 398 | |
/*
 * Copy a decoded 4x4 block into a chroma plane as two lines of 8 bytes.
 *
 * The first output line interleaves source rows 0 and 1
 * (src[0], src[4], src[1], src[5], ...); the second output line interleaves
 * source rows 3 and 2 (src[12], src[8], src[13], src[9], ...).
 *
 * @src:            16 decoded values
 * @dst:            first byte of the first output line
 * @bytes_per_line: distance in bytes between the two output lines
 * @scalebits:      right shift applied to each value before clamping
 */
static void copy_image_block_CrCb(const int *src, unsigned char *dst, unsigned int bytes_per_line, unsigned int scalebits)
{
#if UNROLL_LOOP_FOR_COPY
	/* Fully unrolled variant */
	const unsigned char *clamp = pwc_crop_table+MAX_OUTER_CROP_VALUE;
	unsigned char *top = dst;
	unsigned char *bottom = dst + bytes_per_line;

	top[0] = clamp[src[0] >> scalebits];
	top[1] = clamp[src[4] >> scalebits];
	top[2] = clamp[src[1] >> scalebits];
	top[3] = clamp[src[5] >> scalebits];
	top[4] = clamp[src[2] >> scalebits];
	top[5] = clamp[src[6] >> scalebits];
	top[6] = clamp[src[3] >> scalebits];
	top[7] = clamp[src[7] >> scalebits];

	bottom[0] = clamp[src[12] >> scalebits];
	bottom[1] = clamp[src[8] >> scalebits];
	bottom[2] = clamp[src[13] >> scalebits];
	bottom[3] = clamp[src[9] >> scalebits];
	bottom[4] = clamp[src[14] >> scalebits];
	bottom[5] = clamp[src[10] >> scalebits];
	bottom[6] = clamp[src[15] >> scalebits];
	bottom[7] = clamp[src[11] >> scalebits];
#else
	unsigned char *top = dst;
	unsigned char *bottom = dst + bytes_per_line;
	int col, scaled;

	for (col = 0; col < 4; col++) {
		scaled = src[col] >> scalebits;
		top[2 * col] = CLAMP(scaled);
		scaled = src[4 + col] >> scalebits;
		top[2 * col + 1] = CLAMP(scaled);
	}
	for (col = 0; col < 4; col++) {
		scaled = src[12 + col] >> scalebits;
		bottom[2 * col] = CLAMP(scaled);
		scaled = src[8 + col] >> scalebits;
		bottom[2 * col + 1] = CLAMP(scaled);
	}
#endif
}
| 447 | |
/*
 * Bitstream helpers. Pending bits are kept in pdec->reservoir, least
 * significant bit first; pdec->nbits_in_reservoir counts them and
 * pdec->stream points at the next unread byte.
 *
 * fill_nbits(pdec, n):	  read bytes until the reservoir holds at least n bits
 * skip_nbits(pdec, n):	  discard n bits from the reservoir
 * get_nbits(pdec, n, r): fill the reservoir, store the first n bits in r and
 *			  discard them from the reservoir
 * __get_nbits(pdec, n, r): faster version of get_nbits(), but assumes that the
 *			  reservoir already contains at least n bits; the bits
 *			  returned are discarded
 * look_nbits(pdec, n):	  value of the first n bits, without discarding them
 *
 * The statement macros expand to a single `do { } while (0)` statement with
 * no trailing semicolon, so they can be used in an unbraced if/else. The
 * arguments may be evaluated more than once.
 */
#define fill_nbits(pdec, nbits_wanted) do { \
	while ((pdec)->nbits_in_reservoir < (nbits_wanted)) \
	{ \
		/* shift as unsigned so a byte can never reach the sign bit */ \
		(pdec)->reservoir |= (unsigned int)(*((pdec)->stream)++) << ((pdec)->nbits_in_reservoir); \
		(pdec)->nbits_in_reservoir += 8; \
	} \
} while (0)

#define skip_nbits(pdec, nbits_to_skip) do { \
	(pdec)->reservoir >>= (nbits_to_skip); \
	(pdec)->nbits_in_reservoir -= (nbits_to_skip); \
} while (0)

#define get_nbits(pdec, nbits_wanted, result) do { \
	fill_nbits(pdec, nbits_wanted); \
	(result) = ((pdec)->reservoir) & ((1U<<(nbits_wanted))-1); \
	skip_nbits(pdec, nbits_wanted); \
} while (0)

#define __get_nbits(pdec, nbits_wanted, result) do { \
	(result) = ((pdec)->reservoir) & ((1U<<(nbits_wanted))-1); \
	skip_nbits(pdec, nbits_wanted); \
} while (0)

#define look_nbits(pdec, nbits_wanted) \
	(((pdec)->reservoir) & ((1U<<(nbits_wanted))-1))
| 483 | |
/*
 * Decode one 4x4 pixel block from the bitstream into pdec->temp_colors[16].
 *
 * @pdec:       decoder state (bit reservoir, lookup tables, output colors)
 * @ptable0004: table_0004 slice for the current compression index
 * @ptable8004: table_8004 slice for the current compression index
 *
 * The block starts with a primary color of pdec->nbits bits. If the next two
 * bits are zero the block is flat: every pixel gets table_dc00[primary_color].
 * Otherwise every pixel starts at table_d800[primary_color] and a sequence of
 * operations, looked up in hash_table_ops from the next 6 bits, adds
 * sub-block patterns until an op == 2 entry ends the block.
 */
static void decode_block(struct pwc_dec23_private *pdec,
			 const unsigned char *ptable0004,
			 const unsigned char *ptable8004)
{
	unsigned int primary_color;
	unsigned int channel_v, offset1, op;
	int i;

	/* 16 bits cover the primary color (nbits) plus the 2 or 3 bits read below */
	fill_nbits(pdec, 16);
	__get_nbits(pdec, pdec->nbits, primary_color);

	if (look_nbits(pdec,2) == 0) {
		skip_nbits(pdec, 2);
		/* Very simple, the color is the same for all pixels of the square */
		for (i = 0; i < 16; i++)
			pdec->temp_colors[i] = pdec->table_dc00[primary_color];

		return;
	}

	/* This block is encoded with small patterns added to a base color */
	for (i = 0; i < 16; i++)
		pdec->temp_colors[i] = pdec->table_d800[primary_color];

	/* Read 3 bits and reverse their order (bit 0 <-> bit 2) */
	__get_nbits(pdec, 3, channel_v);
	channel_v = ((channel_v & 1) << 2) | (channel_v & 2) | ((channel_v & 4) >> 2);

	/* Select the 128-byte / 32-byte row of each table for this channel_v */
	ptable0004 += (channel_v * 128);
	ptable8004 += (channel_v * 32);

	offset1 = 0;
	do
	{
		unsigned int htable_idx, rows = 0;
		const unsigned int *block;

		/* [  zzzz y x x ]
		 *     xx == 00 :=> end of the block def, remove the two bits from the stream
		 *    yxx == 111 :=> op == 1, explicit offset and value follow
		 *    yxx == any other value :=> op == 0, everything comes from hash_table_ops
		 *
		 */
		/* op == 1 consumes at most 3 + 4 + (nbits + 1) bits before the next refill */
		fill_nbits(pdec, 16);
		htable_idx = look_nbits(pdec, 6);
		op = hash_table_ops[htable_idx * 4];

		if (op == 2) {
			skip_nbits(pdec, 2);

		} else if (op == 1) {
			/* 15bits [ xxxx xxxx yyyy 111 ]
			 * yyyy => advance of offset1, which indexes table8004
			 * xxxx xxxx => value looked up in table_bitpowermask
			 */
			unsigned int mask, shift;
			unsigned int nbits, col1;
			unsigned int yyyy;

			skip_nbits(pdec, 3);
			/* offset1 += 1 + yyyy, modulo 16 */
			__get_nbits(pdec, 4, yyyy);
			offset1 += 1 + yyyy;
			offset1 &= 0x0F;
			nbits = ptable8004[offset1 * 2];

			/* col1 = xxxx xxxx (nbits + 1 bits) */
			__get_nbits(pdec, nbits+1, col1);

			/* Bit mask table */
			/*
			 * NOTE(review): build_bit_powermask_table() fills rows 0..7 only,
			 * while build_table_color() stores 8 (k == 0) and `j - bit` in the
			 * table8004 slot read into nbits above — confirm nbits/col1 always
			 * stay inside table_bitpowermask.
			 */
			mask = pdec->table_bitpowermask[nbits][col1];
			shift = ptable8004[offset1 * 2 + 1];
			rows = ((mask << shift) + 0x80) & 0xFF;

			/* Add the selected sub-block pattern to every pixel */
			block = pdec->table_subblock[rows];
			for (i = 0; i < 16; i++)
				pdec->temp_colors[i] += block[MulIdx[offset1][i]];

		} else {
			/* op == 0
			 * the offset1 increment, the table0004 offset and the number of
			 * bits to consume all come from the hash_table_ops entry
			 */
			unsigned int shift;

			offset1 += hash_table_ops [htable_idx * 4 + 2];
			offset1 &= 0x0F;

			rows = ptable0004[offset1 + hash_table_ops [htable_idx * 4 + 3]];
			block = pdec->table_subblock[rows];
			for (i = 0; i < 16; i++)
				pdec->temp_colors[i] += block[MulIdx[offset1][i]];

			shift = hash_table_ops[htable_idx * 4 + 1];
			skip_nbits(pdec, shift);
		}

	} while (op != 2);

}
| 585 | |
| 586 | static void DecompressBand23(struct pwc_dec23_private *pdec, |
| 587 | const unsigned char *rawyuv, |
| 588 | unsigned char *planar_y, |
| 589 | unsigned char *planar_u, |
| 590 | unsigned char *planar_v, |
| 591 | unsigned int compressed_image_width, |
| 592 | unsigned int real_image_width) |
| 593 | { |
| 594 | int compression_index, nblocks; |
| 595 | const unsigned char *ptable0004; |
| 596 | const unsigned char *ptable8004; |
| 597 | |
| 598 | pdec->reservoir = 0; |
| 599 | pdec->nbits_in_reservoir = 0; |
| 600 | pdec->stream = rawyuv + 1; /* The first byte of the stream is skipped */ |
| 601 | |
| 602 | get_nbits(pdec, 4, compression_index); |
| 603 | |
| 604 | /* pass 1: uncompress Y component */ |
| 605 | nblocks = compressed_image_width / 4; |
| 606 | |
| 607 | ptable0004 = pdec->table_0004_pass1[compression_index]; |
| 608 | ptable8004 = pdec->table_8004_pass1[compression_index]; |
| 609 | |
| 610 | /* Each block decode a square of 4x4 */ |
| 611 | while (nblocks) { |
| 612 | decode_block(pdec, ptable0004, ptable8004); |
| 613 | copy_image_block_Y(pdec->temp_colors, planar_y, real_image_width, pdec->scalebits); |
| 614 | planar_y += 4; |
| 615 | nblocks--; |
| 616 | } |
| 617 | |
| 618 | /* pass 2: uncompress UV component */ |
| 619 | nblocks = compressed_image_width / 8; |
| 620 | |
| 621 | ptable0004 = pdec->table_0004_pass2[compression_index]; |
| 622 | ptable8004 = pdec->table_8004_pass2[compression_index]; |
| 623 | |
| 624 | /* Each block decode a square of 4x4 */ |
| 625 | while (nblocks) { |
| 626 | decode_block(pdec, ptable0004, ptable8004); |
| 627 | copy_image_block_CrCb(pdec->temp_colors, planar_u, real_image_width/2, pdec->scalebits); |
| 628 | |
| 629 | decode_block(pdec, ptable0004, ptable8004); |
| 630 | copy_image_block_CrCb(pdec->temp_colors, planar_v, real_image_width/2, pdec->scalebits); |
| 631 | |
| 632 | planar_v += 8; |
| 633 | planar_u += 8; |
| 634 | nblocks -= 2; |
| 635 | } |
| 636 | |
| 637 | } |
| 638 | |
| 639 | /** |
| 640 | * Uncompress a pwc23 buffer. |
| 641 | * @pdev: pointer to pwc device's internal struct |
| 642 | * @src: raw data |
| 643 | * @dst: image output |
| 644 | */ |
| 645 | void pwc_dec23_decompress(struct pwc_device *pdev, |
| 646 | const void *src, |
| 647 | void *dst) |
| 648 | { |
| 649 | int bandlines_left, bytes_per_block; |
| 650 | struct pwc_dec23_private *pdec = &pdev->dec23; |
| 651 | |
| 652 | /* YUV420P image format */ |
| 653 | unsigned char *pout_planar_y; |
| 654 | unsigned char *pout_planar_u; |
| 655 | unsigned char *pout_planar_v; |
| 656 | unsigned int plane_size; |
| 657 | |
| 658 | mutex_lock(&pdec->lock); |
| 659 | |
| 660 | bandlines_left = pdev->height / 4; |
| 661 | bytes_per_block = pdev->width * 4; |
| 662 | plane_size = pdev->height * pdev->width; |
| 663 | |
| 664 | pout_planar_y = dst; |
| 665 | pout_planar_u = dst + plane_size; |
| 666 | pout_planar_v = dst + plane_size + plane_size / 4; |
| 667 | |
| 668 | while (bandlines_left--) { |
| 669 | DecompressBand23(pdec, src, |
| 670 | pout_planar_y, pout_planar_u, pout_planar_v, |
| 671 | pdev->width, pdev->width); |
| 672 | src += pdev->vbandlength; |
| 673 | pout_planar_y += bytes_per_block; |
| 674 | pout_planar_u += pdev->width; |
| 675 | pout_planar_v += pdev->width; |
| 676 | } |
| 677 | mutex_unlock(&pdec->lock); |
| 678 | } |