Andrew Scull | b4b6d4a | 2019-01-02 15:54:55 +0000 | [diff] [blame] | 1 | /* |
| 2 | * sun4i-ss-hash.c - hardware cryptographic accelerator for Allwinner A20 SoC |
| 3 | * |
| 4 | * Copyright (C) 2013-2015 Corentin LABBE <clabbe.montjoie@gmail.com> |
| 5 | * |
| 6 | * This file adds support for MD5 and SHA1. |
| 7 | * |
| 8 | * You can find the datasheet in Documentation/arm/sunxi/README |
| 9 | * |
| 10 | * This program is free software; you can redistribute it and/or modify |
| 11 | * it under the terms of the GNU General Public License as published by |
| 12 | * the Free Software Foundation; either version 2 of the License, or |
| 13 | * (at your option) any later version. |
| 14 | */ |
| 15 | #include "sun4i-ss.h" |
| 16 | #include <linux/scatterlist.h> |
| 17 | |
| 18 | /* This is a totally arbitrary value */ |
| 19 | #define SS_TIMEOUT 100 |
| 20 | |
| 21 | int sun4i_hash_crainit(struct crypto_tfm *tfm) |
| 22 | { |
| 23 | struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm); |
| 24 | struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); |
| 25 | struct sun4i_ss_alg_template *algt; |
| 26 | |
| 27 | memset(op, 0, sizeof(struct sun4i_tfm_ctx)); |
| 28 | |
| 29 | algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash); |
| 30 | op->ss = algt->ss; |
| 31 | |
| 32 | crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), |
| 33 | sizeof(struct sun4i_req_ctx)); |
| 34 | return 0; |
| 35 | } |
| 36 | |
| 37 | /* sun4i_hash_init: initialize request context */ |
| 38 | int sun4i_hash_init(struct ahash_request *areq) |
| 39 | { |
| 40 | struct sun4i_req_ctx *op = ahash_request_ctx(areq); |
| 41 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); |
| 42 | struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg); |
| 43 | struct sun4i_ss_alg_template *algt; |
| 44 | |
| 45 | memset(op, 0, sizeof(struct sun4i_req_ctx)); |
| 46 | |
| 47 | algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash); |
| 48 | op->mode = algt->mode; |
| 49 | |
| 50 | return 0; |
| 51 | } |
| 52 | |
| 53 | int sun4i_hash_export_md5(struct ahash_request *areq, void *out) |
| 54 | { |
| 55 | struct sun4i_req_ctx *op = ahash_request_ctx(areq); |
| 56 | struct md5_state *octx = out; |
| 57 | int i; |
| 58 | |
| 59 | octx->byte_count = op->byte_count + op->len; |
| 60 | |
| 61 | memcpy(octx->block, op->buf, op->len); |
| 62 | |
| 63 | if (op->byte_count) { |
| 64 | for (i = 0; i < 4; i++) |
| 65 | octx->hash[i] = op->hash[i]; |
| 66 | } else { |
| 67 | octx->hash[0] = SHA1_H0; |
| 68 | octx->hash[1] = SHA1_H1; |
| 69 | octx->hash[2] = SHA1_H2; |
| 70 | octx->hash[3] = SHA1_H3; |
| 71 | } |
| 72 | |
| 73 | return 0; |
| 74 | } |
| 75 | |
| 76 | int sun4i_hash_import_md5(struct ahash_request *areq, const void *in) |
| 77 | { |
| 78 | struct sun4i_req_ctx *op = ahash_request_ctx(areq); |
| 79 | const struct md5_state *ictx = in; |
| 80 | int i; |
| 81 | |
| 82 | sun4i_hash_init(areq); |
| 83 | |
| 84 | op->byte_count = ictx->byte_count & ~0x3F; |
| 85 | op->len = ictx->byte_count & 0x3F; |
| 86 | |
| 87 | memcpy(op->buf, ictx->block, op->len); |
| 88 | |
| 89 | for (i = 0; i < 4; i++) |
| 90 | op->hash[i] = ictx->hash[i]; |
| 91 | |
| 92 | return 0; |
| 93 | } |
| 94 | |
| 95 | int sun4i_hash_export_sha1(struct ahash_request *areq, void *out) |
| 96 | { |
| 97 | struct sun4i_req_ctx *op = ahash_request_ctx(areq); |
| 98 | struct sha1_state *octx = out; |
| 99 | int i; |
| 100 | |
| 101 | octx->count = op->byte_count + op->len; |
| 102 | |
| 103 | memcpy(octx->buffer, op->buf, op->len); |
| 104 | |
| 105 | if (op->byte_count) { |
| 106 | for (i = 0; i < 5; i++) |
| 107 | octx->state[i] = op->hash[i]; |
| 108 | } else { |
| 109 | octx->state[0] = SHA1_H0; |
| 110 | octx->state[1] = SHA1_H1; |
| 111 | octx->state[2] = SHA1_H2; |
| 112 | octx->state[3] = SHA1_H3; |
| 113 | octx->state[4] = SHA1_H4; |
| 114 | } |
| 115 | |
| 116 | return 0; |
| 117 | } |
| 118 | |
| 119 | int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in) |
| 120 | { |
| 121 | struct sun4i_req_ctx *op = ahash_request_ctx(areq); |
| 122 | const struct sha1_state *ictx = in; |
| 123 | int i; |
| 124 | |
| 125 | sun4i_hash_init(areq); |
| 126 | |
| 127 | op->byte_count = ictx->count & ~0x3F; |
| 128 | op->len = ictx->count & 0x3F; |
| 129 | |
| 130 | memcpy(op->buf, ictx->buffer, op->len); |
| 131 | |
| 132 | for (i = 0; i < 5; i++) |
| 133 | op->hash[i] = ictx->state[i]; |
| 134 | |
| 135 | return 0; |
| 136 | } |
| 137 | |
| 138 | #define SS_HASH_UPDATE 1 |
| 139 | #define SS_HASH_FINAL 2 |
| 140 | |
| 141 | /* |
| 142 | * sun4i_hash_update: update hash engine |
| 143 | * |
| 144 | * Could be used for both SHA1 and MD5 |
| 145 | * Write data by step of 32bits and put then in the SS. |
| 146 | * |
| 147 | * Since we cannot leave partial data and hash state in the engine, |
| 148 | * we need to get the hash state at the end of this function. |
| 149 | * We can get the hash state every 64 bytes |
| 150 | * |
| 151 | * So the first work is to get the number of bytes to write to SS modulo 64 |
| 152 | * The extra bytes will go to a temporary buffer op->buf storing op->len bytes |
| 153 | * |
| 154 | * So at the begin of update() |
| 155 | * if op->len + areq->nbytes < 64 |
| 156 | * => all data will be written to wait buffer (op->buf) and end=0 |
| 157 | * if not, write all data from op->buf to the device and position end to |
| 158 | * complete to 64bytes |
| 159 | * |
| 160 | * example 1: |
| 161 | * update1 60o => op->len=60 |
| 162 | * update2 60o => need one more word to have 64 bytes |
| 163 | * end=4 |
| 164 | * so write all data from op->buf and one word of SGs |
| 165 | * write remaining data in op->buf |
| 166 | * final state op->len=56 |
| 167 | */ |
| 168 | static int sun4i_hash(struct ahash_request *areq) |
| 169 | { |
| 170 | /* |
| 171 | * i is the total bytes read from SGs, to be compared to areq->nbytes |
| 172 | * i is important because we cannot rely on SG length since the sum of |
| 173 | * SG->length could be greater than areq->nbytes |
| 174 | * |
| 175 | * end is the position when we need to stop writing to the device, |
| 176 | * to be compared to i |
| 177 | * |
| 178 | * in_i: advancement in the current SG |
| 179 | */ |
| 180 | unsigned int i = 0, end, fill, min_fill, nwait, nbw = 0, j = 0, todo; |
| 181 | unsigned int in_i = 0; |
| 182 | u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, wb = 0, v, ivmode = 0; |
| 183 | struct sun4i_req_ctx *op = ahash_request_ctx(areq); |
| 184 | struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); |
| 185 | struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm); |
| 186 | struct sun4i_ss_ctx *ss = tfmctx->ss; |
| 187 | struct scatterlist *in_sg = areq->src; |
| 188 | struct sg_mapping_iter mi; |
| 189 | int in_r, err = 0; |
| 190 | size_t copied = 0; |
| 191 | |
| 192 | dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x", |
| 193 | __func__, crypto_tfm_alg_name(areq->base.tfm), |
| 194 | op->byte_count, areq->nbytes, op->mode, |
| 195 | op->len, op->hash[0]); |
| 196 | |
| 197 | if (unlikely(!areq->nbytes) && !(op->flags & SS_HASH_FINAL)) |
| 198 | return 0; |
| 199 | |
| 200 | /* protect against overflow */ |
| 201 | if (unlikely(areq->nbytes > UINT_MAX - op->len)) { |
| 202 | dev_err(ss->dev, "Cannot process too large request\n"); |
| 203 | return -EINVAL; |
| 204 | } |
| 205 | |
| 206 | if (op->len + areq->nbytes < 64 && !(op->flags & SS_HASH_FINAL)) { |
| 207 | /* linearize data to op->buf */ |
| 208 | copied = sg_pcopy_to_buffer(areq->src, sg_nents(areq->src), |
| 209 | op->buf + op->len, areq->nbytes, 0); |
| 210 | op->len += copied; |
| 211 | return 0; |
| 212 | } |
| 213 | |
| 214 | spin_lock_bh(&ss->slock); |
| 215 | |
| 216 | /* |
| 217 | * if some data have been processed before, |
| 218 | * we need to restore the partial hash state |
| 219 | */ |
| 220 | if (op->byte_count) { |
| 221 | ivmode = SS_IV_ARBITRARY; |
| 222 | for (i = 0; i < 5; i++) |
| 223 | writel(op->hash[i], ss->base + SS_IV0 + i * 4); |
| 224 | } |
| 225 | /* Enable the device */ |
| 226 | writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL); |
| 227 | |
| 228 | if (!(op->flags & SS_HASH_UPDATE)) |
| 229 | goto hash_final; |
| 230 | |
| 231 | /* start of handling data */ |
| 232 | if (!(op->flags & SS_HASH_FINAL)) { |
| 233 | end = ((areq->nbytes + op->len) / 64) * 64 - op->len; |
| 234 | |
| 235 | if (end > areq->nbytes || areq->nbytes - end > 63) { |
| 236 | dev_err(ss->dev, "ERROR: Bound error %u %u\n", |
| 237 | end, areq->nbytes); |
| 238 | err = -EINVAL; |
| 239 | goto release_ss; |
| 240 | } |
| 241 | } else { |
| 242 | /* Since we have the flag final, we can go up to modulo 4 */ |
| 243 | end = ((areq->nbytes + op->len) / 4) * 4 - op->len; |
| 244 | } |
| 245 | |
| 246 | /* TODO if SGlen % 4 and !op->len then DMA */ |
| 247 | i = 1; |
| 248 | while (in_sg && i == 1) { |
| 249 | if (in_sg->length % 4) |
| 250 | i = 0; |
| 251 | in_sg = sg_next(in_sg); |
| 252 | } |
| 253 | if (i == 1 && !op->len && areq->nbytes) |
| 254 | dev_dbg(ss->dev, "We can DMA\n"); |
| 255 | |
| 256 | i = 0; |
| 257 | sg_miter_start(&mi, areq->src, sg_nents(areq->src), |
| 258 | SG_MITER_FROM_SG | SG_MITER_ATOMIC); |
| 259 | sg_miter_next(&mi); |
| 260 | in_i = 0; |
| 261 | |
| 262 | do { |
| 263 | /* |
| 264 | * we need to linearize in two case: |
| 265 | * - the buffer is already used |
| 266 | * - the SG does not have enough byte remaining ( < 4) |
| 267 | */ |
| 268 | if (op->len || (mi.length - in_i) < 4) { |
| 269 | /* |
| 270 | * if we have entered here we have two reason to stop |
| 271 | * - the buffer is full |
| 272 | * - reach the end |
| 273 | */ |
| 274 | while (op->len < 64 && i < end) { |
| 275 | /* how many bytes we can read from current SG */ |
| 276 | in_r = min3(mi.length - in_i, end - i, |
| 277 | 64 - op->len); |
| 278 | memcpy(op->buf + op->len, mi.addr + in_i, in_r); |
| 279 | op->len += in_r; |
| 280 | i += in_r; |
| 281 | in_i += in_r; |
| 282 | if (in_i == mi.length) { |
| 283 | sg_miter_next(&mi); |
| 284 | in_i = 0; |
| 285 | } |
| 286 | } |
| 287 | if (op->len > 3 && !(op->len % 4)) { |
| 288 | /* write buf to the device */ |
| 289 | writesl(ss->base + SS_RXFIFO, op->buf, |
| 290 | op->len / 4); |
| 291 | op->byte_count += op->len; |
| 292 | op->len = 0; |
| 293 | } |
| 294 | } |
| 295 | if (mi.length - in_i > 3 && i < end) { |
| 296 | /* how many bytes we can read from current SG */ |
| 297 | in_r = min3(mi.length - in_i, areq->nbytes - i, |
| 298 | ((mi.length - in_i) / 4) * 4); |
| 299 | /* how many bytes we can write in the device*/ |
| 300 | todo = min3((u32)(end - i) / 4, rx_cnt, (u32)in_r / 4); |
| 301 | writesl(ss->base + SS_RXFIFO, mi.addr + in_i, todo); |
| 302 | op->byte_count += todo * 4; |
| 303 | i += todo * 4; |
| 304 | in_i += todo * 4; |
| 305 | rx_cnt -= todo; |
| 306 | if (!rx_cnt) { |
| 307 | spaces = readl(ss->base + SS_FCSR); |
| 308 | rx_cnt = SS_RXFIFO_SPACES(spaces); |
| 309 | } |
| 310 | if (in_i == mi.length) { |
| 311 | sg_miter_next(&mi); |
| 312 | in_i = 0; |
| 313 | } |
| 314 | } |
| 315 | } while (i < end); |
| 316 | |
| 317 | /* |
| 318 | * Now we have written to the device all that we can, |
| 319 | * store the remaining bytes in op->buf |
| 320 | */ |
| 321 | if ((areq->nbytes - i) < 64) { |
| 322 | while (i < areq->nbytes && in_i < mi.length && op->len < 64) { |
| 323 | /* how many bytes we can read from current SG */ |
| 324 | in_r = min3(mi.length - in_i, areq->nbytes - i, |
| 325 | 64 - op->len); |
| 326 | memcpy(op->buf + op->len, mi.addr + in_i, in_r); |
| 327 | op->len += in_r; |
| 328 | i += in_r; |
| 329 | in_i += in_r; |
| 330 | if (in_i == mi.length) { |
| 331 | sg_miter_next(&mi); |
| 332 | in_i = 0; |
| 333 | } |
| 334 | } |
| 335 | } |
| 336 | |
| 337 | sg_miter_stop(&mi); |
| 338 | |
| 339 | /* |
| 340 | * End of data process |
| 341 | * Now if we have the flag final go to finalize part |
| 342 | * If not, store the partial hash |
| 343 | */ |
| 344 | if (op->flags & SS_HASH_FINAL) |
| 345 | goto hash_final; |
| 346 | |
| 347 | writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL); |
| 348 | i = 0; |
| 349 | do { |
| 350 | v = readl(ss->base + SS_CTL); |
| 351 | i++; |
| 352 | } while (i < SS_TIMEOUT && (v & SS_DATA_END)); |
| 353 | if (unlikely(i >= SS_TIMEOUT)) { |
| 354 | dev_err_ratelimited(ss->dev, |
| 355 | "ERROR: hash end timeout %d>%d ctl=%x len=%u\n", |
| 356 | i, SS_TIMEOUT, v, areq->nbytes); |
| 357 | err = -EIO; |
| 358 | goto release_ss; |
| 359 | } |
| 360 | |
| 361 | /* |
| 362 | * The datasheet isn't very clear about when to retrieve the digest. The |
| 363 | * bit SS_DATA_END is cleared when the engine has processed the data and |
| 364 | * when the digest is computed *but* it doesn't mean the digest is |
| 365 | * available in the digest registers. Hence the delay to be sure we can |
| 366 | * read it. |
| 367 | */ |
| 368 | ndelay(1); |
| 369 | |
| 370 | for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) |
| 371 | op->hash[i] = readl(ss->base + SS_MD0 + i * 4); |
| 372 | |
| 373 | goto release_ss; |
| 374 | |
| 375 | /* |
| 376 | * hash_final: finalize hashing operation |
| 377 | * |
| 378 | * If we have some remaining bytes, we write them. |
| 379 | * Then ask the SS for finalizing the hashing operation |
| 380 | * |
| 381 | * I do not check RX FIFO size in this function since the size is 32 |
| 382 | * after each enabling and this function neither write more than 32 words. |
| 383 | * If we come from the update part, we cannot have more than |
| 384 | * 3 remaining bytes to write and SS is fast enough to not care about it. |
| 385 | */ |
| 386 | |
| 387 | hash_final: |
| 388 | |
| 389 | /* write the remaining words of the wait buffer */ |
| 390 | if (op->len) { |
| 391 | nwait = op->len / 4; |
| 392 | if (nwait) { |
| 393 | writesl(ss->base + SS_RXFIFO, op->buf, nwait); |
| 394 | op->byte_count += 4 * nwait; |
| 395 | } |
| 396 | |
| 397 | nbw = op->len - 4 * nwait; |
| 398 | if (nbw) { |
| 399 | wb = *(u32 *)(op->buf + nwait * 4); |
| 400 | wb &= GENMASK((nbw * 8) - 1, 0); |
| 401 | |
| 402 | op->byte_count += nbw; |
| 403 | } |
| 404 | } |
| 405 | |
| 406 | /* write the remaining bytes of the nbw buffer */ |
| 407 | wb |= ((1 << 7) << (nbw * 8)); |
| 408 | bf[j++] = wb; |
| 409 | |
| 410 | /* |
| 411 | * number of space to pad to obtain 64o minus 8(size) minus 4 (final 1) |
| 412 | * I take the operations from other MD5/SHA1 implementations |
| 413 | */ |
| 414 | |
| 415 | /* last block size */ |
| 416 | fill = 64 - (op->byte_count % 64); |
| 417 | min_fill = 2 * sizeof(u32) + (nbw ? 0 : sizeof(u32)); |
| 418 | |
| 419 | /* if we can't fill all data, jump to the next 64 block */ |
| 420 | if (fill < min_fill) |
| 421 | fill += 64; |
| 422 | |
| 423 | j += (fill - min_fill) / sizeof(u32); |
| 424 | |
| 425 | /* write the length of data */ |
| 426 | if (op->mode == SS_OP_SHA1) { |
| 427 | __be64 bits = cpu_to_be64(op->byte_count << 3); |
| 428 | bf[j++] = lower_32_bits(bits); |
| 429 | bf[j++] = upper_32_bits(bits); |
| 430 | } else { |
| 431 | __le64 bits = op->byte_count << 3; |
| 432 | bf[j++] = lower_32_bits(bits); |
| 433 | bf[j++] = upper_32_bits(bits); |
| 434 | } |
| 435 | writesl(ss->base + SS_RXFIFO, bf, j); |
| 436 | |
| 437 | /* Tell the SS to stop the hashing */ |
| 438 | writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL); |
| 439 | |
| 440 | /* |
| 441 | * Wait for SS to finish the hash. |
| 442 | * The timeout could happen only in case of bad overclocking |
| 443 | * or driver bug. |
| 444 | */ |
| 445 | i = 0; |
| 446 | do { |
| 447 | v = readl(ss->base + SS_CTL); |
| 448 | i++; |
| 449 | } while (i < SS_TIMEOUT && (v & SS_DATA_END)); |
| 450 | if (unlikely(i >= SS_TIMEOUT)) { |
| 451 | dev_err_ratelimited(ss->dev, |
| 452 | "ERROR: hash end timeout %d>%d ctl=%x len=%u\n", |
| 453 | i, SS_TIMEOUT, v, areq->nbytes); |
| 454 | err = -EIO; |
| 455 | goto release_ss; |
| 456 | } |
| 457 | |
| 458 | /* |
| 459 | * The datasheet isn't very clear about when to retrieve the digest. The |
| 460 | * bit SS_DATA_END is cleared when the engine has processed the data and |
| 461 | * when the digest is computed *but* it doesn't mean the digest is |
| 462 | * available in the digest registers. Hence the delay to be sure we can |
| 463 | * read it. |
| 464 | */ |
| 465 | ndelay(1); |
| 466 | |
| 467 | /* Get the hash from the device */ |
| 468 | if (op->mode == SS_OP_SHA1) { |
| 469 | for (i = 0; i < 5; i++) { |
| 470 | v = cpu_to_be32(readl(ss->base + SS_MD0 + i * 4)); |
| 471 | memcpy(areq->result + i * 4, &v, 4); |
| 472 | } |
| 473 | } else { |
| 474 | for (i = 0; i < 4; i++) { |
| 475 | v = readl(ss->base + SS_MD0 + i * 4); |
| 476 | memcpy(areq->result + i * 4, &v, 4); |
| 477 | } |
| 478 | } |
| 479 | |
| 480 | release_ss: |
| 481 | writel(0, ss->base + SS_CTL); |
| 482 | spin_unlock_bh(&ss->slock); |
| 483 | return err; |
| 484 | } |
| 485 | |
| 486 | int sun4i_hash_final(struct ahash_request *areq) |
| 487 | { |
| 488 | struct sun4i_req_ctx *op = ahash_request_ctx(areq); |
| 489 | |
| 490 | op->flags = SS_HASH_FINAL; |
| 491 | return sun4i_hash(areq); |
| 492 | } |
| 493 | |
| 494 | int sun4i_hash_update(struct ahash_request *areq) |
| 495 | { |
| 496 | struct sun4i_req_ctx *op = ahash_request_ctx(areq); |
| 497 | |
| 498 | op->flags = SS_HASH_UPDATE; |
| 499 | return sun4i_hash(areq); |
| 500 | } |
| 501 | |
| 502 | /* sun4i_hash_finup: finalize hashing operation after an update */ |
| 503 | int sun4i_hash_finup(struct ahash_request *areq) |
| 504 | { |
| 505 | struct sun4i_req_ctx *op = ahash_request_ctx(areq); |
| 506 | |
| 507 | op->flags = SS_HASH_UPDATE | SS_HASH_FINAL; |
| 508 | return sun4i_hash(areq); |
| 509 | } |
| 510 | |
| 511 | /* combo of init/update/final functions */ |
| 512 | int sun4i_hash_digest(struct ahash_request *areq) |
| 513 | { |
| 514 | int err; |
| 515 | struct sun4i_req_ctx *op = ahash_request_ctx(areq); |
| 516 | |
| 517 | err = sun4i_hash_init(areq); |
| 518 | if (err) |
| 519 | return err; |
| 520 | |
| 521 | op->flags = SS_HASH_UPDATE | SS_HASH_FINAL; |
| 522 | return sun4i_hash(areq); |
| 523 | } |