Fix wrong output in the benchmark application

The benchmark application labels its throughput as Kb/s (kilobits),
while the value it actually calculates is in KB/s (kilobytes).
Resolves issue #850
diff --git a/programs/test/benchmark.c b/programs/test/benchmark.c
index eb578e7..d88bc57 100644
--- a/programs/test/benchmark.c
+++ b/programs/test/benchmark.c
@@ -126,7 +126,7 @@
         CODE;                                                           \
     }                                                                   \
                                                                         \
-    mbedtls_printf( "%9lu Kb/s,  %9lu cycles/byte\n",                   \
+    mbedtls_printf( "%9lu KB/s,  %9lu cycles/byte\n",                   \
                      ii * BUFSIZE / 1024,                               \
                      ( mbedtls_timing_hardclock() - tsc ) / ( jj * BUFSIZE ) );         \
 } while( 0 )
diff --git a/yotta/data/example-benchmark/README.md b/yotta/data/example-benchmark/README.md
index 8589e7b..8397f5e 100644
--- a/yotta/data/example-benchmark/README.md
+++ b/yotta/data/example-benchmark/README.md
@@ -56,24 +56,24 @@
     {{start}}
 
 
-      SHA-1                    :       3644 Kb/s,         32 cycles/byte
-      SHA-256                  :       1957 Kb/s,         59 cycles/byte
-      SHA-512                  :        587 Kb/s,        200 cycles/byte
-      AES-CBC-128              :       1359 Kb/s,         86 cycles/byte
-      AES-CBC-192              :       1183 Kb/s,         99 cycles/byte
-      AES-CBC-256              :       1048 Kb/s,        111 cycles/byte
-      AES-GCM-128              :        421 Kb/s,        279 cycles/byte
-      AES-GCM-192              :        403 Kb/s,        292 cycles/byte
-      AES-GCM-256              :        385 Kb/s,        305 cycles/byte
-      AES-CCM-128              :        542 Kb/s,        216 cycles/byte
-      AES-CCM-192              :        484 Kb/s,        242 cycles/byte
-      AES-CCM-256              :        437 Kb/s,        268 cycles/byte
-      CTR_DRBG (NOPR)          :       1002 Kb/s,        117 cycles/byte
-      CTR_DRBG (PR)            :        705 Kb/s,        166 cycles/byte
-      HMAC_DRBG SHA-1 (NOPR)   :        228 Kb/s,        517 cycles/byte
-      HMAC_DRBG SHA-1 (PR)     :        210 Kb/s,        561 cycles/byte
-      HMAC_DRBG SHA-256 (NOPR) :        212 Kb/s,        557 cycles/byte
-      HMAC_DRBG SHA-256 (PR)   :        185 Kb/s,        637 cycles/byte
+      SHA-1                    :       3644 KB/s,         32 cycles/byte
+      SHA-256                  :       1957 KB/s,         59 cycles/byte
+      SHA-512                  :        587 KB/s,        200 cycles/byte
+      AES-CBC-128              :       1359 KB/s,         86 cycles/byte
+      AES-CBC-192              :       1183 KB/s,         99 cycles/byte
+      AES-CBC-256              :       1048 KB/s,        111 cycles/byte
+      AES-GCM-128              :        421 KB/s,        279 cycles/byte
+      AES-GCM-192              :        403 KB/s,        292 cycles/byte
+      AES-GCM-256              :        385 KB/s,        305 cycles/byte
+      AES-CCM-128              :        542 KB/s,        216 cycles/byte
+      AES-CCM-192              :        484 KB/s,        242 cycles/byte
+      AES-CCM-256              :        437 KB/s,        268 cycles/byte
+      CTR_DRBG (NOPR)          :       1002 KB/s,        117 cycles/byte
+      CTR_DRBG (PR)            :        705 KB/s,        166 cycles/byte
+      HMAC_DRBG SHA-1 (NOPR)   :        228 KB/s,        517 cycles/byte
+      HMAC_DRBG SHA-1 (PR)     :        210 KB/s,        561 cycles/byte
+      HMAC_DRBG SHA-256 (NOPR) :        212 KB/s,        557 cycles/byte
+      HMAC_DRBG SHA-256 (PR)   :        185 KB/s,        637 cycles/byte
       RSA-2048                 :      41 ms/ public
       RSA-2048                 :    1349 ms/private
       RSA-4096                 :     134 ms/ public
diff --git a/yotta/data/example-benchmark/main.cpp b/yotta/data/example-benchmark/main.cpp
index ef38c44..36cfc0e 100644
--- a/yotta/data/example-benchmark/main.cpp
+++ b/yotta/data/example-benchmark/main.cpp
@@ -229,7 +229,7 @@
         CODE;                                                           \
     }                                                                   \
                                                                         \
-    mbedtls_printf( "%9lu Kb/s,  %9lu cycles/byte\r\n",                  \
+    mbedtls_printf( "%9lu KB/s,  %9lu cycles/byte\r\n",                  \
                      i * BUFSIZE / 1024,                                \
                      ( mbedtls_timing_hardclock() - tsc ) / ( j * BUFSIZE ) );         \
 } while( 0 )