/* * The MIT License (MIT) * * Copyright (c) 2016 Mikkel F. Jørgensen, dvide.com * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * * Fast printing of (u)int8/16/32/64_t, (u)int, (u)long. * * Functions take for the * * int print_(type value, char *buf); * * and returns number of characters printed, excluding trailing '\0' * which is also printed. Prints at most 21 characters including zero- * termination. * * The function `print_bool` is a bit different - it simply prints "true\0" for * non-zero integers, and "false\0" otherwise. * * The general algorithm is in-place formatting using binary search log10 * followed by duff device loop unrolling div / 100 stages. * * The simpler post copy algorithm also provided for fmt_(u)int uses a * temp buffer and loops over div/100 and post copy to target buffer. * * * Benchmarks on core-i7, 2.2GHz, 64-bit clang/OS-X -O2: * * print_int64: avg 15ns for values between INT64_MIN + (10^7/2 .. 10^7/2) * print_int64: avg 11ns for values between 10^9 + (0..10,000,000). * print_int32: avg 7ns for values cast from INT64_MIN + (10^7/2 .. 10^7/2) * print_int32: avg 7ns for values between 10^9 + (0..10,000,000). * print_int64: avg 13ns for values between 10^16 + (0..10,000,000). * print_int64: avg 5ns for values between 0 and 10,000,000. * print_int32: avg 5ns for values between 0 and 10,000,000. * print_int16: avg 10ns for values cast from 0 and 10,000,000. * print_int8: avg 4ns for values cast from 0 and 10,000,000. * * Post copy algorithm: * print_int: avg 12ns for values between INT64_MIN + (10^7/2 .. 10^7/2) * print_int: avg 14ns for values between 10^9 + (0..10,000,000). * print_long: avg 29ns for values between INT64_MIN + (10^7/2 .. 10^7/2) * * The post copy algorithm is nearly half as fast as the in-place * algorithm, but can also be faster occasionally - possibly because the * optimizer being able to skip the copy step. */ #ifndef PPRINTINT_H #define PPRINTINT_H #ifdef __cplusplus extern "C" { #endif #ifndef UINT8_MAX #include #endif #include "pattributes.h" /* fallthrough */ #define PDIAGNOSTIC_IGNORE_UNUSED_FUNCTION #include "pdiagnostic_push.h" static int print_bool(int n, char *p); static int print_uint8(uint8_t n, char *p); static int print_uint16(uint16_t n, char *p); static int print_uint32(uint32_t n, char *p); static int print_uint64(uint64_t n, char *p); static int print_int8(int8_t n, char *p); static int print_int16(int16_t n, char *p); static int print_int32(int32_t n, char *p); static int print_int64(int64_t n, char *p); /* * Uses slightly slower, but more compact alogrithm * that is not hardcoded to implementation size. * Other types may be defined using macros below. */ static int print_ulong(unsigned long n, char *p); static int print_uint(unsigned int n, char *p); static int print_int(int n, char *p); static int print_long(long n, char *p); #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) #define __print_unaligned_copy_16(p, q) (*(uint16_t*)(p) = *(uint16_t*)(q)) #else #define __print_unaligned_copy_16(p, q) \ ((((uint8_t*)(p))[0] = ((uint8_t*)(q))[0]), \ (((uint8_t*)(p))[1] = ((uint8_t*)(q))[1])) #endif static const char __print_digit_pairs[] = "0001020304050607080910111213141516171819" "2021222324252627282930313233343536373839" "4041424344454647484950515253545556575859" "6061626364656667686970717273747576777879" "8081828384858687888990919293949596979899"; #define __print_stage() \ p -= 2; \ dp = __print_digit_pairs + (n % 100) * 2; \ n /= 100; \ __print_unaligned_copy_16(p, dp); #define __print_long_stage() \ __print_stage() \ __print_stage() #define __print_short_stage() \ *--p = (n % 10) + '0'; \ n /= 10; static int print_bool(int n, char *buf) { if (n) { memcpy(buf, "true\0", 5); return 4; } else { memcpy(buf, "false\0", 6); return 5; } } static int print_uint8(uint8_t n, char *p) { const char *dp; if (n >= 100) { p += 3; *p = '\0'; __print_stage(); p[-1] = (char)n + '0'; return 3; } if (n >= 10) { p += 2; *p = '\0'; __print_stage(); return 2; } p[1] = '\0'; p[0] = (char)n + '0'; return 1; } static int print_uint16(uint16_t n, char *p) { int k = 0; const char *dp; if (n >= 1000) { if(n >= 10000) { k = 5; } else { k = 4; } } else { if(n >= 100) { k = 3; } else if(n >= 10) { k = 2; } else { k = 1; } } p += k; *p = '\0'; if (k & 1) { switch (k) { case 5: __print_stage(); pattribute(fallthrough); case 3: __print_stage(); pattribute(fallthrough); case 1: p[-1] = (char)n + '0'; } } else { switch (k) { case 4: __print_stage(); pattribute(fallthrough); case 2: __print_stage(); } } return k; } static int print_uint32(uint32_t n, char *p) { int k = 0; const char *dp; if(n >= 10000UL) { if(n >= 10000000UL) { if(n >= 1000000000UL) { k = 10; } else if(n >= 100000000UL) { k = 9; } else { k = 8; } } else { if(n >= 1000000UL) { k = 7; } else if(n >= 100000UL) { k = 6; } else { k = 5; } } } else { if(n >= 100UL) { if(n >= 1000UL) { k = 4; } else { k = 3; } } else { if(n >= 10UL) { k = 2; } else { k = 1UL; } } } p += k; *p = '\0'; if (k & 1) { switch (k) { case 9: __print_stage(); pattribute(fallthrough); case 7: __print_stage(); pattribute(fallthrough); case 5: __print_stage(); pattribute(fallthrough); case 3: __print_stage(); pattribute(fallthrough); case 1: p[-1] = (char)n + '0'; } } else { switch (k) { case 10: __print_stage(); pattribute(fallthrough); case 8: __print_stage(); pattribute(fallthrough); case 6: __print_stage(); pattribute(fallthrough); case 4: __print_stage(); pattribute(fallthrough); case 2: __print_stage(); } } return k; } static int print_uint64(uint64_t n, char *p) { int k = 0; const char *dp; const uint64_t x = 1000000000ULL; if (n < x) { return print_uint32((uint32_t)n, p); } if(n >= 10000ULL * x) { if(n >= 10000000ULL * x) { if(n >= 1000000000ULL * x) { if (n >= 10000000000ULL * x) { k = 11 + 9; } else { k = 10 + 9; } } else if(n >= 100000000ULL * x) { k = 9 + 9; } else { k = 8 + 9; } } else { if(n >= 1000000ULL * x) { k = 7 + 9; } else if(n >= 100000ULL * x) { k = 6 + 9; } else { k = 5 + 9; } } } else { if(n >= 100ULL * x) { if(n >= 1000ULL * x) { k = 4 + 9; } else { k = 3 + 9; } } else { if(n >= 10ULL * x) { k = 2 + 9; } else { k = 1 + 9; } } } p += k; *p = '\0'; if (k & 1) { switch (k) { case 19: __print_stage(); pattribute(fallthrough); case 17: __print_stage(); pattribute(fallthrough); case 15: __print_stage(); pattribute(fallthrough); case 13: __print_stage(); pattribute(fallthrough); case 11: __print_stage() __print_short_stage(); } } else { switch (k) { case 20: __print_stage(); pattribute(fallthrough); case 18: __print_stage(); pattribute(fallthrough); case 16: __print_stage(); pattribute(fallthrough); case 14: __print_stage(); pattribute(fallthrough); case 12: __print_stage(); pattribute(fallthrough); case 10: __print_stage(); } } __print_long_stage() __print_long_stage() return k; } static int print_int8(int8_t n, char *p) { int sign; if ((sign = n < 0)) { *p++ = '-'; n = -n; } return print_uint8((uint8_t)n, p) + sign; } static int print_int16(int16_t n, char *p) { int sign; if ((sign = n < 0)) { *p++ = '-'; n = -n; } return print_uint16((uint16_t)n, p) + sign; } static int print_int32(int32_t n, char *p) { int sign; if ((sign = n < 0)) { *p++ = '-'; n = -n; } return print_uint32((uint32_t)n, p) + sign; } static int print_int64(int64_t n, char *p) { int sign; if ((sign = n < 0)) { *p++ = '-'; n = -n; } return print_uint64((uint64_t)n, p) + sign; } #define __define_print_int_simple(NAME, UNAME, T, UT) \ static int UNAME(UT n, char *buf) \ { \ char tmp[20]; \ char* p = tmp + 20; \ char* q = p; \ unsigned int k, m; \ \ while (n >= 100) { \ p -= 2; \ m = (unsigned int)(n % 100) * 2; \ n /= 100; \ __print_unaligned_copy_16(p, __print_digit_pairs + m); \ } \ p -= 2; \ m = (unsigned int)n * 2; \ __print_unaligned_copy_16(p, __print_digit_pairs + m); \ if (n < 10) { \ ++p; \ } \ k = (unsigned int)(q - p); \ while (p != q) { \ *buf++ = *p++; \ } \ *buf = '\0'; \ return (int)k; \ } \ \ static int NAME(T n, char *buf) \ { \ int sign = n < 0; \ \ if (sign) { \ *buf++ = '-'; \ n = -n; \ } \ return UNAME((UT)n, buf) + sign; \ } __define_print_int_simple(print_int, print_uint, int, unsigned int) __define_print_int_simple(print_long, print_ulong, long, unsigned long) #ifdef PPRINTINT_BENCH int main() { int64_t count = 10000000; /* 10^7 */ #if 0 int64_t base = 0; int64_t base = 10000000000000000; /* 10^16 */ int64_t base = 1000000000; /* 10^9 */ #endif int64_t base = INT64_MIN - count/2; char buf[100]; int i, k = 0, n = 0; for (i = 0; i < count; i++) { k = print_int64(i + base, buf); n += buf[0] + buf[k - 1]; } return n; } /* Call with time on executable, multiply time in seconds by 100 to get time unit in ns/number. */ #endif /* PPRINTINT_BENCH */ #ifdef PPRINTINT_TEST #include #include int main() { char buf[21]; int failed = 0; int k; k = print_uint64(UINT64_MAX, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("18446744073709551615", buf)) { printf("UINT64_MAX didn't print correctly, got:\n'%s'\n", buf); ++failed; } k = print_int64(INT64_MAX, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("9223372036854775807", buf)) { printf("INT64_MAX didn't print correctly, got:\n'%s'\n", buf); ++failed; } k = print_int64(INT64_MIN, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("-9223372036854775808", buf)) { printf("INT64_MIN didn't print correctly, got:\n'%s'\n", buf); ++failed; } k = print_uint32(UINT32_MAX, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("4294967295", buf)) { printf("UINT32_MAX didn't print correctly, got:\n'%s'\n", buf); ++failed; } k = print_int32(INT32_MAX, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("2147483647", buf)) { printf("INT32_MAX didn't print correctly, got:\n'%s'\n", buf); ++failed; } k = print_int32(INT32_MIN, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("-2147483648", buf)) { printf("INT32_MIN didn't print correctly, got:\n'%s'\n", buf); ++failed; } k = print_uint16(UINT16_MAX, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("65535", buf)) { printf("UINT16_MAX didn't print correctly, got:\n'%s'\n", buf); ++failed; } k = print_int16(INT16_MAX, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("32767", buf)) { printf("INT16_MAX didn't print correctly, got:\n'%s'\n", buf); ++failed; } k = print_int16(INT16_MIN, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("-32768", buf)) { printf("INT16_MIN didn't print correctly, got:\n'%s'\n", buf); ++failed; } k = print_uint8(UINT8_MAX, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("255", buf)) { printf("INT8_MAX didn't print correctly, got:\n'%s'\n", buf); ++failed; } k = print_int8(INT8_MAX, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("127", buf)) { printf("INT8_MAX didn't print correctly, got:\n'%s'\n", buf); ++failed; } k = print_int8(INT8_MIN, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("-128", buf)) { printf("INT8_MIN didn't print correctly, got:\n'%s'\n", buf); ++failed; } k = print_int(INT32_MAX, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("2147483647", buf)) { printf("INT32_MAX didn't print correctly with k = print_int, got:\n'%s'\n", buf); ++failed; } k = print_int(INT32_MIN, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("-2147483648", buf)) { printf("INT32_MIN didn't print correctly k = print_int, got:\n'%s'\n", buf); ++failed; } k = print_long(INT32_MAX, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("2147483647", buf)) { printf("INT32_MAX didn't print correctly with fmt_long, got:\n'%s'\n", buf); ++failed; } k = print_long(INT32_MIN, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("-2147483648", buf)) { printf("INT32_MIN didn't print correctly fmt_long, got:\n'%s'\n", buf); ++failed; } k = print_bool(1, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("true", buf) { printf("1 didn't print 'true' as expected, got:\n'%s'\n", buf); ++failed; } k = print_bool(-1, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("true", buf) { printf("-1 didn't print 'true' as expected, got:\n'%s'\n", buf); ++failed; } k = print_bool(, buf); if (strlen(buf) != k) printf("length error\n"); if (strcmp("false", buf) { printf("0 didn't print 'false' as expected, got:\n'%s'\n", buf); ++failed; } if (failed) { printf("FAILED\n"); return -1; } printf("SUCCESS\n"); return 0; } #endif /* PPRINTINT_TEST */ #include "pdiagnostic_pop.h" #ifdef __cplusplus } #endif #endif /* PPRINTINT_H */