diff --git a/c/hash.c b/c/hash.c new file mode 100644 index 0000000..e50eb2a --- /dev/null +++ b/c/hash.c @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* inspired from kernel's + */ +#include "hash.h" + +/* Return the hash of a string of known length */ +unsigned int full_name_hash(const void *salt, const char *name, unsigned int len) +{ + unsigned long hash = init_name_hash(salt); + while (len--) + hash = partial_name_hash((unsigned char)*name++, hash); + return end_name_hash(hash); +} + +/* Return the "hash_len" (hash and length) of a null-terminated string */ +u64 hashlen_string(const void *salt, const char *name) +{ + unsigned long hash = init_name_hash(salt); + unsigned long len = 0, c; + + c = (unsigned char)*name; + while (c) { + len++; + hash = partial_name_hash(c, hash); + c = (unsigned char)name[len]; + } + return hashlen_create(end_name_hash(hash), len); +} diff --git a/c/include/bits.h b/c/include/bits.h index ef26e7c..07271aa 100644 --- a/c/include/bits.h +++ b/c/include/bits.h @@ -262,6 +262,90 @@ static __always_inline int popcount32(u32 n) # endif } +/* rolXX are taken from kernel's are are: + * SPDX-License-Identifier: GPL-2.0 + */ + +/** + * rol64 - rotate a 64-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline u64 rol64(u64 word, unsigned int shift) +{ + return (word << (shift & 63)) | (word >> ((-shift) & 63)); +} + +/** + * ror64 - rotate a 64-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline u64 ror64(u64 word, unsigned int shift) +{ + return (word >> (shift & 63)) | (word << ((-shift) & 63)); +} + +/** + * rol32 - rotate a 32-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline u32 rol32(u32 word, unsigned int shift) +{ + return (word << (shift & 31)) | (word >> ((-shift) & 31)); +} + +/** + * ror32 - rotate a 32-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline u32 ror32(u32 word, unsigned int shift) +{ + return (word >> (shift & 31)) | (word << ((-shift) & 31)); +} + +/** + * rol16 - rotate a 16-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline u16 rol16(u16 word, unsigned int shift) +{ + return (word << (shift & 15)) | (word >> ((-shift) & 15)); +} + +/** + * ror16 - rotate a 16-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline u16 ror16(u16 word, unsigned int shift) +{ + return (word >> (shift & 15)) | (word << ((-shift) & 15)); +} + +/** + * rol8 - rotate an 8-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline u8 rol8(u8 word, unsigned int shift) +{ + return (word << (shift & 7)) | (word >> ((-shift) & 7)); +} + +/** + * ror8 - rotate an 8-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline u8 ror8(u8 word, unsigned int shift) +{ + return (word >> (shift & 7)) | (word << ((-shift) & 7)); +} + /** bit_for_each - iterate over an u64/u32 bits diff --git a/c/include/br.h b/c/include/br.h new file mode 100644 index 0000000..11d67cd --- /dev/null +++ b/c/include/br.h @@ -0,0 +1,205 @@ +/* bits.h - bits functions. + * + * Copyright (C) 2021-2022 Bruno Raoult ("br") + * Licensed under the GNU General Public License v3.0 or later. + * Some rights reserved. See COPYING. + * + * You should have received a copy of the GNU General Public License along with this + * program. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + * + * Some parts are taken from Linux's kernel and others, and are : + * SPDX-License-Identifier: GPL-2.0 + * + * This header contains generic stuff. + */ + +#ifndef _BR_H +#define _BR_H + +/* generate a (maybe) unique id. + */ +#define ___PASTE(x, y) x##y +#define __PASTE(x, y) ___PASTE(x, y) +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) +//__##prefix##__COUNTER__ + +/* see https://lkml.org/lkml/2018/3/20/845 for explanation of this monster + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + +/* + * min()/max()/clamp() macros must accomplish three things: + * + * - avoid multiple evaluations of the arguments (so side-effects like + * "x++" happen only once) when non-constant. + * - perform strict type-checking (to generate warnings instead of + * nasty runtime surprises). See the "unnecessary" pointer comparison + * in __typecheck(). + * - retain result as a constant expressions when called with only + * constant expressions (to avoid tripping VLA warnings in stack + * allocation usage). + */ +#define __typecheck(x, y) \ + (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) + +#define __no_side_effects(x, y) \ + (__is_constexpr(x) && __is_constexpr(y)) + +#define __safe_cmp(x, y) \ + (__typecheck(x, y) && __no_side_effects(x, y)) + +#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) + +#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ + typeof(x) unique_x = (x); \ + typeof(y) unique_y = (y); \ + __cmp(unique_x, unique_y, op); }) + +#define __careful_cmp(x, y, op) \ + __builtin_choose_expr(__safe_cmp(x, y), \ + __cmp(x, y, op), \ + __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) + +#define __pure __attribute__((__pure__)) + +/** + * min - return minimum of two values of the same or compatible types + * @x: first value + * @y: second value + */ +#define min(x, y) __careful_cmp(x, y, <) + +/** + * max - return maximum of two values of the same or compatible types + * @x: first value + * @y: second value + */ +#define max(x, y) __careful_cmp(x, y, >) + +/** + * min3 - return minimum of three values + * @x: first value + * @y: second value + * @z: third value + */ +#define min3(x, y, z) min((typeof(x))min(x, y), z) + +/** + * max3 - return maximum of three values + * @x: first value + * @y: second value + * @z: third value + */ +#define max3(x, y, z) max((typeof(x))max(x, y), z) + +/** + * min_not_zero - return the minimum that is _not_ zero, unless both are zero + * @x: value1 + * @y: value2 + */ +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) + +/** + * clamp - return a value clamped to a given range with strict typechecking + * @val: current value + * @lo: lowest allowable value + * @hi: highest allowable value + * + * This macro does strict typechecking of @lo/@hi to make sure they are of the + * same type as @val. See the unnecessary pointer comparisons. + */ +#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) + +/* + * ..and if you can't take the strict + * types, you can specify one yourself. + * + * Or not use min/max/clamp at all, of course. + */ + +/** + * min_t - return minimum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) + +/** + * max_t - return maximum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) + +/** + * clamp_t - return a value clamped to a given range using a given type + * @type: the type of variable to use + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of type + * @type to make all the comparisons. + */ +#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) + +/** + * clamp_val - return a value clamped to a given range using val's type + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of whatever + * type the input argument @val is. This is useful when @val is an unsigned + * type and @lo and @hi are literals that will otherwise be assigned a signed + * integer type. + */ +#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) + +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +/** + * ARRAY_SIZE - get the number of elements in array @arr + * @arr: array to be sized + */ +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +/** + * abs - return absolute value of an argument + * @x: the value. If it is unsigned type, it is converted to signed type first. + * char is treated as if it was signed (regardless of whether it really is) + * but the macro's return type is preserved as char. + * + * Return: an absolute value of x. + */ +#define abs(x) __abs_choose_expr(x, long long, \ + __abs_choose_expr(x, long, \ + __abs_choose_expr(x, int, \ + __abs_choose_expr(x, short, \ + __abs_choose_expr(x, char, \ + __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), char), \ + (char)({ signed char __x = (x); __x<0?-__x:__x; }), \ + ((void)0))))))) + +#define __abs_choose_expr(x, type, other) __builtin_choose_expr( \ + __builtin_types_compatible_p(typeof(x), signed type) || \ + __builtin_types_compatible_p(typeof(x), unsigned type), \ + ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other) + + +#endif /* _BR_H */ diff --git a/c/include/bug.h b/c/include/bug.h index 2c4566f..fdff9b8 100644 --- a/c/include/bug.h +++ b/c/include/bug.h @@ -8,7 +8,8 @@ #include "likely.h" #include "debug.h" -/* inspired by Linux kernel's */ +/* BUG functions inspired by Linux kernel's + */ #define panic() exit(0xff) diff --git a/c/include/hash.h b/c/include/hash.h index 5ce4330..d9c9d32 100644 --- a/c/include/hash.h +++ b/c/include/hash.h @@ -2,7 +2,7 @@ #ifndef _BR_HASH_H #define _BR_HASH_H -/* adaptation of Linux kernel's +/* adaptation of Linux kernel's and */ /* Fast hashing routine for ints, longs and pointers. @@ -11,6 +11,8 @@ #include #include #include "bits.h" +#include "br.h" + /* * The "GOLDEN_RATIO_PRIME" is used in ifs/btrfs/brtfs_inode.h and * fs/inode.c. It's not actually prime any more (the previous primes @@ -98,4 +100,74 @@ static inline u32 hash32_ptr(const void *ptr) return (u32)val; } +/* + * Routines for hashing strings of bytes to a 32-bit hash value. + * + * These hash functions are NOT GUARANTEED STABLE between kernel + * versions, architectures, or even repeated boots of the same kernel. + * (E.g. they may depend on boot-time hardware detection or be + * deliberately randomized.) + * + * They are also not intended to be secure against collisions caused by + * malicious inputs; much slower hash functions are required for that. + * + * They are optimized for pathname components, meaning short strings. + * Even if a majority of files have longer names, the dynamic profile of + * pathname components skews short due to short directory names. + * (E.g. /usr/lib/libsesquipedalianism.so.3.141.) + */ + +/* + * Version 1: one byte at a time. Example of use: + * + * unsigned long hash = init_name_hash; + * while (*p) + * hash = partial_name_hash(tolower(*p++), hash); + * hash = end_name_hash(hash); + * + * Although this is designed for bytes, fs/hfsplus/unicode.c + * abuses it to hash 16-bit values. + */ + +/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ +#define init_name_hash(salt) (unsigned long)(salt) + +/* partial hash update function. Assume roughly 4 bits per character */ +static inline unsigned long +partial_name_hash(unsigned long c, unsigned long prevhash) +{ + return (prevhash + (c << 4) + (c >> 4)) * 11; +} + +/* + * Finally: cut down the number of bits to a int value (and try to avoid + * losing bits). This also has the property (wanted by the dcache) + * that the msbits make a good hash table index. + */ +static inline unsigned int end_name_hash(unsigned long hash) +{ + return hash_long(hash, 32); +} + +/* + * Version 2: One word (32 or 64 bits) at a time. + * If CONFIG_DCACHE_WORD_ACCESS is defined (meaning + * exists, which describes major Linux platforms like x86 and ARM), then + * this computes a different hash function much faster. + * + * If not set, this falls back to a wrapper around the preceding. + */ +extern unsigned int __pure full_name_hash(const void *salt, const char *, unsigned int); + +/* + * A hash_len is a u64 with the hash of a string in the low + * half and the length in the high half. + */ +#define hashlen_hash(hashlen) ((u32)(hashlen)) +#define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) +#define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash)) + +/* Return the "hash_len" (hash and length) of a null-terminated string */ +extern u64 __pure hashlen_string(const void *salt, const char *name); + #endif /* _BR_HASH_H */ diff --git a/c/include/stringhash.h b/c/include/stringhash.h new file mode 100644 index 0000000..c0c5c5b --- /dev/null +++ b/c/include/stringhash.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_STRINGHASH_H +#define __LINUX_STRINGHASH_H + +#include /* For __pure */ +#include /* For u32, u64 */ +#include + +/* + * Routines for hashing strings of bytes to a 32-bit hash value. + * + * These hash functions are NOT GUARANTEED STABLE between kernel + * versions, architectures, or even repeated boots of the same kernel. + * (E.g. they may depend on boot-time hardware detection or be + * deliberately randomized.) + * + * They are also not intended to be secure against collisions caused by + * malicious inputs; much slower hash functions are required for that. + * + * They are optimized for pathname components, meaning short strings. + * Even if a majority of files have longer names, the dynamic profile of + * pathname components skews short due to short directory names. + * (E.g. /usr/lib/libsesquipedalianism.so.3.141.) + */ + +/* + * Version 1: one byte at a time. Example of use: + * + * unsigned long hash = init_name_hash; + * while (*p) + * hash = partial_name_hash(tolower(*p++), hash); + * hash = end_name_hash(hash); + * + * Although this is designed for bytes, fs/hfsplus/unicode.c + * abuses it to hash 16-bit values. + */ + +/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */ +#define init_name_hash(salt) (unsigned long)(salt) + +/* partial hash update function. Assume roughly 4 bits per character */ +static inline unsigned long +partial_name_hash(unsigned long c, unsigned long prevhash) +{ + return (prevhash + (c << 4) + (c >> 4)) * 11; +} + +/* + * Finally: cut down the number of bits to a int value (and try to avoid + * losing bits). This also has the property (wanted by the dcache) + * that the msbits make a good hash table index. + */ +static inline unsigned int end_name_hash(unsigned long hash) +{ + return hash_long(hash, 32); +} + +/* + * Version 2: One word (32 or 64 bits) at a time. + * If CONFIG_DCACHE_WORD_ACCESS is defined (meaning + * exists, which describes major Linux platforms like x86 and ARM), then + * this computes a different hash function much faster. + * + * If not set, this falls back to a wrapper around the preceding. + */ +extern unsigned int __pure full_name_hash(const void *salt, const char *, unsigned int); + +/* + * A hash_len is a u64 with the hash of a string in the low + * half and the length in the high half. + */ +#define hashlen_hash(hashlen) ((u32)(hashlen)) +#define hashlen_len(hashlen) ((u32)((hashlen) >> 32)) +#define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash)) + +/* Return the "hash_len" (hash and length) of a null-terminated string */ +extern u64 __pure hashlen_string(const void *salt, const char *name); + +#endif /* __LINUX_STRINGHASH_H */