include (untested) hash
c/hash.c (new file, 29 lines)
@@ -0,0 +1,29 @@
/* SPDX-License-Identifier: GPL-2.0 */

/* inspired by the kernel's <fs/namei.h>
 */
#include "hash.h"

/* Return the hash of a string of known length */
unsigned int full_name_hash(const void *salt, const char *name, unsigned int len)
{
    unsigned long hash = init_name_hash(salt);

    while (len--)
        hash = partial_name_hash((unsigned char)*name++, hash);
    return end_name_hash(hash);
}

/* Return the "hash_len" (hash and length) of a null-terminated string */
u64 hashlen_string(const void *salt, const char *name)
{
    unsigned long hash = init_name_hash(salt);
    unsigned long len = 0, c;

    c = (unsigned char)*name;
    while (c) {
        len++;
        hash = partial_name_hash(c, hash);
        c = (unsigned char)name[len];
    }
    return hashlen_create(end_name_hash(hash), len);
}
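A quick way to exercise the two entry points (a sketch, not part of the commit): it assumes the program is linked against c/hash.c, that "hash.h" is on the include path, and that passing a NULL salt is acceptable. Since this byte-at-a-time version hashes the same bytes in both functions, the two paths should agree on the hash for the same salt and string.

#include <stdio.h>
#include "hash.h"               /* assumption: c/include is on the include path */

int main(void)
{
    const char *s = "usr";
    unsigned int h = full_name_hash(NULL, s, 3);   /* hash of a known-length string */
    u64 hl = hashlen_string(NULL, s);              /* hash + length of a NUL-terminated string */

    printf("hash=%#x len=%u hash2=%#x\n",
           h, hashlen_len(hl), hashlen_hash(hl));

    /* both paths should produce the same hash for the same salt and string */
    return h == hashlen_hash(hl) ? 0 : 1;
}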
@@ -262,6 +262,90 @@ static __always_inline int popcount32(u32 n)
 # endif
}

/* rolXX/rorXX are taken from the kernel's <linux/bitops.h> and are:
 * SPDX-License-Identifier: GPL-2.0
 */

/**
 * rol64 - rotate a 64-bit value left
 * @word: value to rotate
 * @shift: bits to roll
 */
static inline u64 rol64(u64 word, unsigned int shift)
{
    return (word << (shift & 63)) | (word >> ((-shift) & 63));
}

/**
 * ror64 - rotate a 64-bit value right
 * @word: value to rotate
 * @shift: bits to roll
 */
static inline u64 ror64(u64 word, unsigned int shift)
{
    return (word >> (shift & 63)) | (word << ((-shift) & 63));
}

/**
 * rol32 - rotate a 32-bit value left
 * @word: value to rotate
 * @shift: bits to roll
 */
static inline u32 rol32(u32 word, unsigned int shift)
{
    return (word << (shift & 31)) | (word >> ((-shift) & 31));
}

/**
 * ror32 - rotate a 32-bit value right
 * @word: value to rotate
 * @shift: bits to roll
 */
static inline u32 ror32(u32 word, unsigned int shift)
{
    return (word >> (shift & 31)) | (word << ((-shift) & 31));
}

/**
 * rol16 - rotate a 16-bit value left
 * @word: value to rotate
 * @shift: bits to roll
 */
static inline u16 rol16(u16 word, unsigned int shift)
{
    return (word << (shift & 15)) | (word >> ((-shift) & 15));
}

/**
 * ror16 - rotate a 16-bit value right
 * @word: value to rotate
 * @shift: bits to roll
 */
static inline u16 ror16(u16 word, unsigned int shift)
{
    return (word >> (shift & 15)) | (word << ((-shift) & 15));
}

/**
 * rol8 - rotate an 8-bit value left
 * @word: value to rotate
 * @shift: bits to roll
 */
static inline u8 rol8(u8 word, unsigned int shift)
{
    return (word << (shift & 7)) | (word >> ((-shift) & 7));
}

/**
 * ror8 - rotate an 8-bit value right
 * @word: value to rotate
 * @shift: bits to roll
 */
static inline u8 ror8(u8 word, unsigned int shift)
{
    return (word >> (shift & 7)) | (word << ((-shift) & 7));
}

/** bit_for_each - iterate over an u64/u32 bits
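A minimal sketch of how these rotate helpers behave (not part of the diff). It duplicates rol32 with a local uint32_t typedef standing in for the project's u32, so it compiles on its own; the mask on the shift keeps a shift of 0 well-defined.

#include <assert.h>
#include <stdint.h>

typedef uint32_t u32;   /* assumption: matches the project's u32 */

static inline u32 rol32(u32 word, unsigned int shift)
{
    return (word << (shift & 31)) | (word >> ((-shift) & 31));
}

int main(void)
{
    assert(rol32(0x80000001u, 1) == 0x00000003u);  /* top bit wraps around to bit 0 */
    assert(rol32(0x12345678u, 0) == 0x12345678u);  /* shift of 0 is safe thanks to the mask */
    return 0;
}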
c/include/br.h (new file, 205 lines)
@@ -0,0 +1,205 @@
/* br.h - generic helper macros.
 *
 * Copyright (C) 2021-2022 Bruno Raoult ("br")
 * Licensed under the GNU General Public License v3.0 or later.
 * Some rights reserved. See COPYING.
 *
 * You should have received a copy of the GNU General Public License along with this
 * program. If not, see <https://www.gnu.org/licenses/gpl-3.0-standalone.html>.
 *
 * SPDX-License-Identifier: GPL-3.0-or-later <https://spdx.org/licenses/GPL-3.0-or-later.html>
 *
 * Some parts are taken from the Linux kernel's <linux/kernel.h> and others, and are:
 * SPDX-License-Identifier: GPL-2.0
 *
 * This header contains generic stuff.
 */

#ifndef _BR_H
#define _BR_H

/* generate a (maybe) unique id.
 */
#define ___PASTE(x, y)      x##y
#define __PASTE(x, y)       ___PASTE(x, y)
#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
//__##prefix##__COUNTER__

/* see https://lkml.org/lkml/2018/3/20/845 for an explanation of this monster
 */
#define __is_constexpr(x) \
    (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))

/*
 * min()/max()/clamp() macros must accomplish three things:
 *
 * - avoid multiple evaluations of the arguments (so side-effects like
 *   "x++" happen only once) when non-constant.
 * - perform strict type-checking (to generate warnings instead of
 *   nasty runtime surprises). See the "unnecessary" pointer comparison
 *   in __typecheck().
 * - retain result as a constant expression when called with only
 *   constant expressions (to avoid tripping VLA warnings in stack
 *   allocation usage).
 */
#define __typecheck(x, y) \
    (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))

#define __no_side_effects(x, y) \
    (__is_constexpr(x) && __is_constexpr(y))

#define __safe_cmp(x, y) \
    (__typecheck(x, y) && __no_side_effects(x, y))

#define __cmp(x, y, op) ((x) op (y) ? (x) : (y))

#define __cmp_once(x, y, unique_x, unique_y, op) ({ \
    typeof(x) unique_x = (x);                       \
    typeof(y) unique_y = (y);                       \
    __cmp(unique_x, unique_y, op); })

#define __careful_cmp(x, y, op)                     \
    __builtin_choose_expr(__safe_cmp(x, y),         \
        __cmp(x, y, op),                            \
        __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))

#define __pure __attribute__((__pure__))

/**
 * min - return minimum of two values of the same or compatible types
 * @x: first value
 * @y: second value
 */
#define min(x, y)   __careful_cmp(x, y, <)

/**
 * max - return maximum of two values of the same or compatible types
 * @x: first value
 * @y: second value
 */
#define max(x, y)   __careful_cmp(x, y, >)

/**
 * min3 - return minimum of three values
 * @x: first value
 * @y: second value
 * @z: third value
 */
#define min3(x, y, z) min((typeof(x))min(x, y), z)

/**
 * max3 - return maximum of three values
 * @x: first value
 * @y: second value
 * @z: third value
 */
#define max3(x, y, z) max((typeof(x))max(x, y), z)

/**
 * min_not_zero - return the minimum that is _not_ zero, unless both are zero
 * @x: value1
 * @y: value2
 */
#define min_not_zero(x, y) ({   \
    typeof(x) __x = (x);        \
    typeof(y) __y = (y);        \
    __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })

/**
 * clamp - return a value clamped to a given range with strict typechecking
 * @val: current value
 * @lo: lowest allowable value
 * @hi: highest allowable value
 *
 * This macro does strict typechecking of @lo/@hi to make sure they are of the
 * same type as @val. See the unnecessary pointer comparisons.
 */
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)

/*
 * ..and if you can't take the strict types, you can specify one yourself.
 *
 * Or not use min/max/clamp at all, of course.
 */

/**
 * min_t - return minimum of two values, using the specified type
 * @type: data type to use
 * @x: first value
 * @y: second value
 */
#define min_t(type, x, y)   __careful_cmp((type)(x), (type)(y), <)

/**
 * max_t - return maximum of two values, using the specified type
 * @type: data type to use
 * @x: first value
 * @y: second value
 */
#define max_t(type, x, y)   __careful_cmp((type)(x), (type)(y), >)

/**
 * clamp_t - return a value clamped to a given range using a given type
 * @type: the type of variable to use
 * @val: current value
 * @lo: minimum allowable value
 * @hi: maximum allowable value
 *
 * This macro does no typechecking and uses temporary variables of type
 * @type to make all the comparisons.
 */
#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)

/**
 * clamp_val - return a value clamped to a given range using val's type
 * @val: current value
 * @lo: minimum allowable value
 * @hi: maximum allowable value
 *
 * This macro does no typechecking and uses temporary variables of whatever
 * type the input argument @val is. This is useful when @val is an unsigned
 * type and @lo and @hi are literals that will otherwise be assigned a signed
 * integer type.
 */
#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)

/**
 * swap - swap values of @a and @b
 * @a: first value
 * @b: second value
 */
#define swap(a, b) \
    do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

/**
 * ARRAY_SIZE - get the number of elements in array @arr
 * @arr: array to be sized
 */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

/**
 * abs - return absolute value of an argument
 * @x: the value. If it is unsigned type, it is converted to signed type first.
 *     char is treated as if it was signed (regardless of whether it really is)
 *     but the macro's return type is preserved as char.
 *
 * Return: an absolute value of x.
 */
#define abs(x)  __abs_choose_expr(x, long long,                         \
                __abs_choose_expr(x, long,                              \
                __abs_choose_expr(x, int,                               \
                __abs_choose_expr(x, short,                             \
                __abs_choose_expr(x, char,                              \
                __builtin_choose_expr(                                  \
                    __builtin_types_compatible_p(typeof(x), char),      \
                    (char)({ signed char __x = (x); __x<0?-__x:__x; }), \
                    ((void)0)))))))

#define __abs_choose_expr(x, type, other) __builtin_choose_expr(       \
    __builtin_types_compatible_p(typeof(x), signed type) ||            \
    __builtin_types_compatible_p(typeof(x), unsigned type),            \
    ({ signed type __x = (x); __x < 0 ? -__x : __x; }), other)


#endif /* _BR_H */
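A small usage sketch of the comparison macros above (not part of the commit). It assumes GNU C extensions (typeof, statement expressions, __builtin_choose_expr, __COUNTER__) and that c/include is on the include path; the single-evaluation guarantee from the __cmp_once path is what the last two lines illustrate.

#include <stdio.h>
#include "br.h"             /* assumption: built with -Ic/include */

int main(void)
{
    int a = 3, b = 7;
    int v[] = { 1, 2, 3, 4 };

    printf("min=%d max3=%d clamp=%d n=%zu\n",
           min(a, b),        /* 3  */
           max3(a, b, 11),   /* 11 */
           clamp(42, 0, 10), /* 10, and still a constant expression */
           ARRAY_SIZE(v));   /* 4  */

    /* non-constant arguments are evaluated exactly once, even with side effects */
    int i = 0;
    int m = min(i++, 5);
    printf("m=%d i=%d\n", m, i);   /* m=0, i=1 */
    return 0;
}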
@@ -8,7 +8,8 @@
 #include "likely.h"
 #include "debug.h"

-/* inspired by Linux kernel's <asm/bug.h> */
+/* BUG functions inspired by Linux kernel's <asm/bug.h>
+ */

 #define panic() exit(0xff)
@@ -2,7 +2,7 @@
 #ifndef _BR_HASH_H
 #define _BR_HASH_H
-/* adaptation of Linux kernel's <linux/hash.h>
+/* adaptation of Linux kernel's <linux/hash.h> and <linux/stringhash.h>
  */

 /* Fast hashing routine for ints, longs and pointers.
@@ -11,6 +11,8 @@
 #include <asm/types.h>
 #include <asm/bitsperlong.h>
 #include "bits.h"
+#include "br.h"
+
 /*
  * The "GOLDEN_RATIO_PRIME" is used in fs/btrfs/brtfs_inode.h and
  * fs/inode.c. It's not actually prime any more (the previous primes
@@ -98,4 +100,74 @@ static inline u32 hash32_ptr(const void *ptr)
    return (u32)val;
}

/*
 * Routines for hashing strings of bytes to a 32-bit hash value.
 *
 * These hash functions are NOT GUARANTEED STABLE between kernel
 * versions, architectures, or even repeated boots of the same kernel.
 * (E.g. they may depend on boot-time hardware detection or be
 * deliberately randomized.)
 *
 * They are also not intended to be secure against collisions caused by
 * malicious inputs; much slower hash functions are required for that.
 *
 * They are optimized for pathname components, meaning short strings.
 * Even if a majority of files have longer names, the dynamic profile of
 * pathname components skews short due to short directory names.
 * (E.g. /usr/lib/libsesquipedalianism.so.3.141.)
 */

/*
 * Version 1: one byte at a time. Example of use:
 *
 * unsigned long hash = init_name_hash;
 * while (*p)
 *         hash = partial_name_hash(tolower(*p++), hash);
 * hash = end_name_hash(hash);
 *
 * Although this is designed for bytes, fs/hfsplus/unicode.c
 * abuses it to hash 16-bit values.
 */

/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
#define init_name_hash(salt)    (unsigned long)(salt)

/* partial hash update function. Assume roughly 4 bits per character */
static inline unsigned long
partial_name_hash(unsigned long c, unsigned long prevhash)
{
    return (prevhash + (c << 4) + (c >> 4)) * 11;
}

/*
 * Finally: cut down the number of bits to an int value (and try to avoid
 * losing bits). This also has the property (wanted by the dcache)
 * that the msbits make a good hash table index.
 */
static inline unsigned int end_name_hash(unsigned long hash)
{
    return hash_long(hash, 32);
}

/*
 * Version 2: One word (32 or 64 bits) at a time.
 * If CONFIG_DCACHE_WORD_ACCESS is defined (meaning <asm/word-at-a-time.h>
 * exists, which describes major Linux platforms like x86 and ARM), then
 * this computes a different hash function much faster.
 *
 * If not set, this falls back to a wrapper around the preceding.
 */
extern unsigned int __pure full_name_hash(const void *salt, const char *, unsigned int);

/*
 * A hash_len is a u64 with the hash of a string in the low
 * half and the length in the high half.
 */
#define hashlen_hash(hashlen)     ((u32)(hashlen))
#define hashlen_len(hashlen)      ((u32)((hashlen) >> 32))
#define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash))

/* Return the "hash_len" (hash and length) of a null-terminated string */
extern u64 __pure hashlen_string(const void *salt, const char *name);

#endif /* _BR_HASH_H */
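As a sanity check on the hash_len packing above (not part of the diff): the macros simply put the length in the high 32 bits and the hash in the low 32 bits. This sketch uses local uint32_t/uint64_t typedefs in place of the project's u32/u64 and copies the three macros so it stands alone.

#include <assert.h>
#include <stdint.h>

typedef uint64_t u64;   /* assumption: mirrors the project's u64/u32 typedefs */
typedef uint32_t u32;

#define hashlen_hash(hashlen)     ((u32)(hashlen))
#define hashlen_len(hashlen)      ((u32)((hashlen) >> 32))
#define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash))

int main(void)
{
    u64 hl = hashlen_create(0xdeadbeefu, 17);
    assert(hashlen_hash(hl) == 0xdeadbeefu);   /* low half: the hash  */
    assert(hashlen_len(hl) == 17);             /* high half: the length */
    return 0;
}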
c/include/stringhash.h (new file, 79 lines)
@@ -0,0 +1,79 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_STRINGHASH_H
#define __LINUX_STRINGHASH_H

#include <linux/compiler.h>     /* For __pure */
#include <linux/types.h>        /* For u32, u64 */
#include <linux/hash.h>

/*
 * Routines for hashing strings of bytes to a 32-bit hash value.
 *
 * These hash functions are NOT GUARANTEED STABLE between kernel
 * versions, architectures, or even repeated boots of the same kernel.
 * (E.g. they may depend on boot-time hardware detection or be
 * deliberately randomized.)
 *
 * They are also not intended to be secure against collisions caused by
 * malicious inputs; much slower hash functions are required for that.
 *
 * They are optimized for pathname components, meaning short strings.
 * Even if a majority of files have longer names, the dynamic profile of
 * pathname components skews short due to short directory names.
 * (E.g. /usr/lib/libsesquipedalianism.so.3.141.)
 */

/*
 * Version 1: one byte at a time. Example of use:
 *
 * unsigned long hash = init_name_hash;
 * while (*p)
 *         hash = partial_name_hash(tolower(*p++), hash);
 * hash = end_name_hash(hash);
 *
 * Although this is designed for bytes, fs/hfsplus/unicode.c
 * abuses it to hash 16-bit values.
 */

/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
#define init_name_hash(salt)    (unsigned long)(salt)

/* partial hash update function. Assume roughly 4 bits per character */
static inline unsigned long
partial_name_hash(unsigned long c, unsigned long prevhash)
{
    return (prevhash + (c << 4) + (c >> 4)) * 11;
}

/*
 * Finally: cut down the number of bits to a int value (and try to avoid
 * losing bits). This also has the property (wanted by the dcache)
 * that the msbits make a good hash table index.
 */
static inline unsigned int end_name_hash(unsigned long hash)
{
    return hash_long(hash, 32);
}

/*
 * Version 2: One word (32 or 64 bits) at a time.
 * If CONFIG_DCACHE_WORD_ACCESS is defined (meaning <asm/word-at-a-time.h>
 * exists, which describes major Linux platforms like x86 and ARM), then
 * this computes a different hash function much faster.
 *
 * If not set, this falls back to a wrapper around the preceding.
 */
extern unsigned int __pure full_name_hash(const void *salt, const char *, unsigned int);

/*
 * A hash_len is a u64 with the hash of a string in the low
 * half and the length in the high half.
 */
#define hashlen_hash(hashlen)     ((u32)(hashlen))
#define hashlen_len(hashlen)      ((u32)((hashlen) >> 32))
#define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash))

/* Return the "hash_len" (hash and length) of a null-terminated string */
extern u64 __pure hashlen_string(const void *salt, const char *name);

#endif /* __LINUX_STRINGHASH_H */
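The "Version 1" usage shown in the comment above can be turned into a tiny standalone program (an illustrative sketch, not part of the commit). It copies init_name_hash and partial_name_hash so it builds on its own, and it stops before end_name_hash, since the final fold to 32 bits goes through hash_long() in <linux/hash.h>.

#include <ctype.h>
#include <stdio.h>

/* local copies of the two helpers above, so this sketch is self-contained */
#define init_name_hash(salt)  ((unsigned long)(salt))

static inline unsigned long
partial_name_hash(unsigned long c, unsigned long prevhash)
{
    return (prevhash + (c << 4) + (c >> 4)) * 11;
}

int main(void)
{
    const char *p = "Makefile";
    unsigned long hash = init_name_hash(0);

    /* the byte-at-a-time loop from the "Version 1" comment */
    while (*p)
        hash = partial_name_hash(tolower((unsigned char)*p++), hash);

    /* end_name_hash() would now fold this into 32 bits via hash_long() */
    printf("raw hash of \"Makefile\": %#lx\n", hash);
    return 0;
}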