initial commit

This commit is contained in:
2021-08-08 21:11:22 +02:00
commit fe7136d801
130 changed files with 6858 additions and 0 deletions

124
c/word-count/src/hash.c Normal file
View File

@@ -0,0 +1,124 @@
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
#include "hash.h"
/* NOTE(review): HASH_SIZE is not referenced by any code visible in this file. */
#define HASH_SIZE 50
//static hash_t *hash_table[HASH_SIZE];
/* Entry pool shared by every hash table handled by this module.
 * pool_free     - head of the list (linked through ->next) of entries that
 *                 are available for reuse.
 * alloc_entries - storage most recently obtained from the allocator for
 *                 pool entries.
 */
static h_entry_t *pool_free, *alloc_entries;
/* Total number of pool entries allocated so far. */
static int n_entries;
/* h_init - mark every bucket of an existing table as empty.
 *
 * hash->size must already hold the number of buckets.  Only the bucket
 * heads are cleared: entries that were still chained are NOT handed back
 * to the entry pool, so call this on a fresh (or already emptied) table.
 */
void h_init(hash_t *hash)
{
    /* entries[] holds pointers, so size the clear by one bucket slot,
     * not by a whole h_entry_t (which would write past the array). */
    memset(hash->entries, 0, sizeof hash->entries[0] * hash->size);
}
hash_t *h_create(int size)
{
hash_t *hash;
if ( !(hash=calloc(sizeof(hash_t) + size*(sizeof (h_entry_t *)), 1)) )
return NULL;
hash->size=size;
return hash;
}
/* h_destroy - dispose of a table created by h_create().
 *
 * Walks every bucket chain, hands each entry back through h_entry_free(),
 * then releases the table itself.  The keys referenced by the entries are
 * not touched.
 */
void h_destroy(hash_t *h)
{
    for (int bucket = 0; bucket < h->size; ++bucket) {
        h_entry_t *cur = h->entries[bucket];

        while (cur) {
            /* h_entry_free() overwrites ->next, so save it first */
            h_entry_t *following = cur->next;

            h_entry_free(cur);
            cur = following;
        }
        h->entries[bucket] = NULL;
    }
    free(h);
}
/* hash_djb2 - Daniel J. Bernstein's djb2 hash.
 *
 * Starts from 5381 and, for each of the first `len` bytes of `str`,
 * computes hash = hash * 33 + byte.  A `len` of zero or less yields 5381.
 */
unsigned long hash_djb2(const unsigned char *str, const int len)
{
    unsigned long acc = 5381;
    const unsigned char *stop;

    if (len <= 0)
        return acc;
    for (stop = str + len; str != stop; ++str)
        acc = acc * 33 + *str;
    return acc;
}
/* h_entry_free - put an entry back at the head of the pool's free list.
 *
 * The entry is not unlinked from any bucket chain; the caller has to do
 * that (or be discarding the whole chain) before calling.
 */
void h_entry_free(h_entry_t *e)
{
    h_entry_t *const old_head = pool_free;

    e->next = old_head;
    pool_free = e;
}
/* h_entry_find - look up the key made of the first `l` bytes of `s`.
 *
 * Hashes the key with hash_djb2(), then scans the matching bucket chain for
 * an entry of the same length whose bytes compare equal.
 * Returns the matching entry, or NULL when the key is not in the table.
 */
h_entry_t *h_entry_find(hash_t *h, const unsigned char *s, const int l)
{
    const unsigned long full_hash = hash_djb2(s, l);
    h_entry_t *cur;

#   ifdef DEBUG
    printf("h_entry_find([%.*s]): hash=%#lx (%lu) - ", l, s, full_hash, full_hash%h->size);
#   endif
    cur = h->entries[full_hash % h->size];
    /* stop on the first match; cur ends up NULL when the chain is exhausted */
    while (cur && !(l == cur->key_len && !memcmp(cur->data, s, l)))
        cur = cur->next;
#   ifdef DEBUG
    printf("ret=%p\n", cur? (void *)cur: (void *)-1);
#   endif
    return cur;
}
/* h_entry_add - return the entry for a key, creating it when missing.
 *
 * h      - table to search / update
 * s, l   - key bytes and key length.  The bytes are NOT copied: a new entry
 *          keeps the pointer `s`, so the key must outlive the entry.
 * insert - set to 1 when a new entry was linked in, 0 otherwise.
 *
 * Returns the existing or newly created entry, or NULL (with *insert == 0)
 * when a new entry was needed but the pool could not be grown.
 */
h_entry_t *h_entry_add(hash_t *h, const unsigned char *s, const int l, int *insert)
{
    unsigned long bucket;
    h_entry_t *entry;

    *insert = 0;
    if ((entry = h_entry_find(h, s, l)))
        return entry;

    if (!pool_free) {
        /* Grow the pool with an independent chunk.  Resizing one big array
         * could move it and leave every entry already linked into a table
         * (bucket heads and ->next pointers) dangling. */
        h_entry_t *chunk = calloc(ENTRY_ALLOC_SIZE, sizeof *chunk);

        if (!chunk)
            return NULL;
        n_entries += ENTRY_ALLOC_SIZE;
#       ifdef DEBUG
        printf("get_hash: allocating %d new entries - total entries=%d\n",
               ENTRY_ALLOC_SIZE, n_entries);
#       endif
        alloc_entries = chunk;
        for (int i = 0; i < ENTRY_ALLOC_SIZE; ++i) { /* create free entries list */
            chunk[i].next = pool_free;
            pool_free = &chunk[i];
        }
    }

    *insert = 1;
    bucket = hash_djb2(s, l) % h->size;

    /* get a free entry from pool */
    entry = pool_free;
    pool_free = entry->next;

    /* push the entry at the head of its bucket chain */
    entry->next = h->entries[bucket];
    h->entries[bucket] = entry;
    entry->data = (void *)s;
    entry->key_len = l;
#   ifdef DEBUG
    printf("h_entry_add: %p\n", (void *)entry);
#   endif
    return entry;
}

52
c/word-count/src/hash.h Normal file
View File

@@ -0,0 +1,52 @@
#ifndef _HASH_H
#define _HASH_H
#include <stdbool.h>
/* One key stored in a hash table.  Entries that fall into the same bucket
 * are chained through `next`.
 */
typedef struct h_entry {
/* key bytes; the lookup code compares them with memcmp() */
void *data;
/* number of key bytes at `data` */
int key_len;
/* next: following entry in the bucket chain (also used to link free
 * entries in the pool).  NOTE(review): `prev` is never read or written by
 * the hash code shown alongside this header. */
struct h_entry *prev, *next;
} h_entry_t;
/* Generic hash table: a bucket count followed by the bucket heads. */
typedef struct {
/* number of buckets in entries[] (hash values are reduced modulo size) */
int size;
/* bucket heads; NULL means the bucket is empty (flexible array member) */
h_entry_t *entries[];
} hash_t;
/* a few predefined hash sizes */
/* Each type below has the same leading layout as hash_t but a fixed bucket
 * array.  NOTE(review): presumably meant for tables declared without
 * h_create() and then used through a hash_t pointer - confirm against
 * callers; `size` is not set automatically for these types.
 */
typedef struct {
int size;
h_entry_t *entries[16];
} hash_16_t;
typedef struct {
int size;
h_entry_t *entries[128];
} hash_128_t;
typedef struct {
int size;
h_entry_t *entries[1024];
} hash_1024_t;
/* number of entries added to the shared entry pool each time it runs empty */
#define ENTRY_ALLOC_SIZE 20
/* hash map functions */
/* h_create:  allocate a zeroed table with `size` buckets; NULL on failure.
 * h_init:    zero the bucket array of an already allocated table.
 * h_destroy: return every chained entry to the pool and free the table.
 */
hash_t *h_create(int size);
void h_init(hash_t *);
void h_destroy(hash_t *);
/* static free_nodes */
/* NOTE(review): no definition of set_pool_free_static() is visible next to
 * this header - confirm it exists before relying on it. */
void set_pool_free_static(h_entry_t *p);
/* hash entries functions */
/* h_entry_add:  (table, key, key length, out flag) - return the entry for
 *               the key, linking in a new one when it is missing; the out
 *               flag is set to 1 on insertion and 0 otherwise.  The key
 *               bytes are not copied, only the pointer is kept.
 * h_entry_find: (table, key, key length) - return the entry or NULL.
 * h_entry_free: push an entry on the pool's free list; it is not unlinked
 *               from its bucket chain.
 */
h_entry_t *h_entry_add(hash_t *, const unsigned char *, const int, int *);
h_entry_t *h_entry_find(hash_t *, const unsigned char *, const int);
void h_entry_free(h_entry_t *);
/* hash function */
/* djb2 hash of the first `len` bytes of `str` */
unsigned long hash_djb2(const unsigned char *str, const int len);
#endif

View File

@@ -0,0 +1,162 @@
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "word_count.h"
#include "hash.h"
/* See GNUmakefile in following link for explanation
* https://exercism.io/my/solutions/103b2f7d92db42309c1988030f5202c7
*/
#if defined UNIT_TEST || defined DEBUG
#include <stdlib.h>
#endif
/* Some cases are not well explained. So I will consider words mixing
 * alphabetic characters and digits as invalid, as well as two consecutive
 * "'" inside an alphabetic word:
 * The following will return INVALID_WORD:
 *     P2P
 *     0xFF
 *     A''B
 * The following will return 2 numbers/words:
 *     1'2
 *     A'2
 */
/* next_word - extract the next word or number from the string at *p.
 *
 * Leading characters that are neither letters nor digits are skipped.  A
 * word is a run of letters and apostrophes (one trailing apostrophe is
 * dropped); a number is a run of digits.
 *
 * Returns:
 *   {tmp, len}            - lower-cased, NUL-terminated token in a static
 *                           buffer (overwritten by the next call); *p is
 *                           advanced past the token.  `len` is the real
 *                           token length even when the token did not fit
 *                           in the buffer, so callers must check `len`
 *                           before reading that many bytes.
 *   {NULL, 0}             - no token left; *p is unchanged.
 *   {NULL, INVALID_WORD}  - letters and digits mixed, or two consecutive
 *                           apostrophes; *p is unchanged.
 */
static word_t next_word(const char **p)
{
    static char tmp[1024];
    word_t res = {NULL, 0};
    const char *start = *p;
    const char *q;
    size_t pos = 0;

#   ifdef DEBUG
    printf("next_word(%s)\n", *p);
#   endif
    /* <ctype.h> functions need an unsigned char value, hence the casts */
    while (*start && !isalpha((unsigned char)*start)
                  && !isdigit((unsigned char)*start))
        ++start;
    if (!*start)
        goto end;

    q = start;
    if (isalpha((unsigned char)*q)) {
        /* alphabetic word */
        for (; isalpha((unsigned char)*q) || *q == '\''; ++q) {
            if (*q == '\'' && q[-1] == '\'') { /* two consecutive apostrophes */
                res.len = INVALID_WORD;
                goto end;
            }
            if (pos < sizeof tmp - 1)          /* never write past tmp */
                tmp[pos++] = (char)tolower((unsigned char)*q);
        }
        if (q[-1] == '\'') {
            --q;                               /* drop trailing apostrophe */
        } else if (isdigit((unsigned char)*q)) { /* digit in word */
            res.len = INVALID_WORD;
            goto end;
        }
    } else {
        /* number */
        for (; isdigit((unsigned char)*q); ++q) {
            if (pos < sizeof tmp - 1)
                tmp[pos++] = *q;
        }
        if (isalpha((unsigned char)*q)) {      /* alphabetic char in number */
            res.len = INVALID_WORD;
            goto end;
        }
    }

    res.len = (int)(q - start);
    /* terminate at the token end (before a dropped apostrophe, if any) */
    tmp[(size_t)res.len < pos ? (size_t)res.len : pos] = '\0';
    res.word = tmp;
    *p = q;
end:
#   ifdef DEBUG
    printf("next_word: [%s], %d\n", res.word? res.word: "NULL", res.len);
#   endif
    return res;
}
/* insert_word - store word `w` in slot `pos` of the result array.
 *
 * Copies w.len bytes into the slot's text, NUL-terminates it and resets the
 * slot's counter to zero.  The caller must have checked that w.len fits in
 * the text field.  Always returns 1.
 */
static int insert_word(word_count_word_t *words, word_t w, int pos)
{
    word_count_word_t *slot = words + pos;

#ifdef DEBUG
    printf("insert words(len=%d word=[%.*s])\n", w.len, w.len, w.word);
#endif
    memcpy(slot->text, w.word, w.len);
    slot->text[w.len] = '\0';
    slot->count = 0;
    return 1;
}
/* count_words - collect the distinct words of `sentence` and their counts.
 *
 * sentence - NUL-terminated input text
 * words    - caller-provided array with room for MAX_WORDS results; filled
 *            in order of first appearance
 *
 * Returns the number of distinct words stored in `words`, or a negative
 * code on error (`words` then holds the results gathered so far):
 *   EXCESSIVE_LENGTH_WORD      a word is longer than MAX_WORD_LENGTH
 *   EXCESSIVE_NUMBER_OF_WORDS  more than MAX_WORDS distinct words
 *   INVALID_WORD               the scanner rejected a token
 *   -4                         the lookup table could not be allocated
 *
 * The temporary hash table is released on every exit path so its entries
 * go back to the shared pool.
 */
int count_words(const char *sentence, word_count_word_t *words)
{
    enum { ALLOCATION_FAILURE = -4 };  /* no public code exists for this */
    int result;
    int current = 0;
    int inserted;
    hash_t *hash;
#   ifdef DEBUG
    const char *s = sentence;
#   endif

    hash = h_create(16);
    if (!hash)
        return ALLOCATION_FAILURE;
#   ifdef DEBUG
    printf("count_words([%s], %p)\n", sentence, (void *)words);
#   endif

    while (*sentence) {
        word_t w = next_word(&sentence);
        h_entry_t *e;
        int index;

        if (!w.word) {
            /* either nothing left to read, or a malformed token */
            if (w.len == INVALID_WORD) {
                result = INVALID_WORD;
                goto out;
            }
            break;
        }
        if (w.len > MAX_WORD_LENGTH) {
            result = EXCESSIVE_LENGTH_WORD;
            goto out;
        }

        e = h_entry_find(hash, (const unsigned char *)w.word, w.len);
        if (!e) {
            if (current == MAX_WORDS) {
                result = EXCESSIVE_NUMBER_OF_WORDS;
                goto out;
            }
            insert_word(words, w, current);
            /* key the entry on the stored copy: the scanner's buffer is
             * overwritten by the next call */
            e = h_entry_add(hash, (void *)&words[current], w.len, &inserted);
            if (!e) {
                result = ALLOCATION_FAILURE;
                goto out;
            }
            (void)inserted;
            current++;
        }
        /* the entry's key pointer is the address of its result slot */
        index = (int)((word_count_word_t *)e->data - &words[0]);
        words[index].count++;
#       ifdef DEBUG
        printf("count_words: index=%d\n", index);
        printf("offset=%d\n", (int)(sentence-s));
#       endif
    }
    result = current;
out:
    h_destroy(hash);
    return result;
}
#ifdef UNIT_TEST
/* Result table used by the stand-alone test driver below. */
static word_count_word_t wtable[MAX_WORDS];

/* reset_wtable - clear every slot of wtable before a new run. */
static void reset_wtable(void)
{
    memset(wtable, 0, sizeof wtable);
}
static void print_wtable(int n)
{
for (int i=0; i<n; ++i) {
printf ("%2d: %2d x \"%s\"\n", i, wtable[i].count, wtable[i].text);
//djb2_hash(wtable[i].text));
}
}
/* Test driver: runs count_words() on each command-line argument in turn and
 * prints the return value followed by the resulting table.
 */
int main(int ac, char **av)
{
    int arg = 1;

    while (arg < ac) {
        int res;

        reset_wtable();
        res = count_words(av[arg], wtable);
        printf ("res=%d\n", res);
        print_wtable(res);
        ++arg;
    }
    return 0;
}
#endif

View File

@@ -0,0 +1,41 @@
#ifndef WORD_COUNT_H
#define WORD_COUNT_H
#define MAX_WORDS 20 // at most MAX_WORDS can be found in the test input string
#define MAX_WORD_LENGTH 50 // no individual word can exceed this length
// results structure
// One slot per distinct word: the word's text and how many times it occurred.
typedef struct word_count_word {
char text[MAX_WORD_LENGTH + 1]; // allow for the string to be null-terminated
int count; // number of occurrences of text in the sentence
} word_count_word_t;
// Error codes (all negative).
// a word is longer than MAX_WORD_LENGTH
#define EXCESSIVE_LENGTH_WORD -1
// the sentence holds more than MAX_WORDS distinct words
#define EXCESSIVE_NUMBER_OF_WORDS -2
// set by the word scanner in word_t.len when a token is malformed
#define INVALID_WORD -3
// count_words - find the unique words of a sentence and count how often
//               each one occurs
// inputs:
//    sentence = a null-terminated string to be analyzed
//
// outputs:
//    words = caller-allocated array (room for MAX_WORDS entries) that
//            receives the words found and their frequency
// returns the number of unique words stored in words,
//    or a negative number if an error occurred.
//    words will contain the results up to that point.
int count_words(const char *sentence, word_count_word_t * words);
// A token handed from the word scanner to count_words.
typedef struct {
// token text, or NULL when no token was produced
char *word;
// token length in bytes; carries INVALID_WORD when the token is malformed
int len;
} word_t;
/* See GNUmakefile in following link for explanation
 * https://exercism.io/my/solutions/103b2f7d92db42309c1988030f5202c7
 */
/* When TESTALL is defined, TEST_IGNORE() is redefined as an empty block.
 * NOTE(review): presumably this overrides the test framework's macro so
 * that tests marked as ignored still run - confirm against the test suite.
 */
#ifdef TESTALL
#undef TEST_IGNORE
#define TEST_IGNORE() {}
#endif
#endif