initial commit
This commit is contained in:
124
c/word-count/src/hash.c
Normal file
124
c/word-count/src/hash.c
Normal file
@@ -0,0 +1,124 @@
|
||||
#include <string.h>
|
||||
#include <malloc.h>
|
||||
|
||||
#include "hash.h"
|
||||
|
||||
#define HASH_SIZE 50
|
||||
|
||||
//static hash_t *hash_table[HASH_SIZE];
|
||||
static h_entry_t *pool_free, *alloc_entries;
|
||||
static int n_entries;
|
||||
|
||||
/*
 * h_init - reset every bucket of an existing table to empty.
 *
 * hash->size must already hold the bucket count (h_create() sets it).
 * Buckets are simply overwritten: entries still chained in them are not
 * handed back to the free pool.
 */
void h_init(hash_t *hash)
{
    /* entries[] is an array of pointers: size the wipe by its element type
     * so it stays inside the size*sizeof(h_entry_t *) bytes that h_create()
     * reserves for it (sizeof(h_entry_t) would overrun that area) */
    memset(hash->entries, 0, sizeof(hash->entries[0])*hash->size);
}
|
||||
|
||||
hash_t *h_create(int size)
|
||||
{
|
||||
hash_t *hash;
|
||||
|
||||
if ( !(hash=calloc(sizeof(hash_t) + size*(sizeof (h_entry_t *)), 1)) )
|
||||
return NULL;
|
||||
hash->size=size;
|
||||
return hash;
|
||||
}
|
||||
|
||||
/*
 * h_destroy - hand every entry of `h` back to the free pool, then free
 * the table itself.  `h` must not be used afterwards.
 */
void h_destroy(hash_t *h)
{
    for (int bucket=0; bucket<h->size; ++bucket) {
        h_entry_t *cur=h->entries[bucket];

        while (cur) {
            /* saved first: h_entry_free() overwrites cur->next */
            h_entry_t *following=cur->next;

            h_entry_free(cur);
            cur=following;
        }
        h->entries[bucket]=NULL;
    }
    free(h);
}
|
||||
|
||||
/*
 * Daniel J. Bernstein's djb2 hash function.
 *
 * Hashes the first `len` bytes of `str` (no NUL terminator needed):
 * starting from 5381, each byte updates the value as hash*33 + byte.
 * A `len` of zero or less yields the seed, 5381.
 */
unsigned long hash_djb2(const unsigned char *str, const int len)
{
    unsigned long acc = 5381;
    int idx = 0;

    while (idx < len) {
        acc = acc * 33 + str[idx];
        ++idx;
    }
    return acc;
}
|
||||
|
||||
/*
 * h_entry_free - push entry `e` onto the head of the free pool.
 *
 * Only e->next is rewritten; the caller must already have unlinked `e`
 * from whatever bucket chain it was on.
 */
void h_entry_free(h_entry_t *e)
{
    h_entry_t *old_head=pool_free;

    pool_free=e;
    e->next=old_head;
}
|
||||
|
||||
/*
 * h_entry_find - look up the key `s` (`l` bytes) in table `h`.
 *
 * The key is hashed with hash_djb2() and reduced modulo h->size to pick
 * a bucket; that bucket's chain is then walked, comparing length first
 * and bytes second.
 *
 * Returns the matching entry, or NULL when the key is absent.
 */
h_entry_t *h_entry_find(hash_t *h, const unsigned char *s, const int l)
{
    unsigned long bucket=hash_djb2(s, l);
    h_entry_t *cur;

#   ifdef DEBUG
    printf("h_entry_find([%.*s]): hash=%#lx (%lu) - ", l, s, bucket, bucket%h->size);
#   endif
    bucket%=h->size;

    /* stop on the first entry whose length and bytes both match;
     * running off the chain leaves cur == NULL, i.e. "not found" */
    cur=h->entries[bucket];
    while (cur && !(l == cur->key_len && !memcmp(cur->data, s, l)))
        cur=cur->next;

#   ifdef DEBUG
    printf("ret=%p\n", cur? (void *)cur: (void *)-1);
#   endif
    return cur;
}
|
||||
|
||||
/*
 * h_entry_add - return the entry for key `s` (`l` bytes), creating it if
 * it is not in the table yet.
 *
 *   h       table to search / insert into
 *   s, l    key bytes and their length; the pointer itself is stored in
 *           entry->data (the bytes are not copied), so the key must stay
 *           valid for as long as the entry is in the table
 *   insert  out: 1 when a new entry was linked in, 0 otherwise
 *
 * Returns the existing or freshly linked entry, or NULL (with *insert
 * left at 0) when the free pool is empty and cannot be grown.
 */
h_entry_t *h_entry_add(hash_t *h, const unsigned char *s, const int l, int *insert)
{
    unsigned long hash;
    h_entry_t *entry;

    *insert=0;

    /* a key that is already present needs no pool entry at all */
    if ((entry=h_entry_find(h, s, l)))
        return entry;

    if (!pool_free) {
        /* Grow the pool with a separate chunk rather than resizing one big
         * array: resizing may move the array, which would leave every
         * bucket and every ->next link pointing into released memory. */
        h_entry_t *chunk=calloc(ENTRY_ALLOC_SIZE, sizeof(*chunk));

        if (!chunk)
            return NULL;
        alloc_entries=chunk;                /* most recently added chunk */
        n_entries+=ENTRY_ALLOC_SIZE;
#       ifdef DEBUG
        printf("get_hash: allocating %d new entries - total entries=%d\n",
               ENTRY_ALLOC_SIZE, n_entries);
#       endif
        for (int i=0; i<ENTRY_ALLOC_SIZE; ++i) {    /* create free entries list */
            chunk[i].next=pool_free;
            pool_free=chunk+i;
        }
    }

    *insert=1;

    hash=hash_djb2(s, l)%h->size;

    /* get a free entry from pool */
    entry=pool_free;
    pool_free=pool_free->next;

    /* set entry at the head of its bucket chain */
    entry->next=h->entries[hash];
    h->entries[hash]=entry;

    entry->data=(unsigned char *)s;
    entry->key_len=l;

#   ifdef DEBUG
    printf("h_entry_add: %p\n", (void *)entry);
#   endif
    return entry;
}
|
52
c/word-count/src/hash.h
Normal file
52
c/word-count/src/hash.h
Normal file
@@ -0,0 +1,52 @@
|
||||
#ifndef _HASH_H
|
||||
#define _HASH_H
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
/* one entry of a bucket chain (or of the free pool) */
typedef struct h_entry {
    void *data;             /* key pointer as handed to h_entry_add(); not copied */
    int key_len;            /* number of key bytes compared by h_entry_find() */
    struct h_entry *prev;   /* not referenced by the code in hash.c */
    struct h_entry *next;   /* next entry in the same bucket, or in the free pool */
} h_entry_t;
|
||||
|
||||
typedef struct {
|
||||
int size;
|
||||
h_entry_t *entries[];
|
||||
} hash_t;
|
||||
|
||||
/* a few predefined hash sizes */
|
||||
typedef struct {
|
||||
int size;
|
||||
h_entry_t *entries[16];
|
||||
} hash_16_t;
|
||||
|
||||
typedef struct {
|
||||
int size;
|
||||
h_entry_t *entries[128];
|
||||
} hash_128_t;
|
||||
|
||||
typedef struct {
|
||||
int size;
|
||||
h_entry_t *entries[1024];
|
||||
} hash_1024_t;
|
||||
|
||||
#define ENTRY_ALLOC_SIZE 20
|
||||
|
||||
/* hash map functions */
|
||||
hash_t *h_create(int size);
|
||||
void h_init(hash_t *);
|
||||
void h_destroy(hash_t *);
|
||||
|
||||
/* static free_nodes */
|
||||
void set_pool_free_static(h_entry_t *p);
|
||||
|
||||
/* hash entries functions */
|
||||
h_entry_t *h_entry_add(hash_t *, const unsigned char *, const int, int *);
|
||||
h_entry_t *h_entry_find(hash_t *, const unsigned char *, const int);
|
||||
void h_entry_free(h_entry_t *);
|
||||
|
||||
|
||||
/* hash function */
|
||||
unsigned long hash_djb2(const unsigned char *str, const int len);
|
||||
|
||||
#endif
|
162
c/word-count/src/word_count.c
Normal file
162
c/word-count/src/word_count.c
Normal file
@@ -0,0 +1,162 @@
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "word_count.h"
|
||||
#include "hash.h"
|
||||
|
||||
/* See GNUmakefile in following link for explanation
|
||||
* https://exercism.io/my/solutions/103b2f7d92db42309c1988030f5202c7
|
||||
*/
|
||||
#if defined UNIT_TEST || defined DEBUG
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
/* Some cases are not well explained, so words mixing alphabetic
 * characters and digits are treated as invalid, as are two consecutive
 * apostrophes inside an alphabetic word.
 * The following are rejected by next_word() as INVALID_WORD:
 *   P2P
 *   0xFF
 *   A''B
 * The following are split into 2 numbers/words:
 *   1'2
 *   A'2
 */
|
||||
|
||||
/* get next word in string
|
||||
*/
|
||||
static word_t next_word(const char **p)
|
||||
{
|
||||
word_t res={NULL, 0};
|
||||
const char *q, *p1=*p;
|
||||
static char tmp[1024];
|
||||
int pos=0;
|
||||
|
||||
# ifdef DEBUG
|
||||
printf("next_word(%s)\n", *p);
|
||||
# endif
|
||||
for (; *p1 && !isalpha(*p1) && !isdigit(*p1); ++p1)
|
||||
;
|
||||
|
||||
if (*p1) {
|
||||
q=p1;
|
||||
/* alphabetic word */
|
||||
if (isalpha(*p1)) {
|
||||
for (; *q &&(isalpha(*q) || *q=='\''); q++) {
|
||||
if (*q=='\'' && *(q-1)=='\'') { /* two consecutive apostrophes */
|
||||
res.len=INVALID_WORD;
|
||||
goto end;
|
||||
}
|
||||
tmp[pos++]=tolower(*q);
|
||||
}
|
||||
if (*(q-1) == '\'')
|
||||
q--;
|
||||
else if (isdigit(*q)) { /* digit in word */
|
||||
res.len=INVALID_WORD;
|
||||
goto end;
|
||||
}
|
||||
} else {
|
||||
for (; *q &&(isdigit(*q)); q++)
|
||||
tmp[pos++]=tolower(*q);
|
||||
if (isalpha(*q)) { /* alphabetic char in number */
|
||||
res.len=INVALID_WORD;
|
||||
goto end;
|
||||
}
|
||||
|
||||
}
|
||||
res.word=tmp;
|
||||
res.len=q-p1;
|
||||
*p=q;
|
||||
}
|
||||
end:
|
||||
# ifdef DEBUG
|
||||
printf("next_word: [%s], %d\n", res.word? res.word: "NULL", res.len);
|
||||
# endif
|
||||
return res;
|
||||
}
|
||||
|
||||
/* insert_word - store word `w` in slot `pos` of `words`.
 *
 * Copies w.len bytes of w.word into the slot's text, NUL-terminates it
 * and resets the slot's count to 0.  The caller must make sure w.len
 * fits in the text field.  Always returns 1.
 */
static int insert_word(word_count_word_t *words, word_t w, int pos)
{
    word_count_word_t *slot=words+pos;

#ifdef DEBUG
    printf("insert words(len=%d word=[%.*s])\n", w.len, w.len, w.word);
#endif

    memcpy(slot->text, w.word, w.len);
    slot->text[w.len]='\0';
    slot->count=0;
    return 1;
}
|
||||
|
||||
/* count_words - count the occurrences of each distinct word in `sentence`.
 *
 * Words are stored lower-cased in `words` in order of first appearance;
 * a temporary hash table maps each word to its slot.
 *
 * Returns the number of distinct words, or a negative error code:
 *   INVALID_WORD               next_word() rejected a word
 *   EXCESSIVE_LENGTH_WORD      a word is longer than MAX_WORD_LENGTH
 *   EXCESSIVE_NUMBER_OF_WORDS  more than MAX_WORDS distinct words
 * On error, `words` holds the results gathered up to that point.
 */
int count_words(const char *sentence, word_count_word_t *words)
{
    word_t w;
    int current=0, result, new, index;
    hash_t *hash;
    h_entry_t *e;
#   ifdef DEBUG
    const char *s=sentence;
#   endif

    /* NOTE: there is no dedicated out-of-memory code; when the lookup
     * table cannot be allocated no word can be stored, so this is
     * reported as EXCESSIVE_NUMBER_OF_WORDS */
    if (!(hash=h_create(16)))
        return EXCESSIVE_NUMBER_OF_WORDS;
#   ifdef DEBUG
    printf("count_words([%s], %p)\n", sentence, (void *)words);
#   endif
    while (*sentence) {
        w=next_word(&sentence);
        if (!w.word) {
            /* NULL word: either end of input or a rejected word */
            if (w.len==INVALID_WORD) {
                result=INVALID_WORD;
                goto out;
            }
            break;
        }
        if (w.len > MAX_WORD_LENGTH) {
            result=EXCESSIVE_LENGTH_WORD;
            goto out;
        }
        if (!(e=h_entry_find(hash, (const unsigned char *)w.word, w.len))) {
            if (current==MAX_WORDS) {
                result=EXCESSIVE_NUMBER_OF_WORDS;
                goto out;
            }
            insert_word(words, w, current);
            /* the stored copy is the key: w.word is overwritten by the
             * next call to next_word() */
            e=h_entry_add(hash, (void *)&words[current], w.len, &new);
            (void)new;
            if (!e) {               /* no entry could be obtained */
                result=EXCESSIVE_NUMBER_OF_WORDS;
                goto out;
            }
            current++;
        }
        /* e->data points at the words[] slot that holds this word */
        index=(word_count_word_t *)e->data-&words[0];
        words[index].count++;
#       ifdef DEBUG
        printf("count_words: index=%d\n", index);
        printf("offset=%d\n", (int)(sentence-s));
#       endif
    }
    result=current;
out:
    /* single exit: the table and its entries are released on every path */
    h_destroy(hash);
    return result;
}
|
||||
|
||||
#ifdef UNIT_TEST
|
||||
static word_count_word_t wtable[MAX_WORDS];
|
||||
|
||||
static void reset_wtable()
|
||||
{
|
||||
memset(wtable, 0, sizeof(wtable));
|
||||
}
|
||||
static void print_wtable(int n)
|
||||
{
|
||||
for (int i=0; i<n; ++i) {
|
||||
printf ("%2d: %2d x \"%s\"\n", i, wtable[i].count, wtable[i].text);
|
||||
//djb2_hash(wtable[i].text));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int main(int ac, char **av)
|
||||
{
|
||||
int arg=1;
|
||||
int res;
|
||||
//hash=h_create(16);
|
||||
for (; arg<ac; ++arg) {
|
||||
reset_wtable();
|
||||
res=count_words(av[arg], wtable);
|
||||
printf ("res=%d\n", res);
|
||||
print_wtable(res);
|
||||
}
|
||||
//printf("h_destroy 1\n");
|
||||
//h_destroy(hash);
|
||||
//printf("h_destroy 1\n");
|
||||
}
|
||||
#endif
|
41
c/word-count/src/word_count.h
Normal file
41
c/word-count/src/word_count.h
Normal file
@@ -0,0 +1,41 @@
|
||||
#ifndef WORD_COUNT_H
|
||||
#define WORD_COUNT_H
|
||||
|
||||
#define MAX_WORDS 20 // at most MAX_WORDS can be found in the test input string
|
||||
#define MAX_WORD_LENGTH 50 // no individual word can exceed this length
|
||||
|
||||
// results structure
|
||||
typedef struct word_count_word {
|
||||
char text[MAX_WORD_LENGTH + 1]; // allow for the string to be null-terminated
|
||||
int count;
|
||||
} word_count_word_t;
|
||||
|
||||
#define EXCESSIVE_LENGTH_WORD -1
|
||||
#define EXCESSIVE_NUMBER_OF_WORDS -2
|
||||
#define INVALID_WORD -3
|
||||
|
||||
// count_words - routine to classify the unique words and their frequency in a sentence
|
||||
// inputs:
|
||||
// sentence = a null-terminated string containing the text to be analyzed
|
||||
//
|
||||
// outputs:
|
||||
// words = allocated structure to record the words found and their frequency
|
||||
// uniqueWords - number of words in the words structure
|
||||
// returns a negative number if an error.
|
||||
// words will contain the results up to that point.
|
||||
int count_words(const char *sentence, word_count_word_t * words);
|
||||
|
||||
// a word located by the parser: start pointer plus length
typedef struct {
    char *word;     // first character of the word, NULL when there is none
    int len;        // length of the word, or a negative error code
} word_t;
|
||||
|
||||
/* See GNUmakefile in following link for explanation
|
||||
* https://exercism.io/my/solutions/103b2f7d92db42309c1988030f5202c7
|
||||
*/
|
||||
#ifdef TESTALL
|
||||
#undef TEST_IGNORE
|
||||
#define TEST_IGNORE() {}
|
||||
#endif
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user