initial commit

2021-08-08 21:11:22 +02:00
commit fe7136d801
130 changed files with 6858 additions and 0 deletions
--- a/c/word-count/src/hash.c
+++ b/c/word-count/src/hash.c
@@ -0,0 +1,124 @@
+#include <string.h>
+#include <malloc.h>
+
+#include "hash.h"
+
+/* Only referenced by the commented-out declaration below. */
+#define HASH_SIZE 50
+
+//static hash_t *hash_table[HASH_SIZE];
+/* pool_free:     head of the singly linked list (through ->next) of unused
+ *                entries, shared by every hash table of this module.
+ * alloc_entries: most recent allocation backing the pool entries.
+ */
+static h_entry_t *pool_free, *alloc_entries;
+/* total number of pool entries allocated so far */
+static int n_entries;
+
+/* Reset every bucket of an existing table to the empty state.
+ *
+ * hash->size must already hold the number of buckets. Entries that are still
+ * chained in the buckets are not handed back to the pool, they are simply
+ * forgotten.
+ *
+ * The buckets are pointers, so the number of bytes to clear is derived from
+ * the element type of hash->entries: clearing sizeof(h_entry_t) bytes per
+ * bucket would write past the end of the table.
+ */
+void h_init(hash_t *hash)
+{
+    memset(hash->entries, 0, sizeof hash->entries[0] * (size_t)hash->size);
+}
+
+/* Allocate a hash table with `size` empty buckets.
+ *
+ * Returns the new table (release it with h_destroy()), or NULL when `size`
+ * is not positive or the allocation fails. A table without buckets is
+ * rejected because lookups reduce the hash modulo the bucket count.
+ */
+hash_t *h_create(int size)
+{
+    hash_t *hash;
+
+    if (size <= 0)
+        return NULL;
+
+    /* header + flexible array of bucket heads, zeroed so every bucket is empty */
+    hash=calloc(1, sizeof *hash + (size_t)size * sizeof hash->entries[0]);
+    if (!hash)
+        return NULL;
+    hash->size=size;
+    return hash;
+}
+
+/* Hand every entry of every bucket back to the free pool, then release the
+ * table itself. The keys referenced by the entries are not touched.
+ */
+void h_destroy(hash_t *h)
+{
+    for (int bucket = 0; bucket < h->size; ++bucket) {
+        h_entry_t *cur = h->entries[bucket];
+
+        while (cur) {
+            /* h_entry_free() overwrites ->next, so remember it first */
+            h_entry_t *following = cur->next;
+
+            h_entry_free(cur);
+            cur = following;
+        }
+        h->entries[bucket] = NULL;
+    }
+    free(h);
+}
+
+/* djb2 hash (Daniel J. Bernstein) over the first `len` bytes of `str`:
+ * start from 5381, then h = h * 33 + byte for each byte.
+ * A `len` of zero or less yields the seed value.
+ */
+unsigned long hash_djb2(const unsigned char *str, const int len)
+{
+    unsigned long h = 5381;
+    int remaining = len;
+
+    while (remaining-- > 0)
+        h = h * 33 + *str++;
+    return h;
+}
+
+/* Push `e` on top of the free pool so that it can be reused later.
+ * The entry is not unlinked from any bucket chain: the caller must already
+ * have detached it (or be discarding the whole chain).
+ */
+void h_entry_free(h_entry_t *e)
+{
+    h_entry_t *old_head = pool_free;
+
+    e->next = old_head;
+    pool_free = e;
+}
+
+/* Look up the key made of the `l` bytes at `s`.
+ *
+ * The bucket is selected with hash_djb2() modulo the table size, then its
+ * chain is walked until an entry with the same length and the same bytes is
+ * found. Returns that entry, or NULL when the key is absent.
+ */
+h_entry_t *h_entry_find(hash_t *h, const unsigned char *s, const int l)
+{
+    unsigned long bucket = hash_djb2(s, l);
+    h_entry_t *cur;
+
+#   ifdef DEBUG
+    printf("h_entry_find([%.*s]): hash=%#lx (%lu) - ", l, s, bucket, bucket%h->size);
+#   endif
+    bucket %= h->size;
+
+    /* stop on the first entry matching both length and content */
+    cur = h->entries[bucket];
+    while (cur && !(cur->key_len == l && memcmp(cur->data, s, l) == 0))
+        cur = cur->next;
+
+#   ifdef DEBUG
+    printf("ret=%p\n", cur? (void *)cur: (void *)-1);
+#   endif
+    return cur;
+}
+
+/* Return the entry for the key made of the `l` bytes at `s`, creating it
+ * when it does not exist yet.
+ *
+ * h      : table to search / update
+ * s, l   : key bytes and their number; the key is stored by reference, so
+ *          the bytes must stay valid for as long as the entry is in the table
+ * insert : set to 1 when a new entry was created, 0 otherwise
+ *
+ * Returns the entry, or NULL when a new entry was needed and the pool could
+ * not be extended.
+ *
+ * Entries come from a pool shared by all tables. When the pool is empty a
+ * fresh chunk of ENTRY_ALLOC_SIZE entries is allocated. Existing chunks are
+ * never resized or moved: entries already linked in a table are referenced
+ * by address, so relocating them (as realloc may do) would leave dangling
+ * pointers in the buckets.
+ */
+h_entry_t *h_entry_add(hash_t *h, const unsigned char *s, const int l, int *insert)
+{
+    unsigned long hash;
+    h_entry_t *entry;
+
+    *insert=0;
+
+    /* existing key: nothing to allocate */
+    if ((entry=h_entry_find(h, s, l)))
+        return entry;
+
+    if (!pool_free) {
+        h_entry_t *chunk=calloc(ENTRY_ALLOC_SIZE, sizeof *chunk);
+
+        if (!chunk)
+            return NULL;
+        alloc_entries=chunk;
+        n_entries+=ENTRY_ALLOC_SIZE;
+#       ifdef DEBUG
+        printf("get_hash: allocating %d new entries - total entries=%d\n",
+               ENTRY_ALLOC_SIZE, n_entries);
+#       endif
+
+        for (int i=0; i<ENTRY_ALLOC_SIZE; ++i) {  /* create free entries list */
+            chunk[i].next=pool_free;
+            pool_free=chunk+i;
+        }
+    }
+
+    *insert=1;
+
+    hash=hash_djb2(s, l)%h->size;
+
+    /* get a free entry from pool */
+    entry=pool_free;
+    pool_free=pool_free->next;
+
+    /* set entry in hash (head of the bucket chain) */
+    entry->next=h->entries[hash];
+    h->entries[hash]=entry;
+
+    entry->data=(unsigned char *)s;
+    entry->key_len=l;
+
+#   ifdef DEBUG
+    printf("h_entry_add: %p\n", (void *)entry);
+#   endif
+    return entry;
+}
--- a/c/word-count/src/hash.h
+++ b/c/word-count/src/hash.h
@@ -0,0 +1,52 @@
+#ifndef _HASH_H
+#define _HASH_H
+
+#include <stdbool.h>
+
+/* One entry of a bucket chain (also used as a node of the free pool). */
+typedef struct h_entry {
+    void *data;                         /* key bytes, stored by reference (not copied) */
+    int key_len;                        /* number of key bytes at `data` */
+    /* next: following entry in the bucket chain or in the free pool.
+     * prev: not written by the code in hash.c. */
+    struct h_entry *prev, *next;
+} h_entry_t;
+
+/* Hash table: a bucket count followed by the bucket heads themselves. */
+typedef struct {
+    int size;                           /* number of buckets in `entries` */
+    h_entry_t *entries[];               /* flexible array of chain heads, NULL when empty */
+} hash_t;
+
+/* a few predefined hash sizes
+ *
+ * Same members as hash_t, but with a fixed number of buckets instead of a
+ * flexible array. Presumably meant for tables that are declared statically
+ * and passed to the hash functions through a cast - TODO confirm, nothing in
+ * hash.c uses them. `size` is not filled in automatically.
+ */
+typedef struct {
+    int size;
+    h_entry_t *entries[16];
+} hash_16_t;
+
+typedef struct {
+    int size;
+    h_entry_t *entries[128];
+} hash_128_t;
+
+typedef struct {
+    int size;
+    h_entry_t *entries[1024];
+} hash_1024_t;
+
+/* number of entries added to the free pool each time h_entry_add() finds it empty */
+#define ENTRY_ALLOC_SIZE 20
+
+/* hash map functions
+ *
+ * h_create  : allocate a table with `size` buckets; NULL if allocation fails.
+ * h_init    : reset the buckets of an existing table.
+ * h_destroy : return all entries of the table to the free pool and free it.
+ */
+hash_t *h_create(int size);
+void h_init(hash_t *);
+void h_destroy(hash_t *);
+
+/* static free_nodes
+ * NOTE(review): no definition of this function is visible in hash.c.
+ */
+void set_pool_free_static(h_entry_t *p);
+
+/* hash entries functions
+ *
+ * Keys are given as a byte pointer plus a length and are stored by reference.
+ *
+ * h_entry_add  : return the entry for the key, creating it if needed; the
+ *                int pointed to by the last argument is set to 1 when an
+ *                entry was created and to 0 when it already existed.
+ * h_entry_find : return the entry for the key, or NULL if it is absent.
+ * h_entry_free : put an entry back in the free pool (it is not unlinked
+ *                from its bucket).
+ */
+h_entry_t *h_entry_add(hash_t *, const unsigned char *, const int, int *);
+h_entry_t *h_entry_find(hash_t *, const unsigned char *, const int);
+void h_entry_free(h_entry_t *);
+
+
+/* hash function: djb2 over the first `len` bytes of `str` */
+unsigned long hash_djb2(const unsigned char *str, const int len);
+
+#endif
--- a/c/word-count/src/word_count.c
+++ b/c/word-count/src/word_count.c
@@ -0,0 +1,162 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "word_count.h"
+#include "hash.h"
+
+/* See GNUmakefile in following link for explanation
+ * https://exercism.io/my/solutions/103b2f7d92db42309c1988030f5202c7
+ */
+#if defined UNIT_TEST || defined DEBUG
+#include <stdlib.h>
+#endif
+
+/* Some cases are not well explained, so words mixing alphabetic characters
+ * and digits are considered invalid, as are two consecutive "'" inside an
+ * alphabetic word.
+ * The following are flagged as INVALID_WORD by next_word():
+ *   P2P
+ *   0xFF
+ *   A''B
+ * The following are split into 2 numbers/words:
+ *   1'2
+ *   A'2
+ */
+
+/* get next word in string
+ *
+ * Skips everything that is neither a letter nor a digit, then reads either
+ * an alphabetic word (letters and single apostrophes, a trailing apostrophe
+ * being dropped) or a number (digits only).
+ *
+ * p: in/out cursor. On success *p is moved just past the returned token; it
+ *    is left untouched when nothing is returned.
+ *
+ * Returns:
+ *   {word, len}          the lowercased token and its length. `word` points
+ *                        to a static buffer that is overwritten by the next
+ *                        call (not reentrant). Only the first 1023
+ *                        characters are stored, while `len` always is the
+ *                        real token length, so callers must check `len`
+ *                        before reading `word`.
+ *   {NULL, 0}            no token left in the string.
+ *   {NULL, INVALID_WORD} letters and digits are mixed, or two apostrophes
+ *                        follow each other inside a word.
+ */
+static word_t next_word(const char **p)
+{
+    word_t res={NULL, 0};
+    const char *q, *p1=*p;
+    static char tmp[1024];
+    const int cap=(int)sizeof(tmp)-1;             /* room left for the final NUL */
+    int pos=0;
+
+#   ifdef DEBUG
+    printf("next_word(%s)\n", *p);
+#   endif
+    /* <ctype.h> functions need an unsigned char value: a negative plain char
+     * would be undefined behavior */
+    for (; *p1 && !isalpha((unsigned char)*p1) && !isdigit((unsigned char)*p1); ++p1)
+        ;
+
+    if (*p1) {
+        q=p1;
+        /* alphabetic word */
+        if (isalpha((unsigned char)*p1)) {
+            for (; *q && (isalpha((unsigned char)*q) || *q=='\''); q++) {
+                if (*q=='\'' && *(q-1)=='\'') {   /* two consecutive apostrophes */
+                    res.len=INVALID_WORD;
+                    goto end;
+                }
+                if (pos < cap)                    /* never write past tmp */
+                    tmp[pos++]=(char)tolower((unsigned char)*q);
+            }
+            if (*(q-1) == '\'')                   /* trailing apostrophe is not part of the word */
+                q--;
+            else if (isdigit((unsigned char)*q)) { /* digit in word */
+                res.len=INVALID_WORD;
+                goto end;
+            }
+        } else {
+            for (; *q && isdigit((unsigned char)*q); q++) {
+                if (pos < cap)
+                    tmp[pos++]=*q;
+            }
+            if (isalpha((unsigned char)*q)) {     /* alphabetic char in number */
+                res.len=INVALID_WORD;
+                goto end;
+            }
+
+        }
+        res.word=tmp;
+        res.len=(int)(q-p1);
+        /* terminate the stored token (this also cuts a dropped apostrophe) */
+        tmp[res.len < cap? res.len: cap]='\0';
+        *p=q;
+    }
+end:
+#   ifdef DEBUG
+    printf("next_word: [%s], %d\n", res.word? res.word: "NULL", res.len);
+#   endif
+    return res;
+}
+
+/* Store word `w` in slot `pos` of the result table.
+ * The text is copied and NUL-terminated and the counter starts at zero (the
+ * caller increments it). Always returns 1.
+ */
+static int insert_word(word_count_word_t *words, word_t w, int pos)
+{
+    word_count_word_t *slot = &words[pos];
+
+#ifdef DEBUG
+    printf("insert words(len=%d word=[%.*s])\n", w.len, w.len, w.word);
+#endif
+
+    memcpy(slot->text, w.word, w.len);
+    slot->text[w.len] = '\0';
+    slot->count = 0;
+    return 1;
+}
+
+/* Count the occurrences of every distinct word of `sentence`.
+ *
+ * sentence : NUL-terminated text to analyze
+ * words    : caller-provided table of at least MAX_WORDS slots; slot i gets
+ *            the i-th distinct word (lowercased) and its number of
+ *            occurrences
+ *
+ * Returns the number of distinct words stored in `words`, or a negative
+ * value on error, in which case `words` holds the results gathered so far:
+ *   EXCESSIVE_LENGTH_WORD      a word is longer than MAX_WORD_LENGTH
+ *   EXCESSIVE_NUMBER_OF_WORDS  more than MAX_WORDS distinct words
+ *   INVALID_WORD               next_word() rejected a token
+ *   -4                         the lookup table could not be allocated
+ *
+ * A temporary hash table maps each word to its slot. Its keys point to the
+ * text stored in `words`, and it is released on every exit path so that its
+ * entries go back to the shared pool.
+ */
+int count_words(const char *sentence, word_count_word_t *words)
+{
+    enum { WORD_COUNT_NO_MEMORY = -4 };           /* no dedicated code in word_count.h */
+    word_t w;
+    int current=0, new, index, err=0;
+    hash_t *hash;
+    h_entry_t *e;
+#   ifdef DEBUG
+    const char *s=sentence;
+#   endif
+
+    if (!(hash=h_create(16)))
+        return WORD_COUNT_NO_MEMORY;
+#   ifdef DEBUG
+    printf("count_words([%s], %p)\n", sentence, (void *)words);
+#   endif
+    while (*sentence) {
+        w=next_word(&sentence);
+        if (!w.word) {
+            /* either the end of the text (len 0) or a rejected token */
+            if (w.len < 0)
+                err=w.len;
+            break;
+        }
+        if (w.len > MAX_WORD_LENGTH) {
+            err=EXCESSIVE_LENGTH_WORD;
+            break;
+        }
+        if (!(e=h_entry_find(hash, (const unsigned char *)w.word, w.len))) {
+            if (current==MAX_WORDS) {
+                err=EXCESSIVE_NUMBER_OF_WORDS;
+                break;
+            }
+            insert_word(words, w, current);
+            /* the key is the copy held in words[]: w.word is a scratch
+             * buffer reused by the next call to next_word() */
+            e=h_entry_add(hash, (void *)&words[current], w.len, &new);
+            if (!e) {
+                err=WORD_COUNT_NO_MEMORY;
+                break;
+            }
+            current++;
+        }
+        /* the key address is the address of the slot itself */
+        index=(int)((word_count_word_t *)e->data-&words[0]);
+        words[index].count++;
+#       ifdef DEBUG
+        printf("count_words: index=%d\n", index);
+        printf("offset=%d\n", (int)(sentence-s));
+#       endif
+    }
+    h_destroy(hash);
+    return err? err: current;
+}
+
+#ifdef UNIT_TEST
+/* result table handed to count_words() by the UNIT_TEST main() */
+static word_count_word_t wtable[MAX_WORDS];
+
+/* Clear the whole result table (texts and counters) before a new run. */
+static void reset_wtable()
+{
+    memset(wtable, 0, sizeof wtable);
+}
+/* Print the first `n` slots of the result table, one per line:
+ * slot number, occurrence count and word. Prints nothing when n <= 0.
+ */
+static void print_wtable(int n)
+{
+    const word_count_word_t *row = wtable;
+
+    for (int i = 0; i < n; ++i, ++row)
+        printf ("%2d: %2d x \"%s\"\n", i, row->count, row->text);
+}
+
+/* Test driver: run count_words() on each command line argument and print
+ * the returned value followed by the words found.
+ */
+int main(int ac, char **av)
+{
+    for (int arg = 1; arg < ac; ++arg) {
+        int res;
+
+        reset_wtable();
+        res = count_words(av[arg], wtable);
+        printf ("res=%d\n", res);
+        print_wtable(res);                /* no output when res is an error code */
+    }
+    return 0;
+}
+#endif
--- a/c/word-count/src/word_count.h
+++ b/c/word-count/src/word_count.h
@@ -0,0 +1,41 @@
+#ifndef WORD_COUNT_H
+#define WORD_COUNT_H
+
+// Capacity limits of the result table filled by count_words().
+#define MAX_WORDS 20            // at most MAX_WORDS distinct words can be found in the test input string
+#define MAX_WORD_LENGTH 50      // no individual word can exceed this length (terminating NUL not included)
+
+// results structure: one distinct word and its number of occurrences
+typedef struct word_count_word {
+   char text[MAX_WORD_LENGTH + 1];      // allow for the string to be null-terminated
+   int count;                           // number of occurrences of text in the sentence
+} word_count_word_t;
+
+// Negative error codes:
+//   EXCESSIVE_LENGTH_WORD     - a word is longer than MAX_WORD_LENGTH
+//   EXCESSIVE_NUMBER_OF_WORDS - the sentence holds more than MAX_WORDS distinct words
+//   INVALID_WORD              - token rejected by the parser (letters and digits
+//                               mixed, or two consecutive apostrophes in a word)
+#define EXCESSIVE_LENGTH_WORD     -1
+#define EXCESSIVE_NUMBER_OF_WORDS -2
+#define INVALID_WORD              -3
+
+// count_words - routine to find the unique words of a sentence and their frequency
+// inputs:
+//    sentence = the null-terminated string to analyze
+//
+// outputs:
+//    words = caller-allocated array (MAX_WORDS elements) that receives the
+//            words found and their frequency
+//    return value = number of unique words stored in words,
+//           or a negative number if an error occurred, in which case
+//           words contains the results up to that point.
+int count_words(const char *sentence, word_count_word_t * words);
+
+/* One token as returned by the parser in word_count.c. */
+typedef struct {
+    char *word;                         /* lowercased token text, NULL when there is none */
+    int len;                            /* token length, or INVALID_WORD for a rejected token */
+} word_t;
+
+/* See GNUmakefile in following link for explanation
+ * https://exercism.io/my/solutions/103b2f7d92db42309c1988030f5202c7
+ *
+ * When TESTALL is defined, TEST_IGNORE() is redefined as an empty block,
+ * presumably so that tests marked as ignored are run anyway.
+ */
+#ifdef  TESTALL
+#undef  TEST_IGNORE
+#define TEST_IGNORE() {}
+#endif
+
+#endif