initial commit

This commit is contained in:
2021-08-08 21:11:22 +02:00
commit fe7136d801
130 changed files with 6858 additions and 0 deletions

51
c/word-count/GNUmakefile Normal file
View File

@@ -0,0 +1,51 @@
# The original 'makefile' has two flaws:
# 1) it overrides CFLAGS
# 2) it does not pass extra "FLAGS" to $(CC) that could come from environment
#
# It means:
# - we need to edit 'makefile' for different builds (DEBUG, etc...), which is
#   not practical at all.
# - Also, it does not allow running all tests without editing the test source
#   code.
#
# To use this makefile (GNU make only):
# "make": build with all predefined tests (without editing test source code)
# "make debugall": build with all predefined tests and debug code
# "make debugtest": run the original 'test' target with debug code
# "make mem": perform memcheck with all tests enabled
# "make unit": build standalone (unit) test
# "make debug": build standalone test with debugging code
#
# Original 'makefile' targets can be used (test, memcheck, clean, ...)

# Every target defined here is a command name, not a file.
.PHONY: default all debugall debugtest mem unit debug std

default: all

# default is to build with all predefined tests
# NOTE(review): BUILD is not referenced by this file or by the included
# 'makefile' as shown; 'teststall' may be a typo - confirm before using it.
BUILD := teststall

include makefile

# Target-specific CFLAGS additions are inherited by the prerequisites, so
# 'debugall' ends up with both -DDEBUG and -DTESTALL.
all: CFLAGS+=-DTESTALL
all: clean test

debugall: CFLAGS+=-DDEBUG
debugall: all

debugtest: CFLAGS+=-DDEBUG
debugtest: test

mem: CFLAGS+=-DTESTALL
mem: clean memcheck

unit: CFLAGS+=-DUNIT_TEST
unit: clean std

debug: CFLAGS+=-DUNIT_TEST -DDEBUG
debug: clean std

# Standalone binary built from the sources only (no Unity test runner).
std: src/*.c src/*.h
	$(CC) $(CFLAGS) src/*.c -o test.out

71
c/word-count/README.md Normal file
View File

@@ -0,0 +1,71 @@
# Word Count
Given a phrase, count the occurrences of each _word_ in that phrase.
For the purposes of this exercise you can expect that a _word_ will always be one of:
1. A _number_ composed of one or more ASCII digits (e.g. "0" or "1234") OR
2. A _simple word_ composed of one or more ASCII letters (e.g. "a" or "they") OR
3. A _contraction_ of two _simple words_ joined by a single apostrophe (e.g. "it's" or "they're")
When counting words you can assume the following rules:
1. The count is _case insensitive_ (e.g. "You", "you", and "YOU" are 3 uses of the same word)
2. The count is _unordered_; the tests will ignore how words and counts are ordered
3. Other than the apostrophe in a _contraction_, all forms of _punctuation_ are ignored
4. The words can be separated by _any_ form of whitespace (e.g. "\t", "\n", " ")
For example, for the phrase `"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.` the count would be:
```text
that's: 1
the: 2
password: 2
123: 1
cried: 1
special: 1
agent: 1
so: 1
i: 1
fled: 1
```
- Note that the tests for this exercise expect the output words to be proper C strings. That is, they should be NUL terminated. See https://en.wikipedia.org/wiki/C_string_handling
## Getting Started
Make sure you have read the "Guides" section of the
[C track][c-track] on the Exercism site. This covers
the basic information on setting up the development environment expected
by the exercises.
## Passing the Tests
Get the first test compiling, linking and passing by following the [three
rules of test-driven development][3-tdd-rules].
The included makefile can be used to create and run the tests using the `test`
task.
    make test
Create just the functions you need to satisfy any compiler errors and get the
test to fail. Then write just enough code to get the test to pass. Once you've
done that, move on to the next test.
As you progress through the tests, take the time to refactor your
implementation for readability and expressiveness and then go on to the next
test.
Try to use standard C99 facilities in preference to writing your own
low-level algorithms or facilities by hand.
## Source
This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour.
## Submitting Incomplete Solutions
It's possible to submit an incomplete solution so you can see how others have completed the exercise.
[c-track]: https://exercism.io/my/tracks/c
[3-tdd-rules]: http://butunclebob.com/ArticleS.UncleBob.TheThreeRulesOfTdd

37
c/word-count/makefile Normal file
View File

@@ -0,0 +1,37 @@
### If you wish to use extra libraries (math.h for instance),
### add their flags here (-lm in our case) in the "LIBS" variable.

LIBS = -lm

###
# Compiler flags shared by every build: C99, debug info, strict warnings
# treated as errors, and 64-bit support in the Unity test framework.
CFLAGS = -std=c99 -g -Wall -Wextra -pedantic -Werror -Wmissing-declarations -DUNITY_SUPPORT_64

# Extra flags used only by the 'memcheck' build (AddressSanitizer).
ASANFLAGS = -fsanitize=address -fno-common -fno-omit-frame-pointer

.PHONY: test memcheck clean

# Build the test binary if needed, then run it.
test: tests.out
	@./tests.out

# Build and run the tests with AddressSanitizer enabled.
memcheck: test/*.c src/*.c src/*.h
	@echo Compiling $@
	@$(CC) $(ASANFLAGS) $(CFLAGS) src/*.c test/vendor/unity.c test/*.c -o memcheck.out $(LIBS)
	@./memcheck.out
	@echo "Memory check passed"

# Remove every build product.
clean:
	rm -rf *.o *.out *.out.dSYM

# Test binary: sources, Unity and the test files linked together.
tests.out: test/*.c src/*.c src/*.h
	@echo Compiling $@
	@$(CC) $(CFLAGS) src/*.c test/vendor/unity.c test/*.c -o tests.out $(LIBS)

124
c/word-count/src/hash.c Normal file
View File

@@ -0,0 +1,124 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include "hash.h"
/* NOTE(review): in the code shown, HASH_SIZE is only referenced by the
 * commented-out table declaration below. */
#define HASH_SIZE 50
//static hash_t *hash_table[HASH_SIZE];
/* pool_free: head of the list of unused entries, linked through ->next.
 * alloc_entries: backing storage for pool entries, managed by h_entry_add(). */
static h_entry_t *pool_free, *alloc_entries;
/* Total number of pool entries allocated so far. */
static int n_entries;
/*
 * Reset every bucket of `hash` to empty.
 *
 * hash->size must already hold the number of buckets. The table stores
 * pointers to entries, so the amount cleared is size * sizeof(pointer);
 * clearing size * sizeof(h_entry_t) would write past the end of the table.
 * Entries still linked in the buckets are not released.
 */
void h_init(hash_t *hash)
{
    if (!hash || hash->size <= 0)
        return;
    memset(hash->entries, 0, (size_t)hash->size * sizeof hash->entries[0]);
}
hash_t *h_create(int size)
{
hash_t *hash;
if ( !(hash=calloc(sizeof(hash_t) + size*(sizeof (h_entry_t *)), 1)) )
return NULL;
hash->size=size;
return hash;
}
/*
 * Release a table obtained from h_create().
 *
 * Every entry still linked in a bucket is handed back through
 * h_entry_free(), then the table itself is freed. Passing NULL is a no-op,
 * like free(NULL).
 */
void h_destroy(hash_t *h)
{
    if (!h)
        return;

    for (int i = 0; i < h->size; ++i) {
        h_entry_t *entry = h->entries[i];

        while (entry) {
            /* read the link first: h_entry_free() overwrites ->next */
            h_entry_t *following = entry->next;

            h_entry_free(entry);
            entry = following;
        }
        h->entries[i] = NULL;
    }
    free(h);
}
/*
 * Daniel J. Bernstein's djb2 hash function.
 *
 * Hashes the first `len` bytes of `str`: the value starts at 5381 and each
 * byte updates it as hash * 33 + byte. A `len` of zero or less yields the
 * seed value.
 */
unsigned long hash_djb2(const unsigned char *str, const int len)
{
    unsigned long digest = 5381;
    const unsigned char *byte = str;

    for (int remaining = len; remaining > 0; --remaining, ++byte)
        digest = digest * 33 + *byte;
    return digest;
}
/*
 * Return entry `e` to the free pool by pushing it on the pool_free list.
 *
 * The entry is not unlinked from any bucket; the caller must already have
 * removed it (or be tearing the table down). Passing NULL is a no-op.
 */
void h_entry_free(h_entry_t *e)
{
    if (!e)
        return;
    e->next = pool_free;
    pool_free = e;
}
/*
 * Look up the key `s` of length `l` in table `h`.
 *
 * The bucket is chosen with hash_djb2(); an entry matches when its key
 * length equals `l` and the first `l` bytes at entry->data equal `s`.
 * Returns the matching entry, or NULL when there is none.
 */
h_entry_t *h_entry_find(hash_t *h, const unsigned char *s, const int l)
{
    const unsigned long digest = hash_djb2(s, l);
    const unsigned long bucket = digest % h->size;
    h_entry_t *cursor;

#   ifdef DEBUG
    printf("h_entry_find([%.*s]): hash=%#lx (%lu) - ", l, s, digest, bucket);
#   endif

    /* walk the chain until a match is found or the chain ends */
    cursor = h->entries[bucket];
    while (cursor && (cursor->key_len != l || memcmp(cursor->data, s, l) != 0))
        cursor = cursor->next;

#   ifdef DEBUG
    printf("ret=%p\n", cursor ? (void *)cursor : (void *)-1);
#   endif
    return cursor;
}
/*
 * Find the key `s` of length `l` in `h`, inserting it when absent.
 *
 * `*insert` is set to 1 when a new entry was created and to 0 otherwise.
 * The key bytes are not copied: the entry keeps the pointer `s`, so the
 * caller must keep that memory alive while the entry is in the table.
 *
 * Returns the existing or newly created entry, or NULL when a new entry
 * is needed and the pool cannot be grown.
 *
 * The pool grows by separate chunks of ENTRY_ALLOC_SIZE entries. Existing
 * storage is never reallocated: buckets and ->next links hold raw pointers
 * to entries, which would dangle if the storage moved.
 */
h_entry_t *h_entry_add(hash_t *h, const unsigned char *s, const int l, int *insert)
{
    unsigned long bucket;
    h_entry_t *entry;

    *insert = 0;
    if ((entry = h_entry_find(h, s, l)))
        return entry;

    if (!pool_free) {
        h_entry_t *chunk = calloc(ENTRY_ALLOC_SIZE, sizeof *chunk);

        if (!chunk)
            return NULL;
        alloc_entries = chunk;          /* remember the newest chunk */
        n_entries += ENTRY_ALLOC_SIZE;
#       ifdef DEBUG
        printf("get_hash: allocating %d new entries - total entries=%d\n",
               ENTRY_ALLOC_SIZE, n_entries);
#       endif
        for (int i = 0; i < ENTRY_ALLOC_SIZE; ++i) { /* create free entries list */
            chunk[i].next = pool_free;
            pool_free = &chunk[i];
        }
    }

    *insert = 1;
    bucket = hash_djb2(s, l) % h->size;

    /* get a free entry from pool */
    entry = pool_free;
    pool_free = pool_free->next;

    /* set entry in hash: push it at the head of its bucket chain */
    entry->next = h->entries[bucket];
    h->entries[bucket] = entry;
    entry->data = (unsigned char *)s;
    entry->key_len = l;
#   ifdef DEBUG
    printf("h_entry_add: %p\n", (void *)entry);
#   endif
    return entry;
}

52
c/word-count/src/hash.h Normal file
View File

@@ -0,0 +1,52 @@
/* Chained hash table whose entries are taken from a shared free pool. */
#ifndef HASH_H  /* was _HASH_H: underscore + uppercase names are reserved */
#define HASH_H

#include <stdbool.h>

/* One entry of a bucket chain. */
typedef struct h_entry {
    void *data;                   /* key bytes; stored as given, not copied */
    int key_len;                  /* number of key bytes at `data` */
    struct h_entry *prev, *next;  /* `next` links a chain or the free list;
                                   * NOTE(review): `prev` looks unused */
} h_entry_t;

/* Hash table header followed by `size` bucket heads. */
typedef struct {
    int size;                     /* number of buckets */
    h_entry_t *entries[];         /* bucket heads (flexible array member) */
} hash_t;

/* a few predefined hash sizes */
typedef struct {
    int size;
    h_entry_t *entries[16];
} hash_16_t;

typedef struct {
    int size;
    h_entry_t *entries[128];
} hash_128_t;

typedef struct {
    int size;
    h_entry_t *entries[1024];
} hash_1024_t;

/* Number of entries added to the free pool each time it runs empty. */
#define ENTRY_ALLOC_SIZE 20

/* hash map functions */
hash_t *h_create(int size);   /* allocate a table with `size` buckets */
void h_init(hash_t *);        /* clear the buckets of a table */
void h_destroy(hash_t *);     /* return all entries to the pool, free table */

/* static free_nodes */
/* NOTE(review): no definition of this function is visible - confirm. */
void set_pool_free_static(h_entry_t *p);

/* hash entries functions */
/* find the key, or insert it; the last argument reports an insertion */
h_entry_t *h_entry_add(hash_t *, const unsigned char *, const int, int *);
/* find the key; NULL when absent */
h_entry_t *h_entry_find(hash_t *, const unsigned char *, const int);
/* push an entry back on the free pool */
void h_entry_free(h_entry_t *);

/* hash function */
unsigned long hash_djb2(const unsigned char *str, const int len);

#endif

View File

@@ -0,0 +1,162 @@
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "word_count.h"
#include "hash.h"
/* See GNUmakefile in following link for explanation
* https://exercism.io/my/solutions/103b2f7d92db42309c1988030f5202c7
*/
#if defined UNIT_TEST || defined DEBUG
#include <stdlib.h>
#endif
/* Some cases are not well explained. So I will consider words mixing
 * alphabetic characters and digits as invalid, as well as two consecutive
 * "'" inside an alphabetic word.
 * next_word() flags the following as INVALID_WORD:
 * P2P
 * 0xFF
 * A''B
 * The following are split into 2 numbers/words:
 * 1'2
 * A'2
 */
/*
 * Get the next word in the string at *p.
 *
 * Characters that are neither letters nor digits are skipped first. A word
 * is then either a run of letters and apostrophes or a run of digits.
 *
 * Returns:
 *  - {buffer, len} for a word. `buffer` is a static, NUL-terminated,
 *    lower-cased copy, overwritten by the next call. `len` is the word's
 *    length in the input, and *p is advanced past the word. One trailing
 *    apostrophe is not counted as part of the word.
 *  - {NULL, 0} when no word is left.
 *  - {NULL, INVALID_WORD} for two consecutive apostrophes, a digit right
 *    after letters, or a letter right after digits. *p is left unchanged.
 *
 * At most sizeof(tmp)-1 characters are stored. `len` still reports the
 * full length, so the caller can reject over-long words.
 */
static word_t next_word(const char **p)
{
    static char tmp[1024];
    const size_t cap = sizeof tmp - 1;  /* keep room for the terminator */
    word_t res = {NULL, 0};
    const char *start = *p;
    const char *q;
    size_t pos = 0;

#   ifdef DEBUG
    printf("next_word(%s)\n", *p);
#   endif

    /* <ctype.h> functions take unsigned char values: plain char may be
     * negative, which is undefined behaviour for them */
    while (*start && !isalnum((unsigned char)*start))
        ++start;
    if (!*start)
        goto end;

    q = start;
    if (isalpha((unsigned char)*q)) {
        /* alphabetic word */
        for (; isalpha((unsigned char)*q) || *q == '\''; q++) {
            if (*q == '\'' && q[-1] == '\'') { /* two consecutive apostrophes */
                res.len = INVALID_WORD;
                goto end;
            }
            if (pos < cap)
                tmp[pos++] = (char)tolower((unsigned char)*q);
        }
        if (q[-1] == '\'') {
            q--;                               /* drop a trailing apostrophe */
        } else if (isdigit((unsigned char)*q)) { /* digit in word */
            res.len = INVALID_WORD;
            goto end;
        }
    } else {
        /* number */
        for (; isdigit((unsigned char)*q); q++) {
            if (pos < cap)
                tmp[pos++] = *q;
        }
        if (isalpha((unsigned char)*q)) {      /* alphabetic char in number */
            res.len = INVALID_WORD;
            goto end;
        }
    }

    res.word = tmp;
    res.len = (int)(q - start);
    /* terminate after the counted characters (or after what was stored) */
    tmp[(size_t)res.len < pos ? (size_t)res.len : pos] = '\0';
    *p = q;

end:
#   ifdef DEBUG
    printf("next_word: [%s], %d\n", res.word? res.word: "NULL", res.len);
#   endif
    return res;
}
/*
 * Store word `w` in slot `pos` of `words`.
 *
 * Copies w.len characters into the slot's text, NUL-terminates it and sets
 * the slot's count to 0. The caller must ensure w.len fits in the text
 * field. Always returns 1.
 */
static int insert_word(word_count_word_t *words, word_t w, int pos)
{
    word_count_word_t *slot = words + pos;

#ifdef DEBUG
    printf("insert words(len=%d word=[%.*s])\n", w.len, w.len, w.word);
#endif
    memcpy(slot->text, w.word, w.len);
    slot->text[w.len] = '\0';
    slot->count = 0;
    return 1;
}
/* Negative code returned when the lookup table or one of its entries
 * cannot be allocated; word_count.h defines no code for this case. */
enum { WORD_COUNT_ALLOC_FAILURE = -4 };

/*
 * Count the occurrences of each word of `sentence`.
 *
 * Each distinct word is stored once in `words` (lower-cased, with its
 * count), in order of first appearance. `words` must have room for
 * MAX_WORDS elements.
 *
 * Returns the number of distinct words, or a negative code:
 *   EXCESSIVE_LENGTH_WORD      a word is longer than MAX_WORD_LENGTH
 *   EXCESSIVE_NUMBER_OF_WORDS  more than MAX_WORDS distinct words
 *   INVALID_WORD               next_word() flagged a malformed word
 *   WORD_COUNT_ALLOC_FAILURE   out of memory
 * On error, `words` holds the results gathered up to that point.
 *
 * Every path goes through h_destroy(), so the table and its pool entries
 * are released on errors too.
 */
int count_words(const char *sentence, word_count_word_t *words)
{
    int unique = 0;     /* distinct words stored so far */
    int status = 0;     /* 0, or the negative error code to report */
    int inserted;
    int index;
    hash_t *hash;
    h_entry_t *e;
#   ifdef DEBUG
    const char *s = sentence;
#   endif

    hash = h_create(16);
#   ifdef DEBUG
    printf("count_words([%s], %p)\n", sentence, (void *)words);
#   endif
    if (!hash)
        return WORD_COUNT_ALLOC_FAILURE;

    while (*sentence) {
        word_t w = next_word(&sentence);

        if (!w.word) {
            /* either no word is left, or the next one is malformed */
            if (w.len == INVALID_WORD)
                status = INVALID_WORD;
            break;
        }
        if (w.len > MAX_WORD_LENGTH) {
            status = EXCESSIVE_LENGTH_WORD;
            break;
        }

        e = h_entry_find(hash, (const unsigned char *)w.word, w.len);
        if (!e) {
            if (unique == MAX_WORDS) {
                status = EXCESSIVE_NUMBER_OF_WORDS;
                break;
            }
            insert_word(words, w, unique);
            /* the key is the stored copy: `text` is the first member of the
             * slot, so the slot's address is also the key's address */
            e = h_entry_add(hash, (const unsigned char *)&words[unique],
                            w.len, &inserted);
            if (!e) {
                status = WORD_COUNT_ALLOC_FAILURE;
                break;
            }
            unique++;
        }

        /* recover the slot index from the key pointer kept in the entry */
        index = (int)((word_count_word_t *)e->data - &words[0]);
        words[index].count++;
#       ifdef DEBUG
        printf("count_words: index=%d\n", index);
        printf("offset=%d\n", (int)(sentence-s));
#       endif
    }

    h_destroy(hash);
    return status ? status : unique;
}
#ifdef UNIT_TEST
/* Result table used by the standalone (UNIT_TEST) driver. */
static word_count_word_t wtable[MAX_WORDS];

/* Clear the whole result table before a new run. */
static void reset_wtable(void)
{
    memset(wtable, 0, sizeof wtable);
}
static void print_wtable(int n)
{
for (int i=0; i<n; ++i) {
printf ("%2d: %2d x \"%s\"\n", i, wtable[i].count, wtable[i].text);
//djb2_hash(wtable[i].text));
}
}
/*
 * Standalone driver: treat every command-line argument as a sentence, run
 * count_words() on it, then print the returned value and the table.
 */
int main(int ac, char **av)
{
    for (int arg = 1; arg < ac; ++arg) {
        int res;

        reset_wtable();
        res = count_words(av[arg], wtable);
        printf ("res=%d\n", res);
        print_wtable(res);
    }
    return 0;
}
#endif

View File

@@ -0,0 +1,41 @@
#ifndef WORD_COUNT_H
#define WORD_COUNT_H

#define MAX_WORDS 20 // at most MAX_WORDS can be found in the test input string
#define MAX_WORD_LENGTH 50 // no individual word can exceed this length

// results structure
typedef struct word_count_word {
    char text[MAX_WORD_LENGTH + 1]; // allow for the string to be null-terminated
    int count;
} word_count_word_t;

// Error codes. The values are parenthesized so that the macros expand
// safely inside any expression.
#define EXCESSIVE_LENGTH_WORD     (-1) // a word is longer than MAX_WORD_LENGTH
#define EXCESSIVE_NUMBER_OF_WORDS (-2) // more than MAX_WORDS distinct words
#define INVALID_WORD              (-3) // used by word_count.c to flag a malformed word

// count_words - routine to classify the unique words and their frequency in a sentence
// inputs:
// sentence = a null-terminated string that is analyzed
//
// outputs:
// words = allocated structure to record the words found and their frequency
// uniqueWords - number of words in the words structure
// returns a negative number if an error.
// words will contain the results up to that point.
int count_words(const char *sentence, word_count_word_t * words);

// A word located by the tokenizer in word_count.c: its text and its length.
typedef struct {
    char *word;
    int len;
} word_t;

/* See GNUmakefile in following link for explanation
 * https://exercism.io/my/solutions/103b2f7d92db42309c1988030f5202c7
 */
// With TESTALL defined, TEST_IGNORE() expands to an empty block.
#ifdef TESTALL
#undef TEST_IGNORE
#define TEST_IGNORE() {}
#endif

#endif