initial commit

This commit is contained in:
2021-08-08 21:11:22 +02:00
commit fe7136d801
130 changed files with 6858 additions and 0 deletions

View File

@@ -0,0 +1,162 @@
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include "word_count.h"
#include "hash.h"
/* See GNUmakefile in following link for explanation
* https://exercism.io/my/solutions/103b2f7d92db42309c1988030f5202c7
*/
#if defined UNIT_TEST || defined DEBUG
#include <stdlib.h>
#endif
/* Some cases are not well explained. So I will consider words mixing
* alphabetic characters and digits as invalid, as well as multiple "'"
* inside an alphabetic word:
* The following will return INVALID_WORD:
* P2P
* 0xFF
* A'2
* The following will return 2 numbers/words:
* 1'2
* A''B
*/
/* get next word in string
*/
static word_t next_word(const char **p)
{
word_t res={NULL, 0};
const char *q, *p1=*p;
static char tmp[1024];
int pos=0;
# ifdef DEBUG
printf("next_word(%s)\n", *p);
# endif
for (; *p1 && !isalpha(*p1) && !isdigit(*p1); ++p1)
;
if (*p1) {
q=p1;
/* alphabetic word */
if (isalpha(*p1)) {
for (; *q &&(isalpha(*q) || *q=='\''); q++) {
if (*q=='\'' && *(q-1)=='\'') { /* two consecutive apostrophes */
res.len=INVALID_WORD;
goto end;
}
tmp[pos++]=tolower(*q);
}
if (*(q-1) == '\'')
q--;
else if (isdigit(*q)) { /* digit in word */
res.len=INVALID_WORD;
goto end;
}
} else {
for (; *q &&(isdigit(*q)); q++)
tmp[pos++]=tolower(*q);
if (isalpha(*q)) { /* alphabetic char in number */
res.len=INVALID_WORD;
goto end;
}
}
res.word=tmp;
res.len=q-p1;
*p=q;
}
end:
# ifdef DEBUG
printf("next_word: [%s], %d\n", res.word? res.word: "NULL", res.len);
# endif
return res;
}
static int insert_word(word_count_word_t *words, word_t w, int pos)
{
#ifdef DEBUG
printf("insert words(len=%d word=[%.*s])\n", w.len, w.len, w.word);
#endif
memcpy(words[pos].text, w.word, w.len);
words[pos].text[w.len]=0;
words[pos].count=0;
return 1;
}
int count_words(const char *sentence, word_count_word_t *words)
{
word_t w;
int current=0, new, index;
hash_t *hash;
h_entry_t *e;
# ifdef DEBUG
const char *s=sentence;
# endif
hash=h_create(16);
# ifdef DEBUG
printf("count_words([%s], %p)\n", sentence, (void *)words);
# endif
for (; *sentence;) {
w=next_word(&sentence);
if (!w.word)
break;
if (w.len > MAX_WORD_LENGTH)
return EXCESSIVE_LENGTH_WORD;
if (!(e=h_entry_find(hash, (const unsigned char *)w.word, w.len))) {
if (current==MAX_WORDS)
return EXCESSIVE_NUMBER_OF_WORDS;
insert_word(words, w, current);
e=h_entry_add(hash, (void *)&words[current], w.len, &new);
current++;
}
index=(word_count_word_t *)e->data-&words[0];
words[index].count++;
//sentence=w.word+w.len;
//sentence+=w.len;
# ifdef DEBUG
printf("count_words: index=%d\n", index);
printf("offset=%d\n", (int)(sentence-s));
# endif
}
h_destroy(hash);
return current;
}
#ifdef UNIT_TEST
static word_count_word_t wtable[MAX_WORDS];
static void reset_wtable()
{
memset(wtable, 0, sizeof(wtable));
}
static void print_wtable(int n)
{
for (int i=0; i<n; ++i) {
printf ("%2d: %2d x \"%s\"\n", i, wtable[i].count, wtable[i].text);
//djb2_hash(wtable[i].text));
}
}
int main(int ac, char **av)
{
int arg=1;
int res;
//hash=h_create(16);
for (; arg<ac; ++arg) {
reset_wtable();
res=count_words(av[arg], wtable);
printf ("res=%d\n", res);
print_wtable(res);
}
//printf("h_destroy 1\n");
//h_destroy(hash);
//printf("h_destroy 1\n");
}
#endif