C: run-length-encoding

This commit is contained in:
2021-09-10 18:21:53 +02:00
parent 9a3d216a65
commit 67b2d3ec40
9 changed files with 544 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
# The original 'makefile' has two flaws:
# 1) it overrides CFLAGS
# 2) it does not pass extra "FLAGS" to $(CC) that could come from environment
#
# It means :
# - we need to edit 'makefile' for different builds (DEBUG, etc...), which is
# not practical at all.
# - Also, it does not allow to run all tests without editing the test source
# code.
#
# To use this makefile (GNU make only):
# 1) copy it into exercise directory
# 2) add ex.h to exercise include file
# 3) add ex.c to exercise source code, and create a suitable main function
# 4) use make with one of the following targets :
# all: compile and run all predefined tests.
# nowarn: compile with no -Werror, and run all predefined tests
# debug: compile with -DDEBUG and run all predefined tests
# mem: perform memcheck with all tests enabled
# unit: build standalone (unit) binary
# unitnowarn: build standalone (unit) binary with -Werror disabled
# unitdebug: build standalone binary with -DDEBUG
#
# Original 'makefile' targets can be used (test, memcheck, clean, ...)
# None of the targets below names a real file.
# NOTE(review): 'standalone' is declared phony but no rule for it is visible here.
.PHONY: default all nowarn debug mem unit unitnowarn unitdebug standalone
# Running plain 'make' is the same as 'make all'.
default: all
# Every C file of the exercise directory ...
ALLSOURCES:=$(wildcard ./*.c)
# ... the Unity test drivers among them ...
TESTSOURCES:=$(wildcard ./test_*.c)
# ... and what is left: the sources used for the standalone (unit) binary.
SRC:=$(filter-out $(TESTSOURCES),$(ALLSOURCES))
# Pull in the exercise's own 'makefile' (CFLAGS, LIBS, test, memcheck, clean).
include makefile
# all: rebuild from scratch with TESTALL defined, then run the tests.
all: CFLAGS+=-DTESTALL
all: clean test
# nowarn: same as 'all', with -Werror removed from CFLAGS.
nowarn: CFLAGS:=$(filter-out -Werror,$(CFLAGS))
nowarn: clean all
# debug: same as 'all', with DEBUG defined.
debug: CFLAGS+=-DDEBUG
debug: all
# mem: rebuild with TESTALL defined and run the 'memcheck' target.
mem: CFLAGS+=-DTESTALL
mem: clean memcheck
# unitnowarn: standalone binary, with -Werror removed from CFLAGS.
unitnowarn: CFLAGS:=$(filter-out -Werror,$(CFLAGS))
unitnowarn: clean unit
# unitdebug: standalone binary, with DEBUG defined.
unitdebug: CFLAGS+=-DDEBUG
unitdebug: clean unit
# unit: build tests.out from the non-test sources only, with UNIT_TEST defined
# (UNIT_TEST is what enables the exercise's own main() function).
unit: CFLAGS+=-DUNIT_TEST
unit: *.c *.h
$(CC) $(CFLAGS) $(SRC) -o tests.out $(LIBS)

View File

@@ -0,0 +1,63 @@
# Help
## Running the tests
Get the first test compiling, linking and passing by following the [three rules of test-driven development][3-tdd-rules].
The included makefile can be used to create and run the tests using the `test` task.
```console
$ make test
```
Create just the functions you need to satisfy any compiler errors and get the test to fail.
Then write just enough code to get the test to pass.
Once you've done that, move onto the next test.
As you progress through the tests, take the time to refactor your implementation for readability and expressiveness and then go on to the next test.
Try to use standard C99 facilities in preference to writing your own low-level algorithms or facilities by hand.
[3-tdd-rules]: http://butunclebob.com/ArticleS.UncleBob.TheThreeRulesOfTdd
## Submitting your solution
You can submit your solution using the `exercism submit run_length_encoding.c run_length_encoding.h` command.
This command will upload your solution to the Exercism website and print the solution page's URL.
It's possible to submit an incomplete solution which allows you to:
- See how others have completed the exercise
- Request help from a mentor
## Need to get help?
If you'd like help solving the exercise, check the following pages:
- The [C track's documentation](https://exercism.org/docs/tracks/c)
- [Exercism's support channel on gitter](https://gitter.im/exercism/support)
- The [Frequently Asked Questions](https://exercism.org/docs/using/faqs)
Should those resources not suffice, you could submit your (incomplete) solution to request mentoring.
Make sure you have read the "Guides" section of the [C track][c-track] on the Exercism site.
This covers the basic information on setting up the development environment expected by the exercises.
## Submitting Incomplete Solutions
If you are struggling with a particular exercise, it is possible to submit an incomplete solution so you can see how others have completed the exercise.
## Resources
To get help if having trouble, you can use the following resources:
- [StackOverflow][] can be used to search for your problem and see if it has been answered already. You can also ask and answer questions.
- [CPPReference][] can be used to look up information on C concepts, operators, types, standard library functions and more.
- [TutorialsPoint][] has similar content as CPPReference in its C programming section.
- [The C Programming Language][K&R] book by K&R is the original source of the language and is still useful today.
[c-track]: https://exercism.io/my/tracks/c
[stackoverflow]: http://stackoverflow.com/questions/tagged/c
[cppreference]: https://en.cppreference.com/w/c
[tutorialspoint]: https://www.tutorialspoint.com/cprogramming/
[K&R]: https://www.amazon.com/Programming-Language-2nd-Brian-Kernighan/dp/0131103628/

View File

@@ -0,0 +1,47 @@
# Run Length Encoding
Welcome to Run Length Encoding on Exercism's C Track.
If you need help running the tests or submitting your code, check out `HELP.md`.
## Instructions
Implement run-length encoding and decoding.
Run-length encoding (RLE) is a simple form of data compression, where runs
(consecutive data elements) are replaced by just one data value and count.
For example we can represent the original 53 characters with only 13.
```text
"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB" -> "12WB12W3B24WB"
```
RLE allows the original data to be perfectly reconstructed from
the compressed data, which makes it a lossless data compression.
```text
"AABCCCDEEEE" -> "2AB3CD4E" -> "AABCCCDEEEE"
```
For simplicity, you can assume that the unencoded string will only contain
the letters A through Z (either lower or upper case) and whitespace. This way
data to be encoded will never contain any numbers and numbers inside data to
be decoded always represent the count for the following character.
## Source
### Created by
- @vlzware
### Contributed to by
- @h-3-0
- @patricksjackson
- @QLaille
- @ryanplusplus
- @wolf99
### Based on
Wikipedia - https://en.wikipedia.org/wiki/Run-length_encoding

View File

@@ -0,0 +1,45 @@
#ifndef __BR_COMMON_H
#define __BR_COMMON_H
/* ${LINUX_SRC}/include/linux/compiler_attributes.h, around line 30
*
* __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17.
* For other compilers, simple implementation (for __fallthrough__ only)
*/
/* Fallback for compilers that do not provide __has_attribute: the query is
 * mapped onto a fixed table of per-attribute constants, so that
 * __has_attribute(x) can still be used in #if expressions.
 * Each attribute is listed exactly once.
 */
#ifndef __has_attribute
# define __has_attribute(x) __GCC4_has_attribute_##x
# define __GCC4_has_attribute___assume_aligned__ (__GNUC_MINOR__ >= 9)
# define __GCC4_has_attribute___copy__ 0
# define __GCC4_has_attribute___designated_init__ 0
# define __GCC4_has_attribute___externally_visible__ 1
# define __GCC4_has_attribute___no_caller_saved_registers__ 0
# define __GCC4_has_attribute___noclone__ 1
# define __GCC4_has_attribute___nonstring__ 0
# define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
# define __GCC4_has_attribute___no_sanitize_undefined__ (__GNUC_MINOR__ >= 9)
# define __GCC4_has_attribute___fallthrough__ 0
#endif
/* ${LINUX_SRC}/include/linux/compiler_attributes.h, around line 200
*/
/* 'fallthrough;' marks an intentional fall through between switch cases.
 * It expands to the compiler attribute when available, otherwise to an empty
 * statement. The expansion deliberately carries no trailing ';' (the user
 * supplies it), so that 'fallthrough;' is always exactly one statement.
 */
#if __has_attribute(__fallthrough__)
# define fallthrough __attribute__((__fallthrough__))
#else
# define fallthrough do {} while (0) /* fallthrough */
#endif
/* See GNUmakefile below for explanation
* https://github.com/braoult/exercism/blob/master/c/templates/GNUmakefile
*/
/* <stdio.h> and <stdlib.h> are only pulled in for the standalone (UNIT_TEST)
 * and DEBUG builds.
 */
#if defined UNIT_TEST || defined DEBUG
# include <stdio.h>
# include <stdlib.h>
#endif
/* With TESTALL defined, any previous TEST_IGNORE definition is replaced by an
 * empty block, so a 'TEST_IGNORE();' statement in a test does nothing.
 */
#ifdef TESTALL
# undef TEST_IGNORE
# define TEST_IGNORE() {}
#endif
#endif /* __BR_COMMON_H */

View File

@@ -0,0 +1,26 @@
#include "run_length_encoding.h"
/* See GNUmakefile below for explanation
* https://github.com/braoult/exercism/blob/master/c/templates/GNUmakefile
*/
#ifdef UNIT_TEST
/* Map NULL to a printable placeholder: passing NULL to "%s" is undefined. */
static const char *or_null(const char *s)
{
    return s ? s : "(null)";
}

/**
 * Standalone driver (UNIT_TEST builds only).
 *
 * Usage: tests.out e|d [string...]
 *   e: encode each string, then decode the result back
 *   d: decode each string, then encode the result back
 *
 * For every step, one line "<op>[<input>]=<computed size> [<output>]" is
 * printed. Any other mode letter prints nothing.
 *
 * @return EXIT_FAILURE when the mode argument is missing, 0 otherwise.
 */
int main(int ac, char **av)
{
    int what;

    if (ac < 2) {                       /* av[1] would be NULL */
        fprintf(stderr, "usage: %s e|d [string...]\n",
                ac > 0 ? av[0] : "tests.out");
        return EXIT_FAILURE;
    }
    what = *av[1];                      /* 'e', 'd' */

    for (int arg = 2; arg < ac; ++arg) {
        char *first = NULL, *second = NULL;

        switch (what) {
        case 'e':
            first = encode(av[arg]);
            second = decode(first);     /* decode(NULL) is NULL */
            printf("enc[%s]=%d [%s]\n", av[arg], encode_len(av[arg]),
                   or_null(first));
            printf("dec[%s]=%d [%s]\n", or_null(first), decode_len(first),
                   or_null(second));
            break;
        case 'd':
            first = decode(av[arg]);
            second = encode(first);     /* encode(NULL) is NULL */
            printf("dec[%s]=%d [%s]\n", av[arg], decode_len(av[arg]),
                   or_null(first));
            printf("enc[%s]=%d [%s]\n", or_null(first), encode_len(first),
                   or_null(second));
            break;
        default:
            break;
        }
        /* both strings were allocated by encode()/decode() */
        free(second);
        free(first);
    }
    return 0;
}
#endif

View File

@@ -0,0 +1,37 @@
### If you wish to use extra libraries (math.h for instance),
### add their flags here (-lm in our case) in the "LIBS" variable.
LIBS = -lm
###
# Compiler flags: C99, debug info, and warnings treated as errors.
CFLAGS = -std=c99
CFLAGS += -g
CFLAGS += -Wall
CFLAGS += -Wextra
CFLAGS += -pedantic
CFLAGS += -Werror
CFLAGS += -Wmissing-declarations
CFLAGS += -DUNITY_SUPPORT_64
# Extra flags used by the 'memcheck' target only (AddressSanitizer build).
ASANFLAGS = -fsanitize=address
ASANFLAGS += -fno-common
ASANFLAGS += -fno-omit-frame-pointer
# test: build tests.out if needed, then run it.
.PHONY: test
test: tests.out
@./tests.out
# memcheck: build everything plus Unity with ASANFLAGS and run the result.
.PHONY: memcheck
memcheck: ./*.c ./*.h
@echo Compiling $@
@$(CC) $(ASANFLAGS) $(CFLAGS) test-framework/unity.c ./*.c -o memcheck.out $(LIBS)
@./memcheck.out
@echo "Memory check passed"
# clean: remove objects, binaries and macOS debug-symbol bundles.
.PHONY: clean
clean:
rm -rf *.o *.out *.out.dSYM
# tests.out: Unity plus every C file of the exercise directory.
tests.out: ./*.c ./*.h
@echo Compiling $@
@$(CC) $(CFLAGS) test-framework/unity.c ./*.c -o tests.out $(LIBS)

View File

@@ -0,0 +1,101 @@
#include "run_length_encoding.h"
#include <ctype.h>
#include <limits.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* We have 2 choices for encoding: allocate a max length string (i.e. the
* size of the input string), maybe shrinking it with realloc() before
* returning, or first calculate the exact output length.
*
* For decoding, if we want to avoid an arbitrary MAX_LENGTH, or, worse,
* a smaller string with multiple realloc() when we need more space, we
* also can first calculate the output string size.
*
* We will go for the second method (calculate the exact length first) for
* both encode() and decode().
*
* Additional checks: we allow only printable characters (isprint()), and
* disallow digits for encode() and ending digits for decode().
*/
/**
 * encode_len() - size of the buffer needed to hold the encoded form of @str.
 * @str: NUL-terminated string to encode (may be NULL).
 *
 * Every run of identical characters costs one byte for the character itself,
 * plus the number of decimal digits of the run length when the run is longer
 * than one character.
 *
 * Return: the encoded size in bytes, terminating NUL included, or -1 if @str
 * is NULL or contains a digit or a non-printable character.
 */
int encode_len(const char *str)
{
    int len = 0;

    if (!str)
        return -1;

    while (*str) {
        const char c = *str;
        int run = 0;
        int width = 1;                  /* the character itself */

        /* <ctype.h> functions require a value representable as unsigned
         * char: a plain (possibly negative) char is undefined behavior.
         */
        if (isdigit((unsigned char)c) || !isprint((unsigned char)c))
            return -1;

        for (; *str == c; str++)        /* count consecutive chars */
            run++;

        if (run > 1)                    /* digits of the count prefix */
            for (; run; run /= 10)
                width++;
        len += width;
    }
    return len + 1;                     /* room for the terminating NUL */
}
/**
 * encode() - run-length encode @str into a newly allocated string.
 * @str: NUL-terminated string to encode (may be NULL).
 *
 * The output buffer is sized by encode_len(), which also validates the input.
 *
 * Return: the encoded string, to be released with free(), or NULL when @str
 * is NULL, is rejected by encode_len(), or the allocation fails.
 */
char *encode(const char *str)
{
    int size;
    char *out, *w;

    if (!str)
        return NULL;
    size = encode_len(str);
    if (size <= 0)
        return NULL;
    out = malloc(size);
    if (!out)
        return NULL;

    w = out;
    while (*str) {
        const int ch = *str;
        int run = 0;

        /* measure the run of identical characters starting here */
        while (*str == ch) {
            str++;
            run++;
        }
        /* a run of one is written bare, longer runs get a count prefix */
        if (run > 1)
            w += sprintf(w, "%d", run);
        *w++ = ch;
    }
    *w = 0;
    return out;
}
/**
 * decode_len() - size of the buffer needed to hold the decoded form of @str.
 * @str: NUL-terminated run-length encoded string (may be NULL).
 *
 * Counts are parsed as plain base-10 integers. (A floating-point parser
 * would read "2e3B" as 2000 'B' instead of "ee" followed by "BBB".)
 *
 * Return: the decoded size in bytes, terminating NUL included, or -1 when
 * @str is NULL, a count is zero, a count is not followed by a printable
 * character (this covers trailing digits), or the result does not fit an int.
 */
int decode_len(const char *str)
{
    int len = 0;

    if (!str)
        return -1;

    while (*str) {
        long count = 1;                 /* a bare character counts once */

        if (isdigit((unsigned char)*str)) {
            char *end;

            count = strtol(str, &end, 10);
            str = end;                  /* now on the char to duplicate */
            /* A zero count would underflow the copy loop in decode(). */
            if (count < 1 || !*str || !isprint((unsigned char)*str))
                return -1;
        }
        /* Also catches strtol() overflow (LONG_MAX) and keeps len + 1
         * representable.
         */
        if (count >= INT_MAX - len)
            return -1;
        len += (int)count;
        str++;
    }
    return len + 1;                     /* room for the terminating NUL */
}

/**
 * decode() - run-length decode @str into a newly allocated string.
 * @str: NUL-terminated run-length encoded string (may be NULL).
 *
 * The input is validated and measured by decode_len() first, so the copy
 * loop below can rely on every count being >= 1 and followed by a character.
 *
 * Return: the decoded string, to be released with free(), or NULL when @str
 * is NULL, is rejected by decode_len(), or the allocation fails.
 */
char *decode(const char *str)
{
    int len;
    char *res, *p;

    if (!str || (len = decode_len(str)) <= 0)
        return NULL;
    res = malloc((size_t)len);
    if (!res)
        return NULL;

    p = res;
    while (*str) {
        long count = 1;

        if (isdigit((unsigned char)*str)) {
            char *end;

            count = strtol(str, &end, 10);
            str = end;                  /* the char to duplicate */
        }
        memset(p, *str, (size_t)count);
        p += count;
        str++;
    }
    *p = '\0';
    return res;
}

View File

@@ -0,0 +1,13 @@
/* Public interface of the run-length encoding exercise. */
#ifndef RUN_LENGTH_ENCODING_H
#define RUN_LENGTH_ENCODING_H
/* Size in bytes (terminating NUL included) needed to hold the encoded form
 * of str, or -1 if str is NULL or contains a digit or a non-printable
 * character.
 */
int encode_len(const char *str);
/* Run-length encode text into a newly allocated string, which the caller
 * releases with free(). Returns NULL if text is NULL, is rejected by
 * encode_len(), or if the allocation fails.
 */
char *encode(const char *text);
/* Size in bytes (terminating NUL included) needed to hold the decoded form
 * of str, or -1 if str is NULL or a count is not followed by a printable
 * character (e.g. the string ends with digits).
 */
int decode_len(const char *str);
/* Run-length decode data into a newly allocated string, which the caller
 * releases with free(). Returns NULL if data is NULL, is rejected by
 * decode_len(), or if the allocation fails.
 */
char *decode(const char *data);
#endif
/* Included outside of the guard above; br-common.h has its own include
 * guard.
 */
#include "br-common.h"

View File

@@ -0,0 +1,156 @@
#include "test-framework/unity.h"
#include "run_length_encoding.h"
#include <stdlib.h>
/* Test fixture hook, intentionally empty: these tests need no preparation.
 * (Presumably called by Unity before each test - confirm in
 * test-framework/unity.c.)
 */
void setUp(void)
{
}
/* Test fixture hook, intentionally empty: each test frees its own strings.
 * (Presumably called by Unity after each test - confirm in
 * test-framework/unity.c.)
 */
void tearDown(void)
{
}
/* Encoding the empty string gives the empty string. */
static void test_encode_empty_string(void)
{
    const char *expected = "";
    char *encoded = encode("");

    TEST_ASSERT_EQUAL_STRING(expected, encoded);
    free(encoded);
}
/* Characters that are not repeated are copied without any count. */
static void test_encode_single_characters_only_are_encoded_without_count(void)
{
    TEST_IGNORE();      /* remove this line to enable the test */
    const char *expected = "XYZ";
    char *encoded = encode("XYZ");

    TEST_ASSERT_EQUAL_STRING(expected, encoded);
    free(encoded);
}
/* Every run longer than one character is prefixed by its length. */
static void test_encode_string_with_no_single_characters(void)
{
    TEST_IGNORE();
    const char *expected = "2A3B4C";
    char *encoded = encode("AABBBCCCC");

    TEST_ASSERT_EQUAL_STRING(expected, encoded);
    free(encoded);
}
/* Single characters and multi-digit runs can be mixed in one input. */
static void test_encode_single_characters_mixed_with_repeated_characters(void)
{
    TEST_IGNORE();
    const char *input =
        "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB";
    char *encoded = encode(input);

    TEST_ASSERT_EQUAL_STRING("12WB12W3B24WB", encoded);
    free(encoded);
}
/* Runs of spaces are encoded like any other character.
 * The input starts and ends with TWO spaces: that is what the expected
 * "2 hs2q q2w2 " stands for (a single space would be encoded as a bare ' ').
 */
static void test_encode_multiple_whitespace_mixed_in_string(void)
{
    TEST_IGNORE();
    char *res = encode("  hsqq qww  ");

    TEST_ASSERT_EQUAL_STRING("2 hs2q q2w2 ", res);
    free(res);
}
/* Lower case letters are encoded the same way as upper case ones. */
static void test_encode_lowercase_characters(void)
{
    TEST_IGNORE();
    const char *expected = "2a3b4c";
    char *encoded = encode("aabbbcccc");

    TEST_ASSERT_EQUAL_STRING(expected, encoded);
    free(encoded);
}
/* Decoding the empty string gives the empty string. */
static void test_decode_empty_string(void)
{
    TEST_IGNORE();
    const char *expected = "";
    char *decoded = decode("");

    TEST_ASSERT_EQUAL_STRING(expected, decoded);
    free(decoded);
}
/* Input without any count is decoded to itself. */
static void test_decode_single_characters_only(void)
{
    TEST_IGNORE();
    const char *expected = "XYZ";
    char *decoded = decode("XYZ");

    TEST_ASSERT_EQUAL_STRING(expected, decoded);
    free(decoded);
}
/* Each count expands the character that follows it. */
static void test_decode_string_with_no_single_characters(void)
{
    TEST_IGNORE();
    const char *expected = "AABBBCCCC";
    char *decoded = decode("2A3B4C");

    TEST_ASSERT_EQUAL_STRING(expected, decoded);
    free(decoded);
}
/* Bare characters and multi-digit counts can be mixed in one input. */
static void test_decode_single_characters_with_repeated_characters(void)
{
    TEST_IGNORE();
    const char *expected =
        "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB";
    char *decoded = decode("12WB12W3B24WB");

    TEST_ASSERT_EQUAL_STRING(expected, decoded);
    free(decoded);
}
/* Counted spaces are expanded like any other character.
 * "2 " stands for TWO spaces, so the expected string starts and ends with
 * two spaces.
 */
static void test_decode_multiple_whitespace_mixed_in_string(void)
{
    TEST_IGNORE();
    char *res = decode("2 hs2q q2w2 ");

    TEST_ASSERT_EQUAL_STRING("  hsqq qww  ", res);
    free(res);
}
/* Lower case letters are decoded the same way as upper case ones. */
static void test_decode_lower_case_string(void)
{
    TEST_IGNORE();
    const char *expected = "aabbbcccc";
    char *decoded = decode("2a3b4c");

    TEST_ASSERT_EQUAL_STRING(expected, decoded);
    free(decoded);
}
/* Round trip: decoding what encode() produced gives back the input. */
static void
test_consistency_encode_followed_by_decode_gives_original_string(void)
{
    TEST_IGNORE();
    char *encoded = encode("zzz ZZ zZ");
    char *round_trip = decode(encoded);

    TEST_ASSERT_EQUAL_STRING("zzz ZZ zZ", round_trip);
    free(encoded);
    free(round_trip);
}
/* Digits are not allowed in the text to encode: encode() must return NULL. */
static void test_encode_invalid_input_contains_digits(void)
{
    TEST_IGNORE();
    char *encoded = encode("AABB1A");

    TEST_ASSERT_NULL(encoded);
}
/* A count with no character after it is invalid: decode() must return NULL. */
static void test_decode_invalid_input_ends_with_digits(void)
{
    TEST_IGNORE();
    char *decoded = decode("AABBA2");

    TEST_ASSERT_NULL(decoded);
}
/* Test runner: registers every test of this file with Unity, in order, and
 * returns the value of UnityEnd() as the process exit status.
 */
int main(void)
{
UnityBegin("test_run_length_encoding.c");
/* encode() */
RUN_TEST(test_encode_empty_string);
RUN_TEST(test_encode_single_characters_only_are_encoded_without_count);
RUN_TEST(test_encode_string_with_no_single_characters);
RUN_TEST(test_encode_single_characters_mixed_with_repeated_characters);
RUN_TEST(test_encode_multiple_whitespace_mixed_in_string);
RUN_TEST(test_encode_lowercase_characters);
/* decode() */
RUN_TEST(test_decode_empty_string);
RUN_TEST(test_decode_single_characters_only);
RUN_TEST(test_decode_string_with_no_single_characters);
RUN_TEST(test_decode_single_characters_with_repeated_characters);
RUN_TEST(test_decode_multiple_whitespace_mixed_in_string);
RUN_TEST(test_decode_lower_case_string);
/* round trip and invalid input */
RUN_TEST(test_consistency_encode_followed_by_decode_gives_original_string);
RUN_TEST(test_encode_invalid_input_contains_digits);
RUN_TEST(test_decode_invalid_input_ends_with_digits);
return UnityEnd();
}