C: run-length-encoding

2021-09-10 18:21:53 +02:00
parent 9a3d216a65
commit 67b2d3ec40
9 changed files with 544 additions and 0 deletions
--- a/c/run-length-encoding/GNUmakefile
+++ b/c/run-length-encoding/GNUmakefile
@@ -0,0 +1,56 @@
+# The original 'makefile' has two flaws:
+# 1) it overrides CFLAGS
+# 2) it does not pass extra "FLAGS" to $(CC) that could come from environment
+#
+# It means :
+# - we need to edit 'makefile' for different builds (DEBUG, etc...), which is
+#   not practical at all.
+# - Also, it does not allow to run all tests without editing the test source
+#   code.
+#
+# To use this makefile (GNU make only):
+# 1) copy it into exercise directory
+# 2) add ex.h to exercise include file
+# 3) add ex.c to exercise source code, and create a suitable main function
+# 4) use make with one of the following targets :
+#   all: compile and run all predefined tests.
+#   nowarn: compile with no -Werror, and run all predefined tests
+#   debug: compile with -DDEBUG and run all predefined tests
+#   mem: perform memcheck with all tests enabled
+#   unit: build standalone (unit) binary
+#   unitnowarn: build standalone (unit) binary with -Werror disabled
+#   unitdebug: build standalone binary with -DDEBUG
+#
+# Original 'makefile' targets can be used (test, memcheck, clean, ...)
+
+# Convenience targets defined by this wrapper are declared phony.
+.PHONY: default all nowarn debug mem unit unitnowarn unitdebug standalone
+
+# Plain 'make' behaves like 'make all'.
+default: all
+
+# SRC is every .c file in this directory except the test_*.c sources;
+# it is what the standalone 'unit' binary is built from.
+ALLSOURCES:=$(wildcard ./*.c)
+TESTSOURCES:=$(wildcard ./test_*.c)
+SRC:=$(filter-out $(TESTSOURCES),$(ALLSOURCES))
+
+# Pull in the original makefile (CFLAGS, LIBS, test, memcheck, clean, ...).
+# The target-specific assignments below then extend or filter its CFLAGS.
+include makefile
+
+# all: add -DTESTALL, then run 'clean' and 'test'.
+all: CFLAGS+=-DTESTALL
+all: clean test
+
+# nowarn: same as 'all' but with -Werror removed from CFLAGS.
+nowarn: CFLAGS:=$(filter-out -Werror,$(CFLAGS))
+nowarn: clean all
+
+# debug: same as 'all' with -DDEBUG added.
+debug: CFLAGS+=-DDEBUG
+debug: all
+
+# mem: add -DTESTALL, then run 'clean' and the original 'memcheck' target.
+mem: CFLAGS+=-DTESTALL
+mem: clean memcheck
+
+# unitnowarn: build the standalone binary with -Werror removed.
+unitnowarn: CFLAGS:=$(filter-out -Werror,$(CFLAGS))
+unitnowarn: clean unit
+
+# unitdebug: build the standalone binary with -DDEBUG added.
+unitdebug: CFLAGS+=-DDEBUG
+unitdebug: clean unit
+
+# unit: compile $(SRC) (no test_*.c, no test framework) with -DUNIT_TEST
+# into tests.out.
+unit: CFLAGS+=-DUNIT_TEST
+unit: *.c *.h
+	$(CC) $(CFLAGS) $(SRC) -o tests.out  $(LIBS)
--- a/c/run-length-encoding/HELP.md
+++ b/c/run-length-encoding/HELP.md
@@ -0,0 +1,63 @@
+# Help
+
+## Running the tests
+
+Get the first test compiling, linking and passing by following the [three rules of test-driven development][3-tdd-rules].
+
+The included makefile can be used to create and run the tests using the `test` task.
+
+```console
+$ make test
+```
+
+Create just the functions you need to satisfy any compiler errors and get the test to fail.
+Then write just enough code to get the test to pass.
+Once you've done that, move onto the next test.
+
+As you progress through the tests, take the time to refactor your implementation for readability and expressiveness and then go on to the next test.
+
+Try to use standard C99 facilities in preference to writing your own low-level algorithms or facilities by hand.
+
+[3-tdd-rules]: http://butunclebob.com/ArticleS.UncleBob.TheThreeRulesOfTdd
+
+## Submitting your solution
+
+You can submit your solution using the `exercism submit run_length_encoding.c run_length_encoding.h` command.
+This command will upload your solution to the Exercism website and print the solution page's URL.
+
+It's possible to submit an incomplete solution which allows you to:
+
+- See how others have completed the exercise
+- Request help from a mentor
+
+## Need to get help?
+
+If you'd like help solving the exercise, check the following pages:
+
+- The [C track's documentation](https://exercism.org/docs/tracks/c)
+- [Exercism's support channel on gitter](https://gitter.im/exercism/support)
+- The [Frequently Asked Questions](https://exercism.org/docs/using/faqs)
+
+Should those resources not suffice, you could submit your (incomplete) solution to request mentoring.
+
+Make sure you have read the "Guides" section of the [C track][c-track] on the Exercism site.
+This covers the basic information on setting up the development environment expected by the exercises.
+
+## Submitting Incomplete Solutions
+
+If you are struggling with a particular exercise, it is possible to submit an incomplete solution so you can see how others have completed the exercise.
+
+## Resources
+
+To get help if having trouble, you can use the following resources:
+
+- [StackOverflow][] can be used to search for your problem and see if it has been answered already. You can also ask and answer questions.
+- [CPPReference][] can be used to look up information on C concepts, operators, types, standard library functions and more.
+- [TutorialsPoint][] has similar content as CPPReference in its C programming section.
+- [The C Programming Language][K&R] book by K&R is the original source of the language and is still useful today.
+
+[c-track]: https://exercism.io/my/tracks/c
+[stackoverflow]: http://stackoverflow.com/questions/tagged/c
+[cppreference]: https://en.cppreference.com/w/c
+[tutorialspoint]: https://www.tutorialspoint.com/cprogramming/
+[K&R]: https://www.amazon.com/Programming-Language-2nd-Brian-Kernighan/dp/0131103628/
--- a/c/run-length-encoding/README.md
+++ b/c/run-length-encoding/README.md
@@ -0,0 +1,47 @@
+# Run Length Encoding
+
+Welcome to Run Length Encoding on Exercism's C Track.
+If you need help running the tests or submitting your code, check out `HELP.md`.
+
+## Instructions
+
+Implement run-length encoding and decoding.
+
+Run-length encoding (RLE) is a simple form of data compression, where runs
+(consecutive data elements) are replaced by just one data value and count.
+
+For example we can represent the original 53 characters with only 13.
+
+```text
+"WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB"  ->  "12WB12W3B24WB"
+```
+
+RLE allows the original data to be perfectly reconstructed from
+the compressed data, which makes it a lossless data compression.
+
+```text
+"AABCCCDEEEE"  ->  "2AB3CD4E"  ->  "AABCCCDEEEE"
+```
+
+For simplicity, you can assume that the unencoded string will only contain
+the letters A through Z (either lower or upper case) and whitespace. This way
+data to be encoded will never contain any numbers and numbers inside data to
+be decoded always represent the count for the following character.
+
+## Source
+
+### Created by
+
+- @vlzware
+
+### Contributed to by
+
+- @h-3-0
+- @patricksjackson
+- @QLaille
+- @ryanplusplus
+- @wolf99
+
+### Based on
+
+Wikipedia - https://en.wikipedia.org/wiki/Run-length_encoding
--- a/c/run-length-encoding/br-common.h
+++ b/c/run-length-encoding/br-common.h
@@ -0,0 +1,45 @@
+#ifndef __BR_COMMON_H
+#define __BR_COMMON_H
+
+/* Common definitions shared by the exercise sources.
+ *
+ * Provides:
+ *  - a __has_attribute() fallback for compilers that do not define it,
+ *  - a `fallthrough` marker usable as a statement in switch bodies,
+ *  - <stdio.h> and <stdlib.h> when compiled with UNIT_TEST or DEBUG defined,
+ *  - an empty TEST_IGNORE() when compiled with TESTALL defined.
+ */
+
+/* ${LINUX_SRC}/include/linux/compiler_attributes.h, around line 30
+ *
+ * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17.
+ * For other compilers, simple implementation (for __fallthrough__ only)
+ */
+#ifndef __has_attribute
+#  define __has_attribute(x) __GCC4_has_attribute_##x
+#  define __GCC4_has_attribute___assume_aligned__      (__GNUC_MINOR__ >= 9)
+#  define __GCC4_has_attribute___copy__                0
+#  define __GCC4_has_attribute___designated_init__     0
+#  define __GCC4_has_attribute___externally_visible__  1
+#  define __GCC4_has_attribute___no_caller_saved_registers__ 0
+#  define __GCC4_has_attribute___noclone__             1
+#  define __GCC4_has_attribute___nonstring__           0
+#  define __GCC4_has_attribute___no_sanitize_address__ (__GNUC_MINOR__ >= 8)
+#  define __GCC4_has_attribute___no_sanitize_undefined__ (__GNUC_MINOR__ >= 9)
+#  define __GCC4_has_attribute___fallthrough__         0
+#endif
+
+/* ${LINUX_SRC}/include/linux/compiler_attributes.h, around line 200
+ *
+ * Use as a statement: `fallthrough;`.  The fallback expansion carries no
+ * trailing semicolon of its own, so it also works as the body of an
+ * unbraced if/else.
+ */
+#if __has_attribute(__fallthrough__)
+#  define fallthrough                    __attribute__((__fallthrough__))
+#else
+#  define fallthrough                    do {} while (0)  /* fallthrough */
+#endif
+
+/* See GNUmakefile below for explanation
+ * https://github.com/braoult/exercism/blob/master/c/templates/GNUmakefile
+ *
+ * Standalone (UNIT_TEST) and DEBUG builds get the standard I/O and
+ * allocation headers.
+ */
+#if defined(UNIT_TEST) || defined(DEBUG)
+#  include <stdio.h>
+#  include <stdlib.h>
+#endif
+
+/* With TESTALL defined, TEST_IGNORE() expands to an empty block so the
+ * tests that call it are executed instead of skipped.
+ */
+#ifdef  TESTALL
+#  undef  TEST_IGNORE
+#  define TEST_IGNORE() {}
+#endif
+
+#endif  /* __BR_COMMON_H */
--- a/c/run-length-encoding/main.c
+++ b/c/run-length-encoding/main.c
@@ -0,0 +1,26 @@
+#include "run_length_encoding.h"
+/* See GNUmakefile below for explanation
+ * https://github.com/braoult/exercism/blob/master/c/templates/GNUmakefile
+ */
+#ifdef UNIT_TEST
+int main(int ac, char **av)
+{
+    int arg=1, what;
+    char *res;
+
+    what = *av[arg++];                            /* 'e', 'd' */
+    for (; arg<ac; ++arg) {
+        switch (what) {
+            case 'e':
+                printf("enc[%s]=%d [%s]\n", av[arg], encode_len(av[arg]),
+                       res=encode(av[arg]));
+                printf("dec[%s]=%d [%s]\n", res, decode_len(res), decode(res));
+                break;
+            case 'd':
+                printf("dec[%s]=%d [%s]\n", av[arg], encode_len(av[arg]),
+                       res=decode(av[arg]));
+                printf("enc[%s]=%d [%s]\n", res, decode_len(res), decode(res));
+        }
+    }
+}
+#endif
--- a/c/run-length-encoding/makefile
+++ b/c/run-length-encoding/makefile
@@ -0,0 +1,37 @@
+### If you wish to use extra libraries (math.h for instance),
+### add their flags here (-lm in our case) in the "LIBS" variable.
+
+LIBS = -lm
+
+### Compiler flags: C99, debug info, warnings treated as errors.
+### Note that CFLAGS is assigned with '=', so a value coming from the
+### environment is replaced rather than extended.
+CFLAGS  = -std=c99
+CFLAGS += -g
+CFLAGS += -Wall
+CFLAGS += -Wextra
+CFLAGS += -pedantic
+CFLAGS += -Werror
+CFLAGS += -Wmissing-declarations
+CFLAGS += -DUNITY_SUPPORT_64
+
+### Extra flags used only by the 'memcheck' target (AddressSanitizer build).
+ASANFLAGS  = -fsanitize=address
+ASANFLAGS += -fno-common
+ASANFLAGS += -fno-omit-frame-pointer
+
+### test: build tests.out if needed, then run it.
+.PHONY: test
+test: tests.out
+	@./tests.out
+
+### memcheck: build every .c file plus the Unity framework with ASANFLAGS
+### into memcheck.out and run it.
+.PHONY: memcheck
+memcheck: ./*.c ./*.h
+	@echo Compiling $@
+	@$(CC) $(ASANFLAGS) $(CFLAGS) test-framework/unity.c ./*.c -o memcheck.out $(LIBS)
+	@./memcheck.out
+	@echo "Memory check passed"
+
+### clean: remove objects, binaries and macOS debug-symbol bundles.
+.PHONY: clean
+clean:
+	rm -rf *.o *.out *.out.dSYM
+
+### tests.out: every .c file in this directory linked with the Unity framework.
+tests.out: ./*.c ./*.h
+	@echo Compiling $@
+	@$(CC) $(CFLAGS) test-framework/unity.c ./*.c -o tests.out $(LIBS)
--- a/c/run-length-encoding/run_length_encoding.c
+++ b/c/run-length-encoding/run_length_encoding.c
@@ -0,0 +1,101 @@
+#include "run_length_encoding.h"
+#include <ctype.h>
+#include <limits.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* We have 2 choices for encoding: allocate a max length string (i.e. the
+ * size of input string), maybe shrinking with realloc() before returning, or
+ * first calculate the exact output length.
+ *
+ * For decoding, if we want to avoid an arbitrary MAX_LENGTH, or, worse,
+ * a smaller string with multiple realloc() when we need more space, we
+ * also can first calculate the output string size.
+ *
+ * We will go for the second method (calculate exact length first for both
+ * encode() and decode()).
+ *
+ * Additional checks: we allow only printable characters (isprint()), and
+ * disallow digits for encode() and ending digits for decode().
+ */
+
+/* Compute the buffer size needed to hold the run-length encoding of str.
+ *
+ * Each run of identical characters costs one byte for the character, plus
+ * the number of decimal digits of the run length when the run is longer
+ * than one.
+ *
+ * Returns the size in bytes, including the terminating NUL, or -1 when str
+ * is NULL or contains a digit or a non-printable character.
+ */
+int encode_len(const char  *str)
+{
+    int len = 0;
+
+    if (!str)
+        return -1;
+    while (*str) {
+        const char c = *str;
+        int run = 0;
+
+        /* <ctype.h> functions require a value representable as unsigned
+         * char (or EOF); a negative plain char would be undefined behaviour.
+         */
+        if (isdigit((unsigned char)c) || !isprint((unsigned char)c))
+            return -1;
+        for (; *str == c; str++)                  /* measure the run */
+            run++;
+        len++;                                    /* the character itself */
+        if (run > 1) {
+            for (; run; run /= 10)                /* digits of the count */
+                len++;
+        }
+    }
+    return len + 1;
+}
+
+/* Run-length encode str into a newly allocated string.
+ *
+ * The output buffer is sized by encode_len(), so it is exactly large enough.
+ * A run longer than one character is written as its decimal length followed
+ * by the character; a single character is written as is.
+ *
+ * Returns the encoded string (to be released with free()), or NULL when str
+ * is NULL, encode_len() rejects it, or the allocation fails.
+ */
+char *encode(const char *str)
+{
+    if (!str)
+        return NULL;
+
+    int size = encode_len(str);
+    if (size <= 0)
+        return NULL;
+
+    char *out = malloc(size);
+    if (!out)
+        return NULL;
+
+    char *dst = out;
+    while (*str) {
+        int cur = *str;
+        int run = 0;
+
+        while (*str == cur) {                     /* consume the whole run */
+            str++;
+            run++;
+        }
+        if (run > 1)
+            dst += sprintf(dst, "%d", run);
+        *dst++ = cur;
+    }
+    *dst = 0;
+    return out;
+}
+
+/* Parse the decimal repeat count starting at *pos (which must point to a
+ * digit) and advance *pos to the first character after the digits.
+ *
+ * strtol() with base 10 is used so that only decimal integers are accepted;
+ * on overflow it returns LONG_MAX, which callers reject through their range
+ * check.
+ */
+static long parse_count(const char **pos)
+{
+    char *end;
+    long count = strtol(*pos, &end, 10);
+
+    *pos = end;
+    return count;
+}
+
+/* Compute the buffer size needed to hold the decoded form of str.
+ *
+ * Returns the size in bytes, including the terminating NUL, or -1 when:
+ *  - str is NULL,
+ *  - a count is zero,
+ *  - a count is not followed by a printable character (this covers input
+ *    ending with digits),
+ *  - the decoded length would not fit in an int.
+ */
+int decode_len(const char  *str)
+{
+    int len = 0;
+
+    if (!str)
+        return -1;
+    while (*str) {
+        long count = 1;                           /* no count means one */
+
+        if (isdigit((unsigned char)*str)) {
+            count = parse_count(&str);
+            /* *str is now the char to duplicate */
+            if (count < 1 || *str == '\0' || !isprint((unsigned char)*str))
+                return -1;
+        }
+        if (count > INT_MAX - 1 - len)            /* keep room for the NUL */
+            return -1;
+        len += (int)count;
+        str++;
+    }
+    return len + 1;
+}
+
+
+/* Decode a run-length encoded string into a newly allocated string.
+ *
+ * The input is validated and sized by decode_len() first, so the expansion
+ * loop below can rely on every count being >= 1, on every count being
+ * followed by a character, and on the total fitting the buffer.
+ *
+ * Returns the decoded string (to be released with free()), or NULL when
+ * decode_len() rejects str or the allocation fails.
+ */
+char *decode(const char *str)
+{
+    char *res, *p;
+    int len = decode_len(str);                    /* also handles NULL */
+
+    if (len <= 0)
+        return NULL;
+    res = malloc((size_t)len);
+    if (!res)
+        return NULL;
+    p = res;
+
+    while (*str) {
+        long count = 1;
+
+        if (isdigit((unsigned char)*str))
+            count = parse_count(&str);
+        memset(p, *str, (size_t)count);           /* the char to duplicate */
+        p += count;
+        str++;
+    }
+    *p = '\0';
+    return res;
+}
--- a/c/run-length-encoding/run_length_encoding.h
+++ b/c/run-length-encoding/run_length_encoding.h
@@ -0,0 +1,13 @@
+#ifndef RUN_LENGTH_ENCODING_H
+#define RUN_LENGTH_ENCODING_H
+
+/* Run-length encoding API.
+ *
+ * encode() and decode() return a malloc()ed string that the caller must
+ * release with free(), or NULL when the input is rejected or the allocation
+ * fails.  The *_len() helpers return the buffer size the matching function
+ * needs, terminating NUL included, or -1 when the input is rejected.
+ */
+
+/* Size needed to encode str; -1 if str is NULL or holds a digit or a
+ * non-printable character. */
+int encode_len(const char  *str);
+/* Run-length encode text ("AABCCC" gives "2AB3C"). */
+char *encode(const char *text);
+
+/* Size needed to decode str; -1 if str is NULL or a count is not followed
+ * by a printable character. */
+int decode_len(const char  *str);
+/* Expand run-length encoded data ("2AB3C" gives "AABCCC"). */
+char *decode(const char *data);
+
+#endif
+
+/* Included outside the guard above; br-common.h has its own include guard. */
+#include "br-common.h"
--- a/c/run-length-encoding/test_run_length_encoding.c
+++ b/c/run-length-encoding/test_run_length_encoding.c
@@ -0,0 +1,156 @@
+#include "test-framework/unity.h"
+#include "run_length_encoding.h"
+#include <stdlib.h>
+
+/* Unity fixture hook: nothing to prepare for these tests. */
+void setUp(void)
+{
+}
+
+/* Unity fixture hook: nothing to clean up for these tests. */
+void tearDown(void)
+{
+}
+
+/* Encoding the empty string yields the empty string. */
+static void test_encode_empty_string(void)
+{
+   char *encoded;
+
+   encoded = encode("");
+   TEST_ASSERT_EQUAL_STRING("", encoded);
+   free(encoded);
+}
+
+/* Characters that are not repeated are copied without a count. */
+static void test_encode_single_characters_only_are_encoded_without_count(void)
+{
+   TEST_IGNORE();               /* remove this call to run the test */
+   char *encoded;
+
+   encoded = encode("XYZ");
+   TEST_ASSERT_EQUAL_STRING("XYZ", encoded);
+   free(encoded);
+}
+
+/* Every run longer than one is prefixed with its length. */
+static void test_encode_string_with_no_single_characters(void)
+{
+   TEST_IGNORE();
+   char *encoded;
+
+   encoded = encode("AABBBCCCC");
+   TEST_ASSERT_EQUAL_STRING("2A3B4C", encoded);
+   free(encoded);
+}
+
+/* Single characters and multi-digit runs can be mixed in one input. */
+static void test_encode_single_characters_mixed_with_repeated_characters(void)
+{
+   TEST_IGNORE();
+   char *encoded;
+
+   encoded = encode("WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB");
+   TEST_ASSERT_EQUAL_STRING("12WB12W3B24WB", encoded);
+   free(encoded);
+}
+
+/* Runs of spaces are encoded like any other character. */
+static void test_encode_multiple_whitespace_mixed_in_string(void)
+{
+   TEST_IGNORE();
+   char *encoded;
+
+   encoded = encode("  hsqq qww  ");
+   TEST_ASSERT_EQUAL_STRING("2 hs2q q2w2 ", encoded);
+   free(encoded);
+}
+
+/* Lowercase letters are encoded the same way as uppercase ones. */
+static void test_encode_lowercase_characters(void)
+{
+   TEST_IGNORE();
+   char *encoded;
+
+   encoded = encode("aabbbcccc");
+   TEST_ASSERT_EQUAL_STRING("2a3b4c", encoded);
+   free(encoded);
+}
+
+/* Decoding the empty string yields the empty string. */
+static void test_decode_empty_string(void)
+{
+   TEST_IGNORE();
+   char *decoded;
+
+   decoded = decode("");
+   TEST_ASSERT_EQUAL_STRING("", decoded);
+   free(decoded);
+}
+
+/* Input without counts is returned unchanged. */
+static void test_decode_single_characters_only(void)
+{
+   TEST_IGNORE();
+   char *decoded;
+
+   decoded = decode("XYZ");
+   TEST_ASSERT_EQUAL_STRING("XYZ", decoded);
+   free(decoded);
+}
+
+/* Every counted character is expanded to a run of that length. */
+static void test_decode_string_with_no_single_characters(void)
+{
+   TEST_IGNORE();
+   char *decoded;
+
+   decoded = decode("2A3B4C");
+   TEST_ASSERT_EQUAL_STRING("AABBBCCCC", decoded);
+   free(decoded);
+}
+
+/* Multi-digit counts and uncounted characters can be mixed in one input. */
+static void test_decode_single_characters_with_repeated_characters(void)
+{
+   TEST_IGNORE();
+   char *decoded;
+
+   decoded = decode("12WB12W3B24WB");
+   TEST_ASSERT_EQUAL_STRING(
+       "WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWB", decoded);
+   free(decoded);
+}
+
+/* A count may apply to a space character. */
+static void test_decode_multiple_whitespace_mixed_in_string(void)
+{
+   TEST_IGNORE();
+   char *decoded;
+
+   decoded = decode("2 hs2q q2w2 ");
+   TEST_ASSERT_EQUAL_STRING("  hsqq qww  ", decoded);
+   free(decoded);
+}
+
+/* Lowercase letters are decoded the same way as uppercase ones. */
+static void test_decode_lower_case_string(void)
+{
+   TEST_IGNORE();
+   char *decoded;
+
+   decoded = decode("2a3b4c");
+   TEST_ASSERT_EQUAL_STRING("aabbbcccc", decoded);
+   free(decoded);
+}
+
+/* Round trip: decoding an encoded string gives back the original. */
+static void
+test_consistency_encode_followed_by_decode_gives_original_string(void)
+{
+   TEST_IGNORE();
+   char *encoded;
+   char *decoded;
+
+   encoded = encode("zzz ZZ  zZ");
+   decoded = decode(encoded);
+   TEST_ASSERT_EQUAL_STRING("zzz ZZ  zZ", decoded);
+   free(encoded);
+   free(decoded);
+}
+
+/* Input containing a digit cannot be encoded: encode() must return NULL. */
+static void test_encode_invalid_input_contains_digits(void)
+{
+   TEST_IGNORE();
+   char *encoded;
+
+   encoded = encode("AABB1A");
+   TEST_ASSERT_NULL(encoded);
+}
+
+/* A trailing count has no character to repeat: decode() must return NULL. */
+static void test_decode_invalid_input_ends_with_digits(void)
+{
+   TEST_IGNORE();
+   char *decoded;
+
+   decoded = decode("AABBA2");
+   TEST_ASSERT_NULL(decoded);
+}
+
+
+/* Unity test runner: registers every test of this file in order and returns
+ * the value of UnityEnd() as the process exit status.
+ */
+int main(void)
+{
+   UnityBegin("test_run_length_encoding.c");
+
+   /* encode() */
+   RUN_TEST(test_encode_empty_string);
+   RUN_TEST(test_encode_single_characters_only_are_encoded_without_count);
+   RUN_TEST(test_encode_string_with_no_single_characters);
+   RUN_TEST(test_encode_single_characters_mixed_with_repeated_characters);
+   RUN_TEST(test_encode_multiple_whitespace_mixed_in_string);
+   RUN_TEST(test_encode_lowercase_characters);
+   /* decode() */
+   RUN_TEST(test_decode_empty_string);
+   RUN_TEST(test_decode_single_characters_only);
+   RUN_TEST(test_decode_string_with_no_single_characters);
+   RUN_TEST(test_decode_single_characters_with_repeated_characters);
+   RUN_TEST(test_decode_multiple_whitespace_mixed_in_string);
+   RUN_TEST(test_decode_lower_case_string);
+   /* round trip and invalid input */
+   RUN_TEST(test_consistency_encode_followed_by_decode_gives_original_string);
+   RUN_TEST(test_encode_invalid_input_contains_digits);
+   RUN_TEST(test_decode_invalid_input_ends_with_digits);
+
+   return UnityEnd();
+}