add utf8proc_free (#338)

stevengj · web-flow · commit b3e0f28adaec · 2026-04-01T17:36:36.000-04:00
* add utf8proc_free
* Apply suggestion from @stevengj
diff --git a/README.md b/README.md
@@ -126,7 +126,7 @@ utf8proc_uint8_t *fold_str;
 utf8proc_map(str, 0, &fold_str, UTF8PROC_NULLTERM | UTF8PROC_CASEFOLD);
 printf("%s\n", fold_str);
 // ss
-free(fold_str);
+utf8proc_free(fold_str);
 ```
 
 ### Normalization Form C/D (NFC/NFD)
@@ -138,6 +138,6 @@ utf8proc_uint8_t *nfd= utf8proc_NFD(input); // = {0x61, 0xcc, 0x88, 0x6f, 0xcc,
 // Compose "a\u0308o\u0308u\u0308" into "\u00e4\u00f6\u00fc" (= "äöü" via precomposed characters)
 utf8proc_uint8_t *nfc= utf8proc_NFC(nfd);
 
-free(nfd);
-free(nfc);
+utf8proc_free(nfd);
+utf8proc_free(nfc);
 ```
diff --git a/bench/bench.c b/bench/bench.c
@@ -9,7 +9,7 @@ int main(int argc, char **argv)
 {
 	 int i, j;
 	 int options = 0;
-	 
+
 	 for (i = 1; i < argc; ++i) {
 		  if (!strcmp(argv[i], "-nfkc")) {
 			   options |= UTF8PROC_STABLE|UTF8PROC_COMPOSE|UTF8PROC_COMPAT;
@@ -46,7 +46,7 @@ int main(int argc, char **argv)
 		  mytime start = gettime();
 		  for (j = 0; j < 100; ++j) {
 			   utf8proc_map(src, len, &dest, options);
-			   free(dest);
+			   utf8proc_free(dest);
 		  }
 		  printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100);
 		  free(src);
diff --git a/test/case.c b/test/case.c
@@ -68,8 +68,8 @@ int main(int argc, char **argv)
      check(!strcmp((char*)s1, "ss") &&
            !strcmp((char*)s2, "ss"),
            "incorrect 0x00df/0x1e9e casefold normalization");
-     free(s1);
-     free(s2);
+     utf8proc_free(s1);
+     utf8proc_free(s2);
      printf("More up-to-date than OS unicode tables for %d tests.\n", better);
      printf("utf8proc case conversion tests SUCCEEDED.\n");
      return 0;
diff --git a/test/fuzzer.c b/test/fuzzer.c
@@ -16,13 +16,13 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
     utf8proc_ssize_t ret, bytes = 0;
     utf8proc_uint8_t *str = NULL;
     size_t len = strlen((const char*)data);
-    
+
     while(bytes != len)
     {
         ret = utf8proc_iterate(ptr, -1, &c);
-        
+
         if(ret < 0 || ret == 0) break;
-        
+
         bytes += ret;
         ptr += ret;
 
@@ -35,31 +35,31 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
         utf8proc_category(c);
         utf8proc_category_string(c);
         utf8proc_codepoint_valid(c);
-        
+
         utf8proc_grapheme_break(c_prev, c);
         utf8proc_grapheme_break_stateful(c_prev, c, &state);
-        
+
         c_prev = c;
     }
-    
+
     utf8proc_int32_t *copy = size >= 4 ? NULL : malloc(size);
-    
+
     if(copy)
     {
         size /= 4;
-        
+
         options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS | UTF8PROC_NLF2PS;
         memcpy(copy, data, size);
         utf8proc_normalize_utf32(copy, size, options);
-        
+
         options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS;
         memcpy(copy, data, size);
         utf8proc_normalize_utf32(copy, size, options);
-        
+
         options = UTF8PROC_STRIPCC | UTF8PROC_NLF2PS;
         memcpy(copy, data, size);
         utf8proc_normalize_utf32(copy, size, options);
-        
+
         options = UTF8PROC_STRIPCC;
         memcpy(copy, data, size);
         utf8proc_normalize_utf32(copy, size, options);
@@ -71,30 +71,30 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
         options = 0;
         memcpy(copy, data, size);
         utf8proc_normalize_utf32(copy, size, options);
-        
+
         free(copy);
     }
 
-    free(utf8proc_NFD(data));
-    free(utf8proc_NFC(data));
-    free(utf8proc_NFKD(data));
-    free(utf8proc_NFKC(data));
-    free(utf8proc_NFKC_Casefold(data));
+    utf8proc_free(utf8proc_NFD(data));
+    utf8proc_free(utf8proc_NFC(data));
+    utf8proc_free(utf8proc_NFKD(data));
+    utf8proc_free(utf8proc_NFKC(data));
+    utf8proc_free(utf8proc_NFKC_Casefold(data));
 
     utf8proc_map(data, len, &str, UTF8PROC_CHARBOUND | UTF8PROC_STRIPNA);
-    free(str);
+    utf8proc_free(str);
 
     utf8proc_map(data, len, &str, UTF8PROC_LUMP | UTF8PROC_NLF2LS | UTF8PROC_NLF2PS);
-    free(str);
+    utf8proc_free(str);
 
     utf8proc_map(data, len, &str, UTF8PROC_COMPOSE | UTF8PROC_STRIPMARK);
-    free(str);
+    utf8proc_free(str);
 
     utf8proc_map(data, len, &str, UTF8PROC_CHARBOUND | UTF8PROC_DECOMPOSE);
-    free(str);
+    utf8proc_free(str);
 
     utf8proc_map(data, len, &str, UTF8PROC_CHARBOUND | UTF8PROC_COMPOSE);
-    free(str);
+    utf8proc_free(str);
 
     return 0;
 }
diff --git a/test/graphemetest.c b/test/graphemetest.c
@@ -58,7 +58,7 @@ void checkline(const char *_buf, bool verbose) {
             check(!strcmp((char*)g, (char*)src),
                 "grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src);
         }
-        free(g);
+        utf8proc_free(g);
     }
 
     if (si) { /* test manual calls to utf8proc_grapheme_break_stateful */
@@ -112,7 +112,7 @@ int main(int argc, char **argv)
         glen = utf8proc_map(input, 6, &g, UTF8PROC_CHARBOUND);
         check(!strcmp((char*)g, (char*)output), "mishandled u+ffff and u+fffe grapheme breaks");
         check(glen != 6, "mishandled u+ffff and u+fffe grapheme breaks");
-        free(g);
+        utf8proc_free(g);
     };
 
     /* https://github.com/JuliaLang/julia/issues/37680 */
diff --git a/test/normtest.c b/test/normtest.c
@@ -4,7 +4,7 @@
     unsigned char *src_norm = (unsigned char*) utf8proc_ ## NRM((utf8proc_uint8_t*) src);      \
     check(!strcmp((char *) norm, (char *) src_norm),                                  \
           "normalization failed for %s -> %s", src, norm);          \
-    free(src_norm);                                                 \
+    utf8proc_free(src_norm);                                                 \
 }
 
 int main(int argc, char **argv)
diff --git a/test/printproperty.c b/test/printproperty.c
@@ -62,7 +62,7 @@ int main(int argc, char **argv)
         p->boundclass,
         p->indic_conjunct_break,
         utf8proc_charwidth(c));
-        free(map);
+        utf8proc_free(map);
     }
     return 0;
 }
diff --git a/test/tests.c b/test/tests.c
@@ -95,6 +95,6 @@ void check_compare(const char *transformation,
           print_string_and_escaped(f, expected);
      }
      fprintf(f, "\n");
-     if (free_received) free(received);
+     if (free_received) utf8proc_free(received);
      if (!passed) exit(1);
 }
diff --git a/utf8proc.c b/utf8proc.c
@@ -836,3 +836,7 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8
     UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE));
   return retval;
 }
+
+UTF8PROC_DLLEXPORT void utf8proc_free(utf8proc_uint8_t *ptr) {
+  free(ptr);
+}
diff --git a/utf8proc.h b/utf8proc.h
@@ -750,6 +750,8 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi
  *
  * @note The memory of the new UTF-8 string will have been allocated
  * with `malloc`, and should therefore be deallocated with `free`.
+ * However, it is safer to deallocate it with @ref utf8proc_free in
+ * case your application is linked to a different C library than utf8proc.
  *
  * @note `utf8proc_map` simply calls `utf8proc_decompose` followed by `utf8proc_reencode`,
  * and applications requiring greater control over memory allocation should instead call
@@ -760,7 +762,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
 );
 
 /**
- * Like utf8proc_map(), but also takes a `custom_func` mapping function
+ * Like @ref utf8proc_map, but also takes a `custom_func` mapping function
  * that is called on each codepoint in `str` before any other transformations
  * (along with a `custom_data` pointer that is passed through to `custom_func`).
  * The `custom_func` argument is ignored if it is `NULL`.
@@ -776,6 +778,11 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
  * NFKC_Casefold normalized version of the null-terminated string `str`.  These
  * are shortcuts to calling utf8proc_map() with @ref UTF8PROC_NULLTERM
  * combined with @ref UTF8PROC_STABLE and flags indicating the normalization.
+ *
+ * @note The memory of the new UTF-8 string will have been allocated
+ * with `malloc`, and should therefore be deallocated with `free`.
+ * However, it is safer to deallocate it with @ref utf8proc_free in
+ * case your application is linked to a different C library than utf8proc.
  */
 /** @{ */
 /** NFD normalization (@ref UTF8PROC_DECOMPOSE). */
@@ -793,6 +800,14 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
 UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str);
 /** @} */
 
+/**
+ * Deallocate memory allocated and returned by @ref utf8proc_map and similar functions.
+ * This simply calls the `free` function from the underlying C library linked to utf8proc.
+ * It is safer to call `utf8proc_free` than calling `free` directly, in case your application
+ * is linked to a different C library with incompatible `malloc` and `free` functions.
+ */
+UTF8PROC_DLLEXPORT void utf8proc_free(utf8proc_uint8_t *ptr);
+
 #ifdef __cplusplus
 }
 #endif

Original file line number	Diff line number	Diff line change
`@@ -4,7 +4,7 @@`
`4`	`4`	`unsigned char *src_norm = (unsigned char*) utf8proc_ ## NRM((utf8proc_uint8_t*) src); \`
`5`	`5`	`check(!strcmp((char *) norm, (char *) src_norm), \`
`6`	`6`	`"normalization failed for %s -> %s", src, norm); \`
`7`		`- free(src_norm); \`
	`7`	`+ utf8proc_free(src_norm); \`
`8`	`8`	`}`
`9`	`9`
`10`	`10`	`int main(int argc, char **argv)`
Original file line number	Diff line number	Diff line change
`@@ -62,7 +62,7 @@ int main(int argc, char **argv)`
`62`	`62`	`p->boundclass,`
`63`	`63`	`p->indic_conjunct_break,`
`64`	`64`	`utf8proc_charwidth(c));`
`65`		`- free(map);`
	`65`	`+ utf8proc_free(map);`
`66`	`66`	`}`
`67`	`67`	`return 0;`
`68`	`68`	`}`
Original file line number	Diff line number	Diff line change
`@@ -95,6 +95,6 @@ void check_compare(const char *transformation,`
`95`	`95`	`print_string_and_escaped(f, expected);`
`96`	`96`	`}`
`97`	`97`	`fprintf(f, "\n");`
`98`		`- if (free_received) free(received);`
	`98`	`+ if (free_received) utf8proc_free(received);`
`99`	`99`	`if (!passed) exit(1);`
`100`	`100`	`}`
Original file line number	Diff line number	Diff line change
`@@ -836,3 +836,7 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8`
`836`	`836`	`UTF8PROC_COMPOSE \| UTF8PROC_COMPAT \| UTF8PROC_CASEFOLD \| UTF8PROC_IGNORE));`
`837`	`837`	`return retval;`
`838`	`838`	`}`
	`839`	`+`
	`840`	`+UTF8PROC_DLLEXPORT void utf8proc_free(utf8proc_uint8_t *ptr) {`
	`841`	`+ free(ptr);`
	`842`	`+}`