Skip to content

Commit b3e0f28

Browse files
authored
add utf8proc_free (#338)
* add utf8proc_free * Apply suggestion from @stevengj
1 parent e46d2db commit b3e0f28

10 files changed

Lines changed: 54 additions & 35 deletions

File tree

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ utf8proc_uint8_t *fold_str;
126126
utf8proc_map(str, 0, &fold_str, UTF8PROC_NULLTERM | UTF8PROC_CASEFOLD);
127127
printf("%s\n", fold_str);
128128
// ss
129-
free(fold_str);
129+
utf8proc_free(fold_str);
130130
```
131131
132132
### Normalization Form C/D (NFC/NFD)
@@ -138,6 +138,6 @@ utf8proc_uint8_t *nfd= utf8proc_NFD(input); // = {0x61, 0xcc, 0x88, 0x6f, 0xcc,
138138
// Compose "a\u0308o\u0308u\u0308" into "\u00e4\u00f6\u00fc" (= "äöü" via precomposed characters)
139139
utf8proc_uint8_t *nfc= utf8proc_NFC(nfd);
140140
141-
free(nfd);
142-
free(nfc);
141+
utf8proc_free(nfd);
142+
utf8proc_free(nfc);
143143
```

bench/bench.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ int main(int argc, char **argv)
99
{
1010
int i, j;
1111
int options = 0;
12-
12+
1313
for (i = 1; i < argc; ++i) {
1414
if (!strcmp(argv[i], "-nfkc")) {
1515
options |= UTF8PROC_STABLE|UTF8PROC_COMPOSE|UTF8PROC_COMPAT;
@@ -46,7 +46,7 @@ int main(int argc, char **argv)
4646
mytime start = gettime();
4747
for (j = 0; j < 100; ++j) {
4848
utf8proc_map(src, len, &dest, options);
49-
free(dest);
49+
utf8proc_free(dest);
5050
}
5151
printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100);
5252
free(src);

test/case.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,8 @@ int main(int argc, char **argv)
6868
check(!strcmp((char*)s1, "ss") &&
6969
!strcmp((char*)s2, "ss"),
7070
"incorrect 0x00df/0x1e9e casefold normalization");
71-
free(s1);
72-
free(s2);
71+
utf8proc_free(s1);
72+
utf8proc_free(s2);
7373
printf("More up-to-date than OS unicode tables for %d tests.\n", better);
7474
printf("utf8proc case conversion tests SUCCEEDED.\n");
7575
return 0;

test/fuzzer.c

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
1616
utf8proc_ssize_t ret, bytes = 0;
1717
utf8proc_uint8_t *str = NULL;
1818
size_t len = strlen((const char*)data);
19-
19+
2020
while(bytes != len)
2121
{
2222
ret = utf8proc_iterate(ptr, -1, &c);
23-
23+
2424
if(ret < 0 || ret == 0) break;
25-
25+
2626
bytes += ret;
2727
ptr += ret;
2828

@@ -35,31 +35,31 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
3535
utf8proc_category(c);
3636
utf8proc_category_string(c);
3737
utf8proc_codepoint_valid(c);
38-
38+
3939
utf8proc_grapheme_break(c_prev, c);
4040
utf8proc_grapheme_break_stateful(c_prev, c, &state);
41-
41+
4242
c_prev = c;
4343
}
44-
44+
4545
utf8proc_int32_t *copy = size >= 4 ? NULL : malloc(size);
46-
46+
4747
if(copy)
4848
{
4949
size /= 4;
50-
50+
5151
options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS | UTF8PROC_NLF2PS;
5252
memcpy(copy, data, size);
5353
utf8proc_normalize_utf32(copy, size, options);
54-
54+
5555
options = UTF8PROC_STRIPCC | UTF8PROC_NLF2LS;
5656
memcpy(copy, data, size);
5757
utf8proc_normalize_utf32(copy, size, options);
58-
58+
5959
options = UTF8PROC_STRIPCC | UTF8PROC_NLF2PS;
6060
memcpy(copy, data, size);
6161
utf8proc_normalize_utf32(copy, size, options);
62-
62+
6363
options = UTF8PROC_STRIPCC;
6464
memcpy(copy, data, size);
6565
utf8proc_normalize_utf32(copy, size, options);
@@ -71,30 +71,30 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
7171
options = 0;
7272
memcpy(copy, data, size);
7373
utf8proc_normalize_utf32(copy, size, options);
74-
74+
7575
free(copy);
7676
}
7777

78-
free(utf8proc_NFD(data));
79-
free(utf8proc_NFC(data));
80-
free(utf8proc_NFKD(data));
81-
free(utf8proc_NFKC(data));
82-
free(utf8proc_NFKC_Casefold(data));
78+
utf8proc_free(utf8proc_NFD(data));
79+
utf8proc_free(utf8proc_NFC(data));
80+
utf8proc_free(utf8proc_NFKD(data));
81+
utf8proc_free(utf8proc_NFKC(data));
82+
utf8proc_free(utf8proc_NFKC_Casefold(data));
8383

8484
utf8proc_map(data, len, &str, UTF8PROC_CHARBOUND | UTF8PROC_STRIPNA);
85-
free(str);
85+
utf8proc_free(str);
8686

8787
utf8proc_map(data, len, &str, UTF8PROC_LUMP | UTF8PROC_NLF2LS | UTF8PROC_NLF2PS);
88-
free(str);
88+
utf8proc_free(str);
8989

9090
utf8proc_map(data, len, &str, UTF8PROC_COMPOSE | UTF8PROC_STRIPMARK);
91-
free(str);
91+
utf8proc_free(str);
9292

9393
utf8proc_map(data, len, &str, UTF8PROC_CHARBOUND | UTF8PROC_DECOMPOSE);
94-
free(str);
94+
utf8proc_free(str);
9595

9696
utf8proc_map(data, len, &str, UTF8PROC_CHARBOUND | UTF8PROC_COMPOSE);
97-
free(str);
97+
utf8proc_free(str);
9898

9999
return 0;
100100
}

test/graphemetest.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ void checkline(const char *_buf, bool verbose) {
5858
check(!strcmp((char*)g, (char*)src),
5959
"grapheme mismatch: \"%s\" instead of \"%s\"", (char*)g, (char*)src);
6060
}
61-
free(g);
61+
utf8proc_free(g);
6262
}
6363

6464
if (si) { /* test manual calls to utf8proc_grapheme_break_stateful */
@@ -112,7 +112,7 @@ int main(int argc, char **argv)
112112
glen = utf8proc_map(input, 6, &g, UTF8PROC_CHARBOUND);
113113
check(!strcmp((char*)g, (char*)output), "mishandled u+ffff and u+fffe grapheme breaks");
114114
check(glen != 6, "mishandled u+ffff and u+fffe grapheme breaks");
115-
free(g);
115+
utf8proc_free(g);
116116
};
117117

118118
/* https://github.com/JuliaLang/julia/issues/37680 */

test/normtest.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
unsigned char *src_norm = (unsigned char*) utf8proc_ ## NRM((utf8proc_uint8_t*) src); \
55
check(!strcmp((char *) norm, (char *) src_norm), \
66
"normalization failed for %s -> %s", src, norm); \
7-
free(src_norm); \
7+
utf8proc_free(src_norm); \
88
}
99

1010
int main(int argc, char **argv)

test/printproperty.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ int main(int argc, char **argv)
6262
p->boundclass,
6363
p->indic_conjunct_break,
6464
utf8proc_charwidth(c));
65-
free(map);
65+
utf8proc_free(map);
6666
}
6767
return 0;
6868
}

test/tests.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,6 @@ void check_compare(const char *transformation,
9595
print_string_and_escaped(f, expected);
9696
}
9797
fprintf(f, "\n");
98-
if (free_received) free(received);
98+
if (free_received) utf8proc_free(received);
9999
if (!passed) exit(1);
100100
}

utf8proc.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -836,3 +836,7 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8
836836
UTF8PROC_COMPOSE | UTF8PROC_COMPAT | UTF8PROC_CASEFOLD | UTF8PROC_IGNORE));
837837
return retval;
838838
}
839+
840+
UTF8PROC_DLLEXPORT void utf8proc_free(utf8proc_uint8_t *ptr) {
841+
free(ptr);
842+
}

utf8proc.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,8 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi
750750
*
751751
* @note The memory of the new UTF-8 string will have been allocated
752752
* with `malloc`, and should therefore be deallocated with `free`.
753+
* However, it is safer to deallocate it with @ref utf8proc_free in
754+
* case your application is linked to a different C library than utf8proc.
753755
*
754756
* @note `utf8proc_map` simply calls `utf8proc_decompose` followed by `utf8proc_reencode`,
755757
* and applications requiring greater control over memory allocation should instead call
@@ -760,7 +762,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map(
760762
);
761763

762764
/**
763-
* Like utf8proc_map(), but also takes a `custom_func` mapping function
765+
* Like @ref utf8proc_map, but also takes a `custom_func` mapping function
764766
* that is called on each codepoint in `str` before any other transformations
765767
* (along with a `custom_data` pointer that is passed through to `custom_func`).
766768
* The `custom_func` argument is ignored if it is `NULL`.
@@ -776,6 +778,11 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom(
776778
* NFKC_Casefold normalized version of the null-terminated string `str`. These
777779
* are shortcuts to calling utf8proc_map() with @ref UTF8PROC_NULLTERM
778780
* combined with @ref UTF8PROC_STABLE and flags indicating the normalization.
781+
*
782+
* @note The memory of the new UTF-8 string will have been allocated
783+
* with `malloc`, and should therefore be deallocated with `free`.
784+
* However, it is safer to deallocate it with @ref utf8proc_free in
785+
* case your application is linked to a different C library than utf8proc.
779786
*/
780787
/** @{ */
781788
/** NFD normalization (@ref UTF8PROC_DECOMPOSE). */
@@ -793,6 +800,14 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str);
793800
UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC_Casefold(const utf8proc_uint8_t *str);
794801
/** @} */
795802

803+
/**
804+
* Deallocate memory allocated and returned by @ref utf8proc_map and similar functions
805+
* (this function simply calls the `free` function from the underlying C library linked to utf8proc).
806+
* It is safer to call `utf8proc_free` than to call `free` directly, in case your application
807+
* is linked to a different C library with incompatible `malloc` and `free` functions.
808+
*/
809+
UTF8PROC_DLLEXPORT void utf8proc_free(utf8proc_uint8_t *ptr);
810+
796811
#ifdef __cplusplus
797812
}
798813
#endif

0 commit comments

Comments
 (0)