Improve error reporting when serializing non-Unicode strings to JSON
* src/coding.c (utf8_string_p): New helper function.
(syms_of_coding) <utf-8-unix>: Move from json.c.

* src/json.c (json_check_utf8): New helper function.
(lisp_to_json_toplevel_1, lisp_to_json): Use it.  To save a bit of
time, check for invalid UTF-8 strings only after encountering an
error, since Jansson already rejects them.

* test/src/json-tests.el (json-serialize/invalid-unicode): Adapt
expected error symbol.
This commit is contained in:
22
src/coding.c
22
src/coding.c
@@ -6360,6 +6360,27 @@ check_utf_8 (struct coding_system *coding)
|
||||
}
|
||||
|
||||
|
||||
/* Return whether STRING is a valid UTF-8 string. STRING must be a
|
||||
unibyte string. */
|
||||
|
||||
bool
|
||||
utf8_string_p (Lisp_Object string)
|
||||
{
|
||||
eassert (!STRING_MULTIBYTE (string));
|
||||
struct coding_system coding;
|
||||
setup_coding_system (Qutf_8_unix, &coding);
|
||||
/* We initialize only the fields that check_utf_8 accesses. */
|
||||
coding.head_ascii = -1;
|
||||
coding.src_pos = 0;
|
||||
coding.src_pos_byte = 0;
|
||||
coding.src_chars = SCHARS (string);
|
||||
coding.src_bytes = SBYTES (string);
|
||||
coding.src_object = string;
|
||||
coding.eol_seen = EOL_SEEN_NONE;
|
||||
return check_utf_8 (&coding) != -1;
|
||||
}
|
||||
|
||||
|
||||
/* Detect how end-of-line of a text of length SRC_BYTES pointed by
|
||||
SOURCE is encoded. If CATEGORY is one of
|
||||
coding_category_utf_16_XXXX, assume that CR and LF are encoded by
|
||||
@@ -10846,6 +10867,7 @@ syms_of_coding (void)
|
||||
DEFSYM (Qiso_2022, "iso-2022");
|
||||
|
||||
DEFSYM (Qutf_8, "utf-8");
|
||||
DEFSYM (Qutf_8_unix, "utf-8-unix");
|
||||
DEFSYM (Qutf_8_emacs, "utf-8-emacs");
|
||||
|
||||
#if defined (WINDOWSNT) || defined (CYGWIN)
|
||||
|
||||
Reference in New Issue
Block a user