libunibreak 5.1
Loading...
Searching...
No Matches
wordbreak.h
Go to the documentation of this file.
1/* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2
3/*
4 * Word breaking in a Unicode sequence. Designed to be used in a
5 * generic text renderer.
6 *
7 * Copyright (C) 2013-2019 Tom Hacohen <tom at stosb dot com>
8 * Copyright (C) 2018 Wu Yongwei <wuyongwei at gmail dot com>
9 *
10 * This software is provided 'as-is', without any express or implied
11 * warranty. In no event will the author be held liable for any damages
12 * arising from the use of this software.
13 *
14 * Permission is granted to anyone to use this software for any purpose,
15 * including commercial applications, and to alter it and redistribute
16 * it freely, subject to the following restrictions:
17 *
18 * 1. The origin of this software must not be misrepresented; you must
19 * not claim that you wrote the original software. If you use this
20 * software in a product, an acknowledgement in the product
21 * documentation would be appreciated but is not required.
22 * 2. Altered source versions must be plainly marked as such, and must
23 * not be misrepresented as being the original software.
24 * 3. This notice may not be removed or altered from any source
25 * distribution.
26 *
27 * The main reference is Unicode Standard Annex 29 (UAX #29):
28 * <URL:http://unicode.org/reports/tr29>
29 *
30 * When this library was designed, this annex was at Revision 17, for
31 * Unicode 6.0.0:
32 * <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
33 *
34 * This library has been updated according to Revision 37, for
35 * Unicode 13.0.0:
36 * <URL:http://www.unicode.org/reports/tr29/tr29-37.html>
37 *
38 * The Unicode Terms of Use are available at
39 * <URL:http://www.unicode.org/copyright.html>
40 */
41
50#ifndef WORDBREAK_H
51#define WORDBREAK_H
52
53#include <stddef.h>
54#include "unibreakbase.h"
55
56#ifdef __cplusplus
57extern "C" {
58#endif
59
60#define WORDBREAK_BREAK 0
61#define WORDBREAK_NOBREAK 1
62#define WORDBREAK_INSIDEACHAR 2
64void init_wordbreak(void);
65void set_wordbreaks_utf8(
66 const utf8_t *s, size_t len, const char* lang, char *brks);
67void set_wordbreaks_utf16(
68 const utf16_t *s, size_t len, const char* lang, char *brks);
69void set_wordbreaks_utf32(
70 const utf32_t *s, size_t len, const char* lang, char *brks);
71
72#ifdef __cplusplus
73}
74#endif
75
76#endif /* WORDBREAK_H */
Header file for common definitions in the libunibreak library.
unsigned short utf16_t
Type for UTF-16 data points.
Definition unibreakbase.h:48
unsigned int utf32_t
Type for UTF-32 data points.
Definition unibreakbase.h:49
unsigned char utf8_t
Type for UTF-8 data points.
Definition unibreakbase.h:47
void set_wordbreaks_utf32(const utf32_t *s, size_t len, const char *lang, char *brks)
Sets the word breaking information for a UTF-32 input string.
Definition wordbreak.c:535
void set_wordbreaks_utf16(const utf16_t *s, size_t len, const char *lang, char *brks)
Sets the word breaking information for a UTF-16 input string.
Definition wordbreak.c:515
void init_wordbreak(void)
Initializes the wordbreak internals.
Definition wordbreak.c:63
void set_wordbreaks_utf8(const utf8_t *s, size_t len, const char *lang, char *brks)
Sets the word breaking information for a UTF-8 input string.
Definition wordbreak.c:495