Allow ruby versions 3.2 and 3.4 for installation
This commit is contained in:
202
libs/libruby/ruby/internal/encoding/coderange.h
vendored
Normal file
202
libs/libruby/ruby/internal/encoding/coderange.h
vendored
Normal file
@@ -0,0 +1,202 @@
|
||||
#ifndef RUBY_INTERNAL_ENCODING_CODERANGE_H /*-*-C++-*-vi:se ft=cpp:*/
|
||||
#define RUBY_INTERNAL_ENCODING_CODERANGE_H
|
||||
/**
|
||||
* @file
|
||||
* @author Ruby developers <ruby-core@ruby-lang.org>
|
||||
* @copyright This file is a part of the programming language Ruby.
|
||||
* Permission is hereby granted, to either redistribute and/or
|
||||
* modify this file, provided that the conditions mentioned in the
|
||||
* file COPYING are met. Consult the file for details.
|
||||
* @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
|
||||
* implementation details. Don't take them as canon. They could
|
||||
* rapidly appear then vanish. The name (path) of this header file
|
||||
* is also an implementation detail. Do not expect it to persist
|
||||
* at the place it is now. Developers are free to move it anywhere
|
||||
* anytime at will.
|
||||
* @note To ruby-core: remember that this header can be possibly
|
||||
* recursively included from extension libraries written in C++.
|
||||
* Do not expect for instance `__VA_ARGS__` is always available.
|
||||
* We assume C99 for ruby itself but we don't assume languages of
|
||||
* extension libraries. They could be written in C++98.
|
||||
* @brief Routines for code ranges.
|
||||
*/
|
||||
|
||||
#include "ruby/internal/attr/const.h"
|
||||
#include "ruby/internal/attr/pure.h"
|
||||
#include "ruby/internal/dllexport.h"
|
||||
#include "ruby/internal/fl_type.h"
|
||||
#include "ruby/internal/value.h"
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_BEGIN()
|
||||
|
||||
/** What rb_enc_str_coderange() returns. */
|
||||
enum ruby_coderange_type {
|
||||
|
||||
/** The object's coderange is unclear yet. */
|
||||
RUBY_ENC_CODERANGE_UNKNOWN = 0,
|
||||
|
||||
/** The object holds 0 to 127 inclusive and nothing else. */
|
||||
RUBY_ENC_CODERANGE_7BIT = ((int)RUBY_FL_USER8),
|
||||
|
||||
/** The object's encoding and contents are consistent each other */
|
||||
RUBY_ENC_CODERANGE_VALID = ((int)RUBY_FL_USER9),
|
||||
|
||||
/** The object holds invalid/malformed/broken character(s). */
|
||||
RUBY_ENC_CODERANGE_BROKEN = ((int)(RUBY_FL_USER8|RUBY_FL_USER9)),
|
||||
|
||||
/** Where the coderange resides. */
|
||||
RUBY_ENC_CODERANGE_MASK = (RUBY_ENC_CODERANGE_7BIT|
|
||||
RUBY_ENC_CODERANGE_VALID|
|
||||
RUBY_ENC_CODERANGE_BROKEN)
|
||||
};
|
||||
|
||||
RBIMPL_ATTR_CONST()
|
||||
/**
|
||||
* @private
|
||||
*
|
||||
* This is an implementation detail of #RB_ENC_CODERANGE_CLEAN_P. People don't
|
||||
* use it directly.
|
||||
*
|
||||
* @param[in] cr An enum ::ruby_coderange_type.
|
||||
* @retval 1 It is.
|
||||
* @retval 0 It isn't.
|
||||
*/
|
||||
static inline int
|
||||
rb_enc_coderange_clean_p(int cr)
|
||||
{
|
||||
return (cr ^ (cr >> 1)) & RUBY_ENC_CODERANGE_7BIT;
|
||||
}
|
||||
|
||||
RBIMPL_ATTR_CONST()
|
||||
/**
|
||||
* Queries if a code range is "clean". "Clean" in this context means it is
|
||||
* known and valid.
|
||||
*
|
||||
* @param[in] cr An enum ::ruby_coderange_type.
|
||||
* @retval 1 It is.
|
||||
* @retval 0 It isn't.
|
||||
*/
|
||||
static inline bool
|
||||
RB_ENC_CODERANGE_CLEAN_P(enum ruby_coderange_type cr)
|
||||
{
|
||||
return rb_enc_coderange_clean_p(cr);
|
||||
}
|
||||
|
||||
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
|
||||
/**
|
||||
* Queries the (inline) code range of the passed object. The object must be
|
||||
* capable of having inline encoding. Using this macro needs deep
|
||||
* understanding of bit level object binary layout.
|
||||
*
|
||||
* @param[in] obj Target object.
|
||||
* @return An enum ::ruby_coderange_type.
|
||||
*/
|
||||
static inline enum ruby_coderange_type
|
||||
RB_ENC_CODERANGE(VALUE obj)
|
||||
{
|
||||
VALUE ret = RB_FL_TEST_RAW(obj, RUBY_ENC_CODERANGE_MASK);
|
||||
|
||||
return RBIMPL_CAST((enum ruby_coderange_type)ret);
|
||||
}
|
||||
|
||||
RBIMPL_ATTR_PURE_UNLESS_DEBUG()
|
||||
/**
|
||||
* Queries the (inline) code range of the passed object is
|
||||
* ::RUBY_ENC_CODERANGE_7BIT. The object must be capable of having inline
|
||||
* encoding. Using this macro needs deep understanding of bit level object
|
||||
* binary layout.
|
||||
*
|
||||
* @param[in] obj Target object.
|
||||
* @retval 1 It is ascii only.
|
||||
* @retval 0 Otherwise (including cases when the range is not known).
|
||||
*/
|
||||
static inline bool
|
||||
RB_ENC_CODERANGE_ASCIIONLY(VALUE obj)
|
||||
{
|
||||
return RB_ENC_CODERANGE(obj) == RUBY_ENC_CODERANGE_7BIT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructively modifies the passed object so that its (inline) code range is
|
||||
* the passed one. The object must be capable of having inline encoding.
|
||||
* Using this macro needs deep understanding of bit level object binary layout.
|
||||
*
|
||||
* @param[out] obj Target object.
|
||||
* @param[out] cr An enum ::ruby_coderange_type.
|
||||
* @post `obj`'s code range is `cr`.
|
||||
*/
|
||||
static inline void
|
||||
RB_ENC_CODERANGE_SET(VALUE obj, enum ruby_coderange_type cr)
|
||||
{
|
||||
RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK);
|
||||
RB_FL_SET_RAW(obj, cr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Destructively clears the passed object's (inline) code range. The object
|
||||
* must be capable of having inline encoding. Using this macro needs deep
|
||||
* understanding of bit level object binary layout.
|
||||
*
|
||||
* @param[out] obj Target object.
|
||||
* @post `obj`'s code range is ::RUBY_ENC_CODERANGE_UNKNOWN.
|
||||
*/
|
||||
static inline void
|
||||
RB_ENC_CODERANGE_CLEAR(VALUE obj)
|
||||
{
|
||||
RB_FL_UNSET_RAW(obj, RUBY_ENC_CODERANGE_MASK);
|
||||
}
|
||||
|
||||
RBIMPL_ATTR_CONST()
|
||||
/* assumed ASCII compatibility */
|
||||
/**
|
||||
* "Mix" two code ranges into one. This is handy for instance when you
|
||||
* concatenate two strings into one. Consider one of then is valid but the
|
||||
* other isn't. The result must be invalid. This macro computes that kind of
|
||||
* mixture.
|
||||
*
|
||||
* @param[in] a An enum ::ruby_coderange_type.
|
||||
* @param[in] b Another enum ::ruby_coderange_type.
|
||||
* @return The `a` "and" `b`.
|
||||
*/
|
||||
static inline enum ruby_coderange_type
|
||||
RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
|
||||
{
|
||||
if (a == RUBY_ENC_CODERANGE_7BIT) {
|
||||
return b;
|
||||
}
|
||||
else if (a != RUBY_ENC_CODERANGE_VALID) {
|
||||
return RUBY_ENC_CODERANGE_UNKNOWN;
|
||||
}
|
||||
else if (b == RUBY_ENC_CODERANGE_7BIT) {
|
||||
return RUBY_ENC_CODERANGE_VALID;
|
||||
}
|
||||
else {
|
||||
return b;
|
||||
}
|
||||
}
|
||||
|
||||
/* Old names, each kept as an alias of its current counterpart.
 * NOTE(review): RB_ENCODING_CODERANGE_SET is not defined in this header; it
 * presumably comes from another one -- confirm. */
#define ENC_CODERANGE_MASK                        RUBY_ENC_CODERANGE_MASK /**< @old{RUBY_ENC_CODERANGE_MASK} */
#define ENC_CODERANGE_UNKNOWN                     RUBY_ENC_CODERANGE_UNKNOWN /**< @old{RUBY_ENC_CODERANGE_UNKNOWN} */
#define ENC_CODERANGE_7BIT                        RUBY_ENC_CODERANGE_7BIT /**< @old{RUBY_ENC_CODERANGE_7BIT} */
#define ENC_CODERANGE_VALID                       RUBY_ENC_CODERANGE_VALID /**< @old{RUBY_ENC_CODERANGE_VALID} */
#define ENC_CODERANGE_BROKEN                      RUBY_ENC_CODERANGE_BROKEN /**< @old{RUBY_ENC_CODERANGE_BROKEN} */
#define ENC_CODERANGE_CLEAN_P(cr)                 RB_ENC_CODERANGE_CLEAN_P(cr) /**< @old{RB_ENC_CODERANGE_CLEAN_P} */
#define ENC_CODERANGE(obj)                        RB_ENC_CODERANGE(obj) /**< @old{RB_ENC_CODERANGE} */
#define ENC_CODERANGE_ASCIIONLY(obj)              RB_ENC_CODERANGE_ASCIIONLY(obj) /**< @old{RB_ENC_CODERANGE_ASCIIONLY} */
#define ENC_CODERANGE_SET(obj,cr)                 RB_ENC_CODERANGE_SET(obj,cr) /**< @old{RB_ENC_CODERANGE_SET} */
#define ENC_CODERANGE_CLEAR(obj)                  RB_ENC_CODERANGE_CLEAR(obj) /**< @old{RB_ENC_CODERANGE_CLEAR} */
#define ENC_CODERANGE_AND(a, b)                   RB_ENC_CODERANGE_AND(a, b) /**< @old{RB_ENC_CODERANGE_AND} */
#define ENCODING_CODERANGE_SET(obj, encindex, cr) RB_ENCODING_CODERANGE_SET(obj, encindex, cr) /**< @old{RB_ENCODING_CODERANGE_SET} */
|
||||
|
||||
/** @cond INTERNAL_MACRO */
/* Every name below is a macro that expands to itself.
 * NOTE(review): presumably this lets `#ifdef` detect the inline functions
 * of the same names -- confirm. */
#define RB_ENC_CODERANGE           RB_ENC_CODERANGE
#define RB_ENC_CODERANGE_AND       RB_ENC_CODERANGE_AND
#define RB_ENC_CODERANGE_ASCIIONLY RB_ENC_CODERANGE_ASCIIONLY
#define RB_ENC_CODERANGE_CLEAN_P   RB_ENC_CODERANGE_CLEAN_P
#define RB_ENC_CODERANGE_CLEAR     RB_ENC_CODERANGE_CLEAR
#define RB_ENC_CODERANGE_SET       RB_ENC_CODERANGE_SET
/** @endcond */
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_END()
|
||||
|
||||
#endif /* RUBY_INTERNAL_ENCODING_CODERANGE_H */
|
||||
258
libs/libruby/ruby/internal/encoding/ctype.h
vendored
Normal file
258
libs/libruby/ruby/internal/encoding/ctype.h
vendored
Normal file
@@ -0,0 +1,258 @@
|
||||
#ifndef RUBY_INTERNAL_ENCODING_CTYPE_H /*-*-C++-*-vi:se ft=cpp:*/
|
||||
#define RUBY_INTERNAL_ENCODING_CTYPE_H
|
||||
/**
|
||||
* @file
|
||||
* @author Ruby developers <ruby-core@ruby-lang.org>
|
||||
* @copyright This file is a part of the programming language Ruby.
|
||||
* Permission is hereby granted, to either redistribute and/or
|
||||
* modify this file, provided that the conditions mentioned in the
|
||||
* file COPYING are met. Consult the file for details.
|
||||
* @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
|
||||
* implementation details. Don't take them as canon. They could
|
||||
* rapidly appear then vanish. The name (path) of this header file
|
||||
* is also an implementation detail. Do not expect it to persist
|
||||
* at the place it is now. Developers are free to move it anywhere
|
||||
* anytime at will.
|
||||
* @note To ruby-core: remember that this header can be possibly
|
||||
* recursively included from extension libraries written in C++.
|
||||
* Do not expect for instance `__VA_ARGS__` is always available.
|
||||
* We assume C99 for ruby itself but we don't assume languages of
|
||||
* extension libraries. They could be written in C++98.
|
||||
* @brief Routines to query character types.
|
||||
*/
|
||||
|
||||
#include "ruby/onigmo.h"
|
||||
#include "ruby/internal/attr/const.h"
|
||||
#include "ruby/internal/dllexport.h"
|
||||
#include "ruby/internal/encoding/encoding.h"
|
||||
#include "ruby/internal/value.h"
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_BEGIN()
|
||||
|
||||
/**
|
||||
* Queries if the passed pointer points to a newline character. What is a
|
||||
* newline and what is not depends on the passed encoding.
|
||||
*
|
||||
* @param[in] p Pointer to a possibly-middle of a character.
|
||||
* @param[in] end End of the string.
|
||||
* @param[in] enc Encoding.
|
||||
* @retval false It isn't.
|
||||
* @retval true It is.
|
||||
*/
|
||||
static inline bool
|
||||
rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
|
||||
{
|
||||
OnigUChar *up = RBIMPL_CAST((OnigUChar *)p);
|
||||
OnigUChar *ue = RBIMPL_CAST((OnigUChar *)e);
|
||||
|
||||
return ONIGENC_IS_MBC_NEWLINE(enc, up, ue);
|
||||
}
|
||||
|
||||
/**
|
||||
* Queries if the passed code point is of passed character type in the passed
|
||||
* encoding. The "character type" here is a set of macros defined in onigmo.h,
|
||||
* like `ONIGENC_CTYPE_PUNCT`.
|
||||
*
|
||||
* @param[in] c An `OnigCodePoint` value.
|
||||
* @param[in] t An `OnigCtype` value.
|
||||
* @param[in] enc A `rb_encoding*` value.
|
||||
* @retval true `c` is of `t` in `enc`.
|
||||
* @retval false Otherwise.
|
||||
*/
|
||||
static inline bool
|
||||
rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_CTYPE(enc, c, t);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isascii(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] c A code point.
|
||||
* @param[in] enc An encoding.
|
||||
* @retval false `c` is out of range of ASCII character set in `enc`.
|
||||
* @retval true Otherwise.
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* `enc` is ignored. This is at least an intentional implementation detail
|
||||
* (not a bug). But there could be rooms for future extensions.
|
||||
*/
|
||||
static inline bool
|
||||
rb_enc_isascii(OnigCodePoint c, rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_ASCII(c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isalpha(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] c A code point.
|
||||
* @param[in] enc An encoding.
|
||||
* @retval true `enc` classifies `c` as "ALPHA".
|
||||
* @retval false Otherwise.
|
||||
*/
|
||||
static inline bool
|
||||
rb_enc_isalpha(OnigCodePoint c, rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_ALPHA(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_islower(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] c A code point.
|
||||
* @param[in] enc An encoding.
|
||||
* @retval true `enc` classifies `c` as "LOWER".
|
||||
* @retval false Otherwise.
|
||||
*/
|
||||
static inline bool
|
||||
rb_enc_islower(OnigCodePoint c, rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_LOWER(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isupper(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] c A code point.
|
||||
* @param[in] enc An encoding.
|
||||
* @retval true `enc` classifies `c` as "UPPER".
|
||||
* @retval false Otherwise.
|
||||
*/
|
||||
static inline bool
|
||||
rb_enc_isupper(OnigCodePoint c, rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_UPPER(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_iscntrl(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] c A code point.
|
||||
* @param[in] enc An encoding.
|
||||
* @retval true `enc` classifies `c` as "CNTRL".
|
||||
* @retval false Otherwise.
|
||||
*/
|
||||
static inline bool
|
||||
rb_enc_iscntrl(OnigCodePoint c, rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_CNTRL(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_ispunct(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] c A code point.
|
||||
* @param[in] enc An encoding.
|
||||
* @retval true `enc` classifies `c` as "PUNCT".
|
||||
* @retval false Otherwise.
|
||||
*/
|
||||
static inline bool
|
||||
rb_enc_ispunct(OnigCodePoint c, rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_PUNCT(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isalnum(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] c A code point.
|
||||
* @param[in] enc An encoding.
|
||||
* @retval true `enc` classifies `c` as "ANUM".
|
||||
* @retval false Otherwise.
|
||||
*/
|
||||
static inline bool
|
||||
rb_enc_isalnum(OnigCodePoint c, rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_ALNUM(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isprint(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] c A code point.
|
||||
* @param[in] enc An encoding.
|
||||
* @retval true `enc` classifies `c` as "PRINT".
|
||||
* @retval false Otherwise.
|
||||
*/
|
||||
static inline bool
|
||||
rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_PRINT(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isspace(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] c A code point.
|
||||
* @param[in] enc An encoding.
|
||||
* @retval true `enc` classifies `c` as "PRINT".
|
||||
* @retval false Otherwise.
|
||||
*/
|
||||
static inline bool
|
||||
rb_enc_isspace(OnigCodePoint c, rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_SPACE(enc, c);
|
||||
}
|
||||
|
||||
/**
|
||||
* Identical to rb_isdigit(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] c A code point.
|
||||
* @param[in] enc An encoding.
|
||||
* @retval true `enc` classifies `c` as "DIGIT".
|
||||
* @retval false Otherwise.
|
||||
*/
|
||||
static inline bool
|
||||
rb_enc_isdigit(OnigCodePoint c, rb_encoding *enc)
|
||||
{
|
||||
return ONIGENC_IS_CODE_DIGIT(enc, c);
|
||||
}
|
||||
|
||||
RBIMPL_ATTR_CONST()
|
||||
/**
|
||||
* Identical to rb_toupper(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] c A code point.
|
||||
* @param[in] enc An encoding.
|
||||
* @return `c`'s (Ruby's definition of) upper case counterpart.
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* As `RBIMPL_ATTR_CONST` implies this function ignores `enc`.
|
||||
*/
|
||||
int rb_enc_toupper(int c, rb_encoding *enc);
|
||||
|
||||
RBIMPL_ATTR_CONST()
|
||||
/**
|
||||
* Identical to rb_tolower(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] c A code point.
|
||||
* @param[in] enc An encoding.
|
||||
* @return `c`'s (Ruby's definition of) lower case counterpart.
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* As `RBIMPL_ATTR_CONST` implies this function ignores `enc`.
|
||||
*/
|
||||
int rb_enc_tolower(int c, rb_encoding *enc);
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_END()
|
||||
|
||||
/** @cond INTERNAL_MACRO */
/* Every name below is a macro that expands to itself.
 * NOTE(review): presumably this lets `#ifdef` detect the inline functions
 * of the same names -- confirm. */
#define rb_enc_is_newline rb_enc_is_newline
#define rb_enc_isalnum    rb_enc_isalnum
#define rb_enc_isalpha    rb_enc_isalpha
#define rb_enc_isascii    rb_enc_isascii
#define rb_enc_isctype    rb_enc_isctype
#define rb_enc_isdigit    rb_enc_isdigit
#define rb_enc_islower    rb_enc_islower
#define rb_enc_isprint    rb_enc_isprint
#define rb_enc_iscntrl    rb_enc_iscntrl
#define rb_enc_ispunct    rb_enc_ispunct
#define rb_enc_isspace    rb_enc_isspace
#define rb_enc_isupper    rb_enc_isupper
/** @endcond */
|
||||
|
||||
#endif /* RUBY_INTERNAL_ENCODING_CTYPE_H */
|
||||
1060
libs/libruby/ruby/internal/encoding/encoding.h
vendored
Normal file
1060
libs/libruby/ruby/internal/encoding/encoding.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
184
libs/libruby/ruby/internal/encoding/pathname.h
vendored
Normal file
184
libs/libruby/ruby/internal/encoding/pathname.h
vendored
Normal file
@@ -0,0 +1,184 @@
|
||||
#ifndef RUBY_INTERNAL_ENCODING_PATHNAME_H /*-*-C++-*-vi:se ft=cpp:*/
|
||||
#define RUBY_INTERNAL_ENCODING_PATHNAME_H
|
||||
/**
|
||||
* @file
|
||||
* @author Ruby developers <ruby-core@ruby-lang.org>
|
||||
* @copyright This file is a part of the programming language Ruby.
|
||||
* Permission is hereby granted, to either redistribute and/or
|
||||
* modify this file, provided that the conditions mentioned in the
|
||||
* file COPYING are met. Consult the file for details.
|
||||
* @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
|
||||
* implementation details. Don't take them as canon. They could
|
||||
* rapidly appear then vanish. The name (path) of this header file
|
||||
* is also an implementation detail. Do not expect it to persist
|
||||
* at the place it is now. Developers are free to move it anywhere
|
||||
* anytime at will.
|
||||
* @note To ruby-core: remember that this header can be possibly
|
||||
* recursively included from extension libraries written in C++.
|
||||
* Do not expect for instance `__VA_ARGS__` is always available.
|
||||
* We assume C99 for ruby itself but we don't assume languages of
|
||||
* extension libraries. They could be written in C++98.
|
||||
* @brief Routines to manipulate encodings of pathnames.
|
||||
*/
|
||||
|
||||
#include "ruby/internal/attr/nonnull.h"
|
||||
#include "ruby/internal/dllexport.h"
|
||||
#include "ruby/internal/encoding/encoding.h"
|
||||
#include "ruby/internal/value.h"
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_BEGIN()
|
||||
RBIMPL_ATTR_NONNULL(())
|
||||
/**
|
||||
* Returns a path component directly adjacent to the passed pointer.
|
||||
*
|
||||
* ```
|
||||
* "/multi/byte/encoded/pathname.txt"
|
||||
* ^ ^ ^
|
||||
* | | +--- end
|
||||
* | +--- @return
|
||||
* +--- path
|
||||
* ```
|
||||
*
|
||||
* @param[in] path Where to start scanning.
|
||||
* @param[in] end End of the path string.
|
||||
* @param[in] enc Encoding of the string.
|
||||
* @return A pointer in the passed string where the next path component
|
||||
* resides, or `end` if there is no next path component.
|
||||
*/
|
||||
char *rb_enc_path_next(const char *path, const char *end, rb_encoding *enc);
|
||||
|
||||
RBIMPL_ATTR_NONNULL(())
|
||||
/**
|
||||
* Seeks for non-prefix part of a pathname. This can be a no-op when the OS
|
||||
* has no such concept like a path prefix. But there are OSes where path
|
||||
* prefixes do exist.
|
||||
*
|
||||
* ```
|
||||
* "C:\multi\byte\encoded\pathname.txt"
|
||||
* ^ ^ ^
|
||||
* | | +--- end
|
||||
* | +--- @return
|
||||
* +--- path
|
||||
* ```
|
||||
*
|
||||
* @param[in] path Where to start scanning.
|
||||
* @param[in] end End of the path string.
|
||||
* @param[in] enc Encoding of the string.
|
||||
* @return A pointer in the passed string where non-prefix part starts, or
|
||||
* `path` if the OS does not have path prefix.
|
||||
*/
|
||||
char *rb_enc_path_skip_prefix(const char *path, const char *end, rb_encoding *enc);
|
||||
|
||||
RBIMPL_ATTR_NONNULL(())
|
||||
/**
|
||||
* Returns the last path component.
|
||||
*
|
||||
* ```
|
||||
* "/multi/byte/encoded/pathname.txt"
|
||||
* ^ ^ ^
|
||||
* | | +--- end
|
||||
* | +--- @return
|
||||
* +--- path
|
||||
* ```
|
||||
*
|
||||
* @param[in] path Where to start scanning.
|
||||
* @param[in] end End of the path string.
|
||||
* @param[in] enc Encoding of the string.
|
||||
* @return A pointer in the passed string where the last path component
|
||||
* resides, or `end` if there is no more path component.
|
||||
*/
|
||||
char *rb_enc_path_last_separator(const char *path, const char *end, rb_encoding *enc);
|
||||
|
||||
RBIMPL_ATTR_NONNULL(())
|
||||
/**
|
||||
* This basically just returns the passed end. It makes a difference in case the
|
||||
* passed string ends with tons of path separators like the following:
|
||||
*
|
||||
* ```
|
||||
* "/path/that/ends/with/lots/of/slashes//////////////"
|
||||
* ^ ^ ^
|
||||
* | | +--- end
|
||||
* | +--- @return
|
||||
* +--- path
|
||||
* ```
|
||||
*
|
||||
* @param[in] path Where to start scanning.
|
||||
* @param[in] end End of the path string.
|
||||
* @param[in] enc Encoding of the string.
|
||||
* @return A pointer in the passed string where the trailing path
|
||||
* separators start, or `end` if there is no trailing path
|
||||
* separators.
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* It seems this function was introduced to mimic what POSIX says about
|
||||
* `basename(3)`.
|
||||
*/
|
||||
char *rb_enc_path_end(const char *path, const char *end, rb_encoding *enc);
|
||||
|
||||
RBIMPL_ATTR_NONNULL((1, 4))
|
||||
/**
|
||||
* Our own encoding-aware version of `basename(3)`. Normally, this function
|
||||
* returns the last path component of the given name. However in case the
|
||||
* passed name ends with a path separator, it returns the name of the
|
||||
* directory, not the last (empty) component. Also if the passed name is a
|
||||
* root directory, it returns that root directory. Note however that Windows
|
||||
* filesystem have drive letters, which this function does not return.
|
||||
*
|
||||
* @param[in] name Target path.
|
||||
* @param[out] baselen Return buffer.
|
||||
* @param[in,out] alllen Number of bytes of `name`.
|
||||
* @param[in] enc Encoding of `name`.
|
||||
* @return The rightmost component of `name`.
|
||||
* @post `baselen`, if passed, is updated to be the number of bytes
|
||||
* of the returned basename.
|
||||
* @post `alllen`, if passed, is updated to be the number of bytes of
|
||||
* strings not considered as the basename.
|
||||
*/
|
||||
const char *ruby_enc_find_basename(const char *name, long *baselen, long *alllen, rb_encoding *enc);
|
||||
|
||||
RBIMPL_ATTR_NONNULL((1, 3))
|
||||
/**
|
||||
* Our own encoding-aware version of `extname`. This function first applies
|
||||
* rb_enc_path_last_separator() to the passed name and only concerns its return
|
||||
* value (ignores any parent directories). This function returns complicated
|
||||
* results:
|
||||
*
|
||||
* ```CXX
|
||||
* auto path = "...";
|
||||
* auto len = strlen(path);
|
||||
* auto ret = ruby_enc_find_extname(path, &len, rb_ascii8bit_encoding());
|
||||
*
|
||||
* switch(len) {
|
||||
* case 0:
|
||||
* if (ret == 0) {
|
||||
* // `path` is a file without extensions.
|
||||
* }
|
||||
* else {
|
||||
* // `path` is a dotfile.
|
||||
* // `ret` is the file's name.
|
||||
* }
|
||||
* break;
|
||||
*
|
||||
* case 1:
|
||||
* // `path` _ends_ with a dot.
|
||||
* // `ret` is that dot.
|
||||
* break;
|
||||
*
|
||||
* default:
|
||||
* // `path` has an extension.
|
||||
* // `ret` is that extension.
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* @param[in] name Target path.
|
||||
* @param[in,out] len Number of bytes of `name`.
|
||||
* @param[in] enc Encoding of `name`.
|
||||
* @return See above.
|
||||
* @post `len`, if passed, is updated (see above).
|
||||
*/
|
||||
const char *ruby_enc_find_extname(const char *name, long *len, rb_encoding *enc);
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_END()
|
||||
|
||||
#endif /* RUBY_INTERNAL_ENCODING_PATHNAME_H */
|
||||
46
libs/libruby/ruby/internal/encoding/re.h
vendored
Normal file
46
libs/libruby/ruby/internal/encoding/re.h
vendored
Normal file
@@ -0,0 +1,46 @@
|
||||
#ifndef RUBY_INTERNAL_ENCODING_RE_H /*-*-C++-*-vi:se ft=cpp:*/
|
||||
#define RUBY_INTERNAL_ENCODING_RE_H
|
||||
/**
|
||||
* @file
|
||||
* @author Ruby developers <ruby-core@ruby-lang.org>
|
||||
* @copyright This file is a part of the programming language Ruby.
|
||||
* Permission is hereby granted, to either redistribute and/or
|
||||
* modify this file, provided that the conditions mentioned in the
|
||||
* file COPYING are met. Consult the file for details.
|
||||
* @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
|
||||
* implementation details. Don't take them as canon. They could
|
||||
* rapidly appear then vanish. The name (path) of this header file
|
||||
* is also an implementation detail. Do not expect it to persist
|
||||
* at the place it is now. Developers are free to move it anywhere
|
||||
* anytime at will.
|
||||
* @note To ruby-core: remember that this header can be possibly
|
||||
* recursively included from extension libraries written in C++.
|
||||
* Do not expect for instance `__VA_ARGS__` is always available.
|
||||
* We assume C99 for ruby itself but we don't assume languages of
|
||||
* extension libraries. They could be written in C++98.
|
||||
* @brief Routines to manipulate encodings of regular expressions.
|
||||
*/
|
||||
|
||||
#include "ruby/internal/dllexport.h"
|
||||
#include "ruby/internal/encoding/encoding.h"
|
||||
#include "ruby/internal/value.h"
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_BEGIN()
|
||||
|
||||
/**
|
||||
* Identical to rb_reg_new(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] ptr A memory region of `len` bytes length.
|
||||
* @param[in] len Length of `ptr`, in bytes, not including the
|
||||
* terminating NUL character.
|
||||
* @param[in] enc Encoding of `ptr`.
|
||||
* @param[in] opts Options e.g. ONIG_OPTION_MULTILINE.
|
||||
* @exception rb_eRegexpError Failed to compile `ptr`.
|
||||
* @return An allocated new instance of ::rb_cRegexp, of `enc` encoding,
|
||||
* whose expression is compiled according to `ptr`.
|
||||
*/
|
||||
VALUE rb_enc_reg_new(const char *ptr, long len, rb_encoding *enc, int opts);
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_END()
|
||||
|
||||
#endif /* RUBY_INTERNAL_ENCODING_RE_H */
|
||||
78
libs/libruby/ruby/internal/encoding/sprintf.h
vendored
Normal file
78
libs/libruby/ruby/internal/encoding/sprintf.h
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
#ifndef RUBY_INTERNAL_ENCODING_SPRINTF_H /*-*-C++-*-vi:se ft=cpp:*/
|
||||
#define RUBY_INTERNAL_ENCODING_SPRINTF_H
|
||||
/**
|
||||
* @file
|
||||
* @author Ruby developers <ruby-core@ruby-lang.org>
|
||||
* @copyright This file is a part of the programming language Ruby.
|
||||
* Permission is hereby granted, to either redistribute and/or
|
||||
* modify this file, provided that the conditions mentioned in the
|
||||
* file COPYING are met. Consult the file for details.
|
||||
* @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
|
||||
* implementation details. Don't take them as canon. They could
|
||||
* rapidly appear then vanish. The name (path) of this header file
|
||||
* is also an implementation detail. Do not expect it to persist
|
||||
* at the place it is now. Developers are free to move it anywhere
|
||||
* anytime at will.
|
||||
* @note To ruby-core: remember that this header can be possibly
|
||||
* recursively included from extension libraries written in C++.
|
||||
* Do not expect for instance `__VA_ARGS__` is always available.
|
||||
* We assume C99 for ruby itself but we don't assume languages of
|
||||
* extension libraries. They could be written in C++98.
|
||||
* @brief Routines to format strings and raise exceptions with an explicit encoding.
|
||||
*/
|
||||
#include "ruby/internal/config.h"
|
||||
#include <stdarg.h>
|
||||
#include "ruby/internal/attr/format.h"
|
||||
#include "ruby/internal/attr/nonnull.h"
|
||||
#include "ruby/internal/attr/noreturn.h"
|
||||
#include "ruby/internal/dllexport.h"
|
||||
#include "ruby/internal/encoding/encoding.h"
|
||||
#include "ruby/internal/value.h"
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_BEGIN()
|
||||
RBIMPL_ATTR_NONNULL((2))
|
||||
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 3)
|
||||
/**
|
||||
* Identical to rb_sprintf(), except it additionally takes an encoding. The
|
||||
* passed encoding rules both the incoming format specifier and the resulting
|
||||
* string.
|
||||
*
|
||||
* @param[in] enc Encoding of `fmt`.
|
||||
* @param[in] fmt A `printf`-like format specifier.
|
||||
* @param[in] ... Variadic number of contents to format.
|
||||
* @return A rendered new instance of ::rb_cString, of `enc` encoding.
|
||||
*/
|
||||
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt, ...);
|
||||
|
||||
RBIMPL_ATTR_NONNULL((2))
|
||||
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 2, 0)
|
||||
/**
|
||||
* Identical to rb_enc_sprintf(), except it takes a `va_list` instead of
|
||||
* variadic arguments. It can also be seen as a routine identical to
|
||||
* rb_vsprintf(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] enc Encoding of `fmt`.
|
||||
* @param[in] fmt A `printf`-like format specifier.
|
||||
* @param[in] ap Contents to format.
|
||||
* @return A rendered new instance of ::rb_cString, of `enc` encoding.
|
||||
*/
|
||||
VALUE rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap);
|
||||
|
||||
RBIMPL_ATTR_NORETURN()
|
||||
RBIMPL_ATTR_NONNULL((3))
|
||||
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 3, 4)
|
||||
/**
|
||||
* Identical to rb_raise(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] enc Encoding of the generating exception.
|
||||
* @param[in] exc A subclass of ::rb_eException.
|
||||
* @param[in] fmt Format specifier string compatible with rb_sprintf().
|
||||
* @param[in] ... Contents of the message.
|
||||
* @exception exc The specified exception.
|
||||
* @note It never returns.
|
||||
*/
|
||||
void rb_enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...);
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_END()
|
||||
|
||||
#endif /* RUBY_INTERNAL_ENCODING_SPRINTF_H */
|
||||
346
libs/libruby/ruby/internal/encoding/string.h
vendored
Normal file
346
libs/libruby/ruby/internal/encoding/string.h
vendored
Normal file
@@ -0,0 +1,346 @@
|
||||
#ifndef RUBY_INTERNAL_ENCODING_STRING_H /*-*-C++-*-vi:se ft=cpp:*/
|
||||
#define RUBY_INTERNAL_ENCODING_STRING_H
|
||||
/**
|
||||
* @file
|
||||
* @author Ruby developers <ruby-core@ruby-lang.org>
|
||||
* @copyright This file is a part of the programming language Ruby.
|
||||
* Permission is hereby granted, to either redistribute and/or
|
||||
* modify this file, provided that the conditions mentioned in the
|
||||
* file COPYING are met. Consult the file for details.
|
||||
* @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
|
||||
* implementation details. Don't take them as canon. They could
|
||||
* rapidly appear then vanish. The name (path) of this header file
|
||||
* is also an implementation detail. Do not expect it to persist
|
||||
* at the place it is now. Developers are free to move it anywhere
|
||||
* anytime at will.
|
||||
* @note To ruby-core: remember that this header can be possibly
|
||||
* recursively included from extension libraries written in C++.
|
||||
* Do not expect for instance `__VA_ARGS__` is always available.
|
||||
* We assume C99 for ruby itself but we don't assume languages of
|
||||
* extension libraries. They could be written in C++98.
|
||||
* @brief Routines to manipulate encodings of strings.
|
||||
*/
|
||||
|
||||
#include "ruby/internal/dllexport.h"
|
||||
#include "ruby/internal/value.h"
|
||||
#include "ruby/internal/encoding/encoding.h"
|
||||
#include "ruby/internal/attr/nonnull.h"
|
||||
#include "ruby/internal/intern/string.h" /* rbimpl_strlen */
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_BEGIN()
|
||||
|
||||
/**
|
||||
* Identical to rb_str_new(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] ptr A memory region of `len` bytes length.
|
||||
* @param[in] len Length of `ptr`, in bytes, not including the
|
||||
* terminating NUL character.
|
||||
* @param[in] enc Encoding of `ptr`.
|
||||
* @exception rb_eNoMemError Failed to allocate `len+1` bytes.
|
||||
* @exception rb_eArgError `len` is negative.
|
||||
* @return An instance of ::rb_cString, of `len` bytes length, of `enc`
|
||||
* encoding, whose contents are verbatim copy of `ptr`.
|
||||
* @pre At least `len` bytes of continuous memory region shall be
|
||||
* accessible via `ptr`.
|
||||
* @note `enc` can be a null pointer. It can also be seen as a routine
|
||||
* identical to rb_usascii_str_new() then.
|
||||
*/
|
||||
VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc);
|
||||
|
||||
RBIMPL_ATTR_NONNULL((1))
|
||||
/**
|
||||
* Identical to rb_enc_str_new(), except it assumes the passed pointer is a
|
||||
* pointer to a C string. It can also be seen as a routine identical to
|
||||
* rb_str_new_cstr(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] ptr A C string.
|
||||
* @param[in] enc Encoding of `ptr`.
|
||||
* @exception rb_eNoMemError Failed to allocate memory.
|
||||
* @return An instance of ::rb_cString, of `enc` encoding, whose contents
|
||||
* are verbatim copy of `ptr`.
|
||||
* @pre `ptr` must not be a null pointer.
|
||||
* @pre Because `ptr` is a C string it makes no sense for `enc` to be
|
||||
* something like UTF-32.
|
||||
* @note `enc` can be a null pointer. It can also be seen as a routine
|
||||
* identical to rb_usascii_str_new_cstr() then.
|
||||
*/
|
||||
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc);
|
||||
|
||||
/**
|
||||
* Identical to rb_enc_str_new(), except it takes a C string literal. It can
|
||||
* also be seen as a routine identical to rb_str_new_static(), except it
|
||||
* additionally takes an encoding.
|
||||
*
|
||||
* @param[in] ptr A C string literal.
|
||||
* @param[in] len `strlen(ptr)`.
|
||||
* @param[in] enc Encoding of `ptr`.
|
||||
* @exception rb_eArgError `len` out of range of `size_t`.
|
||||
* @pre `ptr` must be a C string constant.
|
||||
* @return An instance of ::rb_cString, of `enc` encoding, whose backend
|
||||
* storage is the passed C string literal.
|
||||
* @warning It is a very bad idea to write to a C string literal (often
|
||||
* immediate SEGV shall occur). Consider return values of this
|
||||
* function be read-only.
|
||||
* @note `enc` can be a null pointer. It can also be seen as a routine
|
||||
* identical to rb_usascii_str_new_static() then.
|
||||
*/
|
||||
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc);
|
||||
|
||||
/**
|
||||
* Identical to rb_enc_str_new(), except it returns a "f"string. It can also
|
||||
* be seen as a routine identical to rb_interned_str(), except it additionally
|
||||
* takes an encoding.
|
||||
*
|
||||
* @param[in] ptr A memory region of `len` bytes length.
|
||||
* @param[in] len Length of `ptr`, in bytes, not including the
|
||||
* terminating NUL character.
|
||||
* @param[in] enc Encoding of `ptr`.
|
||||
* @exception rb_eArgError `len` is negative.
|
||||
* @return A found or created instance of ::rb_cString, of `len` bytes
|
||||
* length, of `enc` encoding, whose contents are identical to that
|
||||
* of `ptr`.
|
||||
* @pre At least `len` bytes of continuous memory region shall be
|
||||
* accessible via `ptr`.
|
||||
* @note `enc` can be a null pointer.
|
||||
*/
|
||||
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc);
|
||||
|
||||
RBIMPL_ATTR_NONNULL((1))
|
||||
/**
|
||||
* Identical to rb_enc_str_new_cstr(), except it returns a "f"string. It can
|
||||
* also be seen as a routine identical to rb_interned_str_cstr(), except it
|
||||
* additionally takes an encoding.
|
||||
*
|
||||
* @param[in] ptr A C string.
|
||||
* @param[in] enc Encoding of `ptr`.
|
||||
* @return A found or created instance of ::rb_cString of `enc` encoding,
|
||||
* whose contents are identical to that of `ptr`.
|
||||
* @pre A NUL-terminated C string shall be
|
||||
* accessible via `ptr`.
|
||||
* @note `enc` can be a null pointer.
|
||||
*/
|
||||
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc);
|
||||
|
||||
/**
|
||||
* Counts the number of characters of the passed string, according to the
|
||||
* passed encoding. This has to be complicated. The passed string could be
|
||||
* invalid and/or broken. This routine would scan from the beginning until the
|
||||
* end, byte by byte, to seek out character boundaries. Could be super slow.
|
||||
*
|
||||
* @param[in] head Leftmost pointer to the string.
|
||||
* @param[in] tail Rightmost pointer to the string.
|
||||
* @param[in] enc Encoding of the string.
|
||||
* @return Number of characters that exist in `head` .. `tail`. The definition
|
||||
* of "character" depends on the passed `enc`.
|
||||
*/
|
||||
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc);
|
||||
|
||||
/**
|
||||
* Queries the n-th character. Like rb_enc_strlen() this function can be fast
|
||||
* or slow depending on the contents. Don't expect characters to be uniformly
|
||||
* distributed across the entire string.
|
||||
*
|
||||
* @param[in] head Leftmost pointer to the string.
|
||||
* @param[in] tail Rightmost pointer to the string.
|
||||
* @param[in] nth Requested index of characters.
|
||||
* @param[in] enc Encoding of the string.
|
||||
* @return Pointer to the first byte of the character that is `nth`
|
||||
* character ahead of `head`, or `tail` if there is no such
|
||||
* character (OOB etc). The definition of "character" depends on
|
||||
* the passed `enc`.
|
||||
*/
|
||||
char *rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc);
|
||||
|
||||
/**
|
||||
* Identical to rb_enc_get_index(), except the return type.
|
||||
*
|
||||
* @param[in] obj Object in question.
|
||||
* @exception rb_eTypeError `obj` is incapable of having an encoding.
|
||||
* @return `obj`'s encoding.
|
||||
*/
|
||||
VALUE rb_obj_encoding(VALUE obj);
|
||||
|
||||
/**
|
||||
* Identical to rb_str_cat(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[out] str Destination object.
|
||||
* @param[in] ptr Contents to append.
|
||||
* @param[in] len Length of `ptr`, in bytes.
|
||||
* @param[in] enc Encoding of `ptr`.
|
||||
* @exception rb_eArgError `len` is negative.
|
||||
* @exception rb_eEncCompatError `enc` is not compatible with `str`.
|
||||
* @return The passed `str`.
|
||||
* @post The contents of `ptr` are copied, transcoded into `str`'s
|
||||
* encoding, then pasted into `str`'s end.
|
||||
*/
|
||||
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc);
|
||||
|
||||
/**
|
||||
* Encodes the passed code point into a series of bytes.
|
||||
*
|
||||
* @param[in] code Code point.
|
||||
* @param[in] enc Target encoding scheme.
|
||||
* @exception rb_eRangeError `code` cannot be encoded in `enc`.
|
||||
* @return An instance of ::rb_cString, of `enc` encoding, whose sole
|
||||
* contents is `code` represented in `enc`.
|
||||
* @note No way to encode code points bigger than UINT_MAX.
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* In other languages, APIs like this one could be seen as the primitive
|
||||
* routines where encodings' "encode" feature are implemented. However in case
|
||||
* of Ruby this is not the primitive one. We directly manipulate encoded
|
||||
* strings. Encoding conversion routines transcode an encoded string directly
|
||||
* to another one; not via a code point array.
|
||||
*/
|
||||
VALUE rb_enc_uint_chr(unsigned int code, rb_encoding *enc);
|
||||
|
||||
/**
|
||||
* Identical to rb_external_str_new(), except it additionally takes an
|
||||
* encoding. However the whole point of rb_external_str_new() is to encode a
|
||||
* string into default external encoding. Being able to specify arbitrary
|
||||
* encoding just ruins the designed purpose of the function, it seems.
|
||||
*
|
||||
* @param[in] ptr A memory region of `len` bytes length.
|
||||
* @param[in] len Length of `ptr`, in bytes, not including the
|
||||
* terminating NUL character.
|
||||
* @param[in] enc Target encoding scheme.
|
||||
* @exception rb_eArgError `len` is negative.
|
||||
* @return An instance of ::rb_cString. In case encoding conversion from
|
||||
* "default internal" to `enc` is fully defined over the given
|
||||
* contents, then the return value is a string of `enc` encoding,
|
||||
* whose contents are the converted ones. Otherwise the string is
|
||||
* a junk.
|
||||
* @warning It doesn't raise on a conversion failure and silently ends up in
|
||||
* a corrupted output. You can know the failure by querying
|
||||
* `valid_encoding?` of the result object.
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* @shyouhei has no idea why this one does not follow the naming convention
|
||||
* that others obey. It seems to him that this should have been called
|
||||
* `rb_enc_external_str_new`.
|
||||
*/
|
||||
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc);
|
||||
|
||||
/**
|
||||
* Identical to rb_str_export(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] obj Target object.
|
||||
* @param[in] enc Target encoding.
|
||||
* @exception rb_eTypeError No implicit conversion to String.
|
||||
* @return Converted ruby string of `enc` encoding.
|
||||
*/
|
||||
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc);
|
||||
|
||||
/**
|
||||
* Encoding conversion main routine.
|
||||
*
|
||||
* @param[in] str String to convert.
|
||||
* @param[in] from Source encoding.
|
||||
* @param[in] to Destination encoding.
|
||||
* @return A copy of `str`, with conversion from `from` to `to` applied.
|
||||
* @note `from` can be a null pointer. `str`'s encoding is taken then.
|
||||
* @note `to` can be a null pointer. No-op then.
|
||||
*/
|
||||
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to);
|
||||
|
||||
/**
|
||||
* Identical to rb_str_conv_enc(), except it additionally takes IO encoder
|
||||
* options. The extra arguments can be constructed using io_extract_modeenc()
|
||||
* etc.
|
||||
*
|
||||
* @param[in] str String to convert.
|
||||
* @param[in] from Source encoding.
|
||||
* @param[in] to Destination encoding.
|
||||
* @param[in] ecflags A set of enum ::ruby_econv_flag_type.
|
||||
* @param[in] ecopts Optional hash.
|
||||
* @return A copy of `str`, with conversion from `from` to `to` applied.
|
||||
* @note `from` can be a null pointer. `str`'s encoding is taken then.
|
||||
* @note `to` can be a null pointer. No-op then.
|
||||
* @note `ecopts` can be ::RUBY_Qnil, which is equivalent to passing an
|
||||
* empty hash.
|
||||
*/
|
||||
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts);
|
||||
|
||||
/**
|
||||
* Scans the passed string to collect its code range. Because a Ruby string
|
||||
* is mutable, its contents change from time to time; so does its code range.
|
||||
* A long-lived string tends to fall back to ::RUBY_ENC_CODERANGE_UNKNOWN.
|
||||
* This API scans it and re-assigns a fine-grained code range constant.
|
||||
*
|
||||
* @param[out] str A string.
|
||||
* @return An enum ::ruby_coderange_type.
|
||||
*/
|
||||
int rb_enc_str_coderange(VALUE str);
|
||||
|
||||
/**
|
||||
* Scans the passed string until it finds something odd. Returns the number of
|
||||
* bytes scanned. As the name implies this is suitable for repeated calls. One
|
||||
* of its applications is `IO#readlines`. The method reads from its receiver's
|
||||
* read buffer, maybe more than once, looking for newlines. But "newline" can
|
||||
* be different among encodings. This API is used to detect broken contents to
|
||||
* properly mark them as such.
|
||||
*
|
||||
* @param[in] str String to scan.
|
||||
* @param[in] end End of `str`.
|
||||
* @param[in] enc `str`'s encoding.
|
||||
* @param[out] cr Return buffer.
|
||||
* @return Distance between `str` and first such byte where broken.
|
||||
* @post `cr` has the code range type.
|
||||
*/
|
||||
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr);
|
||||
|
||||
/**
|
||||
* Queries if the passed string is "ASCII only". An ASCII only string is a
|
||||
* string that doesn't have any non-ASCII characters at all. This doesn't
|
||||
* necessarily mean the string is in ASCII encoding. For instance a String of
|
||||
* CP932 encoding can quite much be ASCII only, depending on its contents.
|
||||
*
|
||||
* @param[in] str String in question.
|
||||
* @retval 1 It doesn't have non-ASCII characters.
|
||||
* @retval 0 It has characters that are out of ASCII.
|
||||
*/
|
||||
int rb_enc_str_asciionly_p(VALUE str);
|
||||
|
||||
RBIMPL_ATTR_NONNULL(())
|
||||
/**
|
||||
* Looks for the passed string in the passed buffer.
|
||||
*
|
||||
* @param[in] x Buffer that potentially includes `y`.
|
||||
* @param[in] m Number of bytes of `x`.
|
||||
* @param[in] y Query string.
|
||||
* @param[in] n Number of bytes of `y`.
|
||||
* @param[in] enc Encoding of both `x` and `y`.
|
||||
* @retval -1 Not found.
|
||||
* @retval otherwise Found index in `x`.
|
||||
* @note This API can match at a non-character-boundary.
|
||||
*/
|
||||
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc);
|
||||
|
||||
/** @cond INTERNAL_MACRO */
|
||||
RBIMPL_ATTR_NONNULL(())
|
||||
/**
 * Implementation detail behind the rb_enc_str_new_cstr() macro: it is the
 * branch that macro selects when RBIMPL_CONSTANT_P(str) holds.  It measures
 * `str` with rbimpl_strlen() and forwards the pointer, that length and `enc`
 * to rb_enc_str_new_static(), so the returned string is built by the "static"
 * constructor rather than by rb_enc_str_new_cstr() proper.
 *
 * @param[in]  str  A C string; its length is taken with rbimpl_strlen().
 * @param[in]  enc  Encoding passed through unchanged.
 * @return     Whatever rb_enc_str_new_static() returns for (`str`, length of
 *             `str`, `enc`).
 * @note       NOTE(review): the empty RBIMPL_ATTR_NONNULL(()) presumably marks
 *             every pointer parameter as non-null, `enc` included, while the
 *             documentation of rb_enc_str_new_cstr() says `enc` may be a null
 *             pointer -- confirm which one is intended.
 */
static inline VALUE
|
||||
rbimpl_enc_str_new_cstr(const char *str, rb_encoding *enc)
|
||||
{
|
||||
long len = rbimpl_strlen(str);
|
||||
|
||||
return rb_enc_str_new_static(str, len, enc);
|
||||
}
|
||||
|
||||
/*
 * Function-like macro that shadows the rb_enc_str_new() function declared
 * earlier in this header.  When RBIMPL_CONSTANT_P holds for both `str` and
 * `len`, the call is routed to rb_enc_str_new_static(); otherwise it goes to
 * the real rb_enc_str_new() function.  The inner `rb_enc_str_new` names the
 * function, not this macro, because a macro is not re-expanded inside its own
 * replacement list.  Note that `str` and `len` each appear twice in the
 * expansion: once as an operand of RBIMPL_CONSTANT_P and once as a call
 * argument.
 * NOTE(review): RBIMPL_CONSTANT_P is defined elsewhere; presumably it does not
 * evaluate its operand -- confirm before relying on single evaluation.
 */
#define rb_enc_str_new(str, len, enc) \
|
||||
((RBIMPL_CONSTANT_P(str) && \
|
||||
RBIMPL_CONSTANT_P(len) ? \
|
||||
rb_enc_str_new_static: \
|
||||
rb_enc_str_new) ((str), (len), (enc)))
|
||||
|
||||
/*
 * Function-like macro that shadows the rb_enc_str_new_cstr() function declared
 * earlier in this header.  When RBIMPL_CONSTANT_P(str) holds, the call is
 * routed to the inline helper rbimpl_enc_str_new_cstr(); otherwise it goes to
 * the real rb_enc_str_new_cstr() function.  The inner `rb_enc_str_new_cstr`
 * names the function, not this macro, because a macro is not re-expanded
 * inside its own replacement list.  `str` appears twice in the expansion: once
 * as the operand of RBIMPL_CONSTANT_P and once as a call argument.
 * NOTE(review): RBIMPL_CONSTANT_P is defined elsewhere; presumably it does not
 * evaluate its operand -- confirm before relying on single evaluation.
 */
#define rb_enc_str_new_cstr(str, enc) \
|
||||
((RBIMPL_CONSTANT_P(str) ? \
|
||||
rbimpl_enc_str_new_cstr : \
|
||||
rb_enc_str_new_cstr) ((str), (enc)))
|
||||
|
||||
/** @endcond */
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_END()
|
||||
|
||||
#endif /* RUBY_INTERNAL_ENCODING_STRING_H */
|
||||
100
libs/libruby/ruby/internal/encoding/symbol.h
vendored
Normal file
100
libs/libruby/ruby/internal/encoding/symbol.h
vendored
Normal file
@@ -0,0 +1,100 @@
|
||||
#ifndef RUBY_INTERNAL_ENCODING_SYMBOL_H /*-*-C++-*-vi:se ft=cpp:*/
|
||||
#define RUBY_INTERNAL_ENCODING_SYMBOL_H
|
||||
/**
|
||||
* @file
|
||||
* @author Ruby developers <ruby-core@ruby-lang.org>
|
||||
* @copyright This file is a part of the programming language Ruby.
|
||||
* Permission is hereby granted, to either redistribute and/or
|
||||
* modify this file, provided that the conditions mentioned in the
|
||||
* file COPYING are met. Consult the file for details.
|
||||
* @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
|
||||
* implementation details. Don't take them as canon. They could
|
||||
* rapidly appear then vanish. The name (path) of this header file
|
||||
* is also an implementation detail. Do not expect it to persist
|
||||
* at the place it is now. Developers are free to move it anywhere
|
||||
* anytime at will.
|
||||
* @note To ruby-core: remember that this header can be possibly
|
||||
* recursively included from extension libraries written in C++.
|
||||
* Do not expect for instance `__VA_ARGS__` is always available.
|
||||
* We assume C99 for ruby itself but we don't assume languages of
|
||||
* extension libraries. They could be written in C++98.
|
||||
* @brief Routines to manipulate encodings of symbols.
|
||||
*/
|
||||
|
||||
#include "ruby/internal/attr/nonnull.h"
|
||||
#include "ruby/internal/dllexport.h"
|
||||
#include "ruby/internal/encoding/encoding.h"
|
||||
#include "ruby/internal/value.h"
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_BEGIN()
|
||||
|
||||
/**
|
||||
* Identical to rb_intern2(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] name The name of the id.
|
||||
* @param[in] len Length of `name`.
|
||||
* @param[in] enc `name`'s encoding.
|
||||
* @exception rb_eRuntimeError Too many symbols.
|
||||
* @return A (possibly new) id whose value is the given name.
|
||||
* @note These days Ruby internally has two kinds of symbols
|
||||
* (static/dynamic). Symbols created using this function would
|
||||
* become static ones; i.e. would never be garbage collected. It
|
||||
* is up to you to avoid memory leaks. Think twice before using
|
||||
* it.
|
||||
*/
|
||||
ID rb_intern3(const char *name, long len, rb_encoding *enc);
|
||||
|
||||
RBIMPL_ATTR_NONNULL(())
|
||||
/**
|
||||
* Identical to rb_symname_p(), except it additionally takes an encoding.
|
||||
*
|
||||
* @param[in] str A C string to check.
|
||||
* @param[in] enc `str`'s encoding.
|
||||
* @retval 1 It is a valid symbol name.
|
||||
* @retval 0 It is invalid as a symbol name.
|
||||
*/
|
||||
int rb_enc_symname_p(const char *str, rb_encoding *enc);
|
||||
|
||||
/**
|
||||
* Identical to rb_enc_symname_p(), except it additionally takes the passed
|
||||
* string's length. This is needed for strings containing NUL bytes, like in
|
||||
* case of UTF-32.
|
||||
*
|
||||
* @param[in] name A C string to check.
|
||||
* @param[in] len Number of bytes of `name`.
|
||||
* @param[in] enc `name`'s encoding.
|
||||
* @retval 1 It is a valid symbol name.
|
||||
* @retval 0 It is invalid as a symbol name.
|
||||
*/
|
||||
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc);
|
||||
|
||||
/**
|
||||
* Identical to rb_check_id(), except it takes a pointer to a memory region
|
||||
* instead of Ruby's string.
|
||||
*
|
||||
* @param[in] ptr A pointer to a memory region.
|
||||
* @param[in] len Number of bytes of `ptr`.
|
||||
* @param[in] enc Encoding of `ptr`.
|
||||
* @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`.
|
||||
* @retval 0 No such id ever existed in the history.
|
||||
* @retval otherwise The id that represents the given name.
|
||||
*/
|
||||
ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc);
|
||||
|
||||
/**
|
||||
* Identical to rb_check_id_cstr(), except for the return type. It can also be
|
||||
* seen as a routine identical to rb_check_symbol(), except it takes a pointer
|
||||
* to a memory region instead of Ruby's string.
|
||||
*
|
||||
* @param[in] ptr A pointer to a memory region.
|
||||
* @param[in] len Number of bytes of `ptr`.
|
||||
* @param[in] enc Encoding of `ptr`.
|
||||
* @exception rb_eEncodingError `ptr` contains non-ASCII according to `enc`.
|
||||
* @retval RUBY_Qnil No such id ever existed in the history.
|
||||
* @retval otherwise The symbol that represents the given name.
|
||||
*/
|
||||
VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc);
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_END()
|
||||
|
||||
#endif /* RUBY_INTERNAL_ENCODING_SYMBOL_H */
|
||||
562
libs/libruby/ruby/internal/encoding/transcode.h
vendored
Normal file
562
libs/libruby/ruby/internal/encoding/transcode.h
vendored
Normal file
@@ -0,0 +1,562 @@
|
||||
#ifndef RUBY_INTERNAL_ENCODING_TRANSCODE_H /*-*-C++-*-vi:se ft=cpp:*/
|
||||
#define RUBY_INTERNAL_ENCODING_TRANSCODE_H
|
||||
/**
|
||||
* @file
|
||||
* @author Ruby developers <ruby-core@ruby-lang.org>
|
||||
* @copyright This file is a part of the programming language Ruby.
|
||||
* Permission is hereby granted, to either redistribute and/or
|
||||
* modify this file, provided that the conditions mentioned in the
|
||||
* file COPYING are met. Consult the file for details.
|
||||
* @warning Symbols prefixed with either `RBIMPL` or `rbimpl` are
|
||||
* implementation details. Don't take them as canon. They could
|
||||
* rapidly appear then vanish. The name (path) of this header file
|
||||
* is also an implementation detail. Do not expect it to persist
|
||||
* at the place it is now. Developers are free to move it anywhere
|
||||
* anytime at will.
|
||||
* @note To ruby-core: remember that this header can be possibly
|
||||
* recursively included from extension libraries written in C++.
|
||||
* Do not expect for instance `__VA_ARGS__` is always available.
|
||||
* We assume C99 for ruby itself but we don't assume languages of
|
||||
* extension libraries. They could be written in C++98.
|
||||
* @brief econv stuff
|
||||
*/
|
||||
|
||||
#include "ruby/internal/dllexport.h"
|
||||
#include "ruby/internal/value.h"
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_BEGIN()
|
||||
|
||||
/** return value of rb_econv_convert() */
|
||||
typedef enum {
|
||||
|
||||
/**
|
||||
* The conversion stopped when it found an invalid sequence.
|
||||
*/
|
||||
econv_invalid_byte_sequence,
|
||||
|
||||
/**
|
||||
* The conversion stopped when it found a character in the input which
|
||||
* cannot be represented in the output.
|
||||
*/
|
||||
econv_undefined_conversion,
|
||||
|
||||
/**
|
||||
* The conversion stopped because the destination buffer is full.
|
||||
*/
|
||||
econv_destination_buffer_full,
|
||||
|
||||
/**
|
||||
* The conversion stopped because there is no input.
|
||||
*/
|
||||
econv_source_buffer_empty,
|
||||
|
||||
/**
|
||||
* The conversion stopped after converting everything. This is arguably
|
||||
* the expected normal end of conversion.
|
||||
*/
|
||||
econv_finished,
|
||||
|
||||
/**
|
||||
* The conversion stopped after writing something to somewhere, before
|
||||
* reading everything.
|
||||
*/
|
||||
econv_after_output,
|
||||
|
||||
/**
|
||||
* The conversion stopped in the middle of reading a character, possibly due to
|
||||
* a partial read of a socket etc.
|
||||
*/
|
||||
econv_incomplete_input
|
||||
} rb_econv_result_t;
|
||||
|
||||
/** An opaque struct that represents a lowest level of encoding conversion. */
|
||||
typedef struct rb_econv_t rb_econv_t;
|
||||
|
||||
/**
|
||||
* Converts the contents of the passed string from its encoding to the passed
|
||||
* one.
|
||||
*
|
||||
* @param[in] str Target string.
|
||||
* @param[in] to Destination encoding.
|
||||
* @param[in] ecflags A set of enum
|
||||
* ::ruby_econv_flag_type.
|
||||
* @param[in] ecopts A keyword hash, like
|
||||
* ::rb_io_t::rb_io_enc_t::ecopts.
|
||||
* @exception rb_eArgError Not fully converted.
|
||||
* @exception rb_eInvalidByteSequenceError `str` is malformed.
|
||||
* @exception rb_eUndefinedConversionError `str` has a character not
|
||||
* representable using `to`.
|
||||
* @exception rb_eConverterNotFoundError There is no known conversion from
|
||||
* `str`'s encoding to `to`.
|
||||
* @return A string whose encoding is `to`, and whose contents is converted
|
||||
* contents of `str`.
|
||||
* @note Use rb_econv_prepare_options() to generate `ecopts`.
|
||||
*/
|
||||
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts);
|
||||
|
||||
/**
|
||||
* Queries if there is at least one way to convert between the passed two
|
||||
* encodings. Encoding conversions are has_and_belongs_to_many relationships.
|
||||
* There could be no direct conversion defined for the passed pair. Ruby tries
|
||||
* to find an indirect way to do so then. For instance ISO-8859-1 has no
|
||||
* direct conversion to ISO-2022-JP. But there is ISO-8859-1 to UTF-8
|
||||
* conversion; then there is UTF-8 to EUC-JP conversion; finally there also is
|
||||
* EUC-JP to ISO-2022-JP conversion. So in short ISO-8859-1 can be converted
|
||||
* to ISO-2022-JP using that path. This function returns true. Obviously not
|
||||
* everything that can be represented using UTF-8 can also be represented using
|
||||
* EUC-JP. Conversions in practice can fail depending on the actual input, and
|
||||
* that renders exceptions in case of rb_str_encode().
|
||||
*
|
||||
* @param[in] from_encoding One encoding.
|
||||
* @param[in] to_encoding Another encoding.
|
||||
* @retval 0 No way to convert the two.
|
||||
* @retval 1 At least one way to convert the two.
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* Practically @shyouhei knows no way for this function to return 0. It seems
|
||||
* everything can eventually be converted to/from UTF-8, which connects
|
||||
* everything.
|
||||
*/
|
||||
int rb_econv_has_convpath_p(const char* from_encoding, const char* to_encoding);
|
||||
|
||||
/**
|
||||
* Identical to rb_econv_prepare_opts(), except it additionally takes the
|
||||
* initial value of flags. The extra bits are bitwise-ORed to the return
|
||||
* value.
|
||||
*
|
||||
* @param[in] opthash Keyword arguments.
|
||||
* @param[out] ecopts Return buffer.
|
||||
* @param[in] ecflags Default set of enum ::ruby_econv_flag_type.
|
||||
* @exception rb_eArgError Unknown/Broken values passed.
|
||||
* @return Calculated set of enum ::ruby_econv_flag_type.
|
||||
* @post `ecopts` holds a hash object suitable for
|
||||
* ::rb_io_t::rb_io_enc_t::ecopts.
|
||||
*/
|
||||
int rb_econv_prepare_options(VALUE opthash, VALUE *ecopts, int ecflags);
|
||||
|
||||
/**
|
||||
* Splits a keyword arguments hash (that for instance `String#encode` took)
|
||||
* into a set of enum ::ruby_econv_flag_type and a hash storing replacement
|
||||
* characters etc.
|
||||
*
|
||||
* @param[in] opthash Keyword arguments.
|
||||
* @param[out] ecopts Return buffer.
|
||||
* @exception rb_eArgError Unknown/Broken values passed.
|
||||
* @return Calculated set of enum ::ruby_econv_flag_type.
|
||||
* @post `ecopts` holds a hash object suitable for
|
||||
* ::rb_io_t::rb_io_enc_t::ecopts.
|
||||
*/
|
||||
int rb_econv_prepare_opts(VALUE opthash, VALUE *ecopts);
|
||||
|
||||
/**
|
||||
* Creates a new instance of struct ::rb_econv_t.
|
||||
*
|
||||
* @param[in] source_encoding Name of an encoding.
|
||||
* @param[in] destination_encoding Name of another encoding.
|
||||
* @param[in] ecflags A set of enum ::ruby_econv_flag_type.
|
||||
* @exception rb_eArgError No such encoding.
|
||||
* @retval NULL Failed to create a struct ::rb_econv_t.
|
||||
* @retval otherwise Allocated struct ::rb_econv_t.
|
||||
* @warning Return value must be passed to rb_econv_close() exactly once.
|
||||
*/
|
||||
rb_econv_t *rb_econv_open(const char *source_encoding, const char *destination_encoding, int ecflags);
|
||||
|
||||
/**
|
||||
* Identical to rb_econv_open(), except it additionally takes a hash of
|
||||
* optional strings.
|
||||
*
|
||||
*
|
||||
* @param[in] source_encoding Name of an encoding.
|
||||
* @param[in] destination_encoding Name of another encoding.
|
||||
* @param[in] ecflags A set of enum ::ruby_econv_flag_type.
|
||||
* @param[in] ecopts Optional set of strings.
|
||||
* @exception rb_eArgError No such encoding.
|
||||
* @retval NULL Failed to create a struct ::rb_econv_t.
|
||||
* @retval otherwise Allocated struct ::rb_econv_t.
|
||||
* @warning Return value must be passed to rb_econv_close() exactly once.
|
||||
*/
|
||||
rb_econv_t *rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts);
|
||||
|
||||
/**
|
||||
* Converts a string from an encoding to another.
|
||||
*
|
||||
* Possible flags are either ::RUBY_ECONV_PARTIAL_INPUT (means the source
|
||||
* buffer is a part of much larger one), ::RUBY_ECONV_AFTER_OUTPUT (instructs
|
||||
* the converter to stop after output before input), or both of them.
|
||||
*
|
||||
* @param[in,out] ec Conversion specification/state etc.
|
||||
* @param[in] source_buffer_ptr Target string.
|
||||
* @param[in] source_buffer_end End of target string.
|
||||
* @param[out] destination_buffer_ptr Return buffer.
|
||||
* @param[out] destination_buffer_end End of return buffer.
|
||||
* @param[in] flags Flags (see above).
|
||||
* @return The status of the conversion.
|
||||
* @post `destination_buffer_ptr` holds conversion results.
|
||||
*/
|
||||
rb_econv_result_t rb_econv_convert(rb_econv_t *ec,
|
||||
const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end,
|
||||
unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end,
|
||||
int flags);
|
||||
|
||||
/**
|
||||
* Destructs a converter. Note that a converter can have a buffer, and can be
|
||||
* non-empty. Calling this would lose your data then.
|
||||
*
|
||||
* @param[out] ec The converter to destroy.
|
||||
* @post `ec` is no longer a valid pointer.
|
||||
*/
|
||||
void rb_econv_close(rb_econv_t *ec);
|
||||
|
||||
/**
|
||||
* Assigns the replacement string. The string passed here would appear in
|
||||
* converted string when it cannot represent its source counterpart. This can
|
||||
* happen for instance when you convert an emoji to ISO-8859-1.
|
||||
*
|
||||
* @param[out] ec Target converter.
|
||||
* @param[in] str Replacement string.
|
||||
* @param[in] len Number of bytes of `str`.
|
||||
* @param[in] encname Name of encoding of `str`.
|
||||
* @retval 0 Success.
|
||||
* @retval -1 Failure (ENOMEM etc.).
|
||||
* @post `ec`'s replacement string is set to `str`.
|
||||
*/
|
||||
int rb_econv_set_replacement(rb_econv_t *ec, const unsigned char *str, size_t len, const char *encname);
|
||||
|
||||
/**
|
||||
* "Decorate"s a converter. There are special kind of converters that
|
||||
* transform the contents, like replacing CR with CRLF. You can add such
|
||||
* decorators to a converter using this API. By using this function a
|
||||
* decorator is prepended at the beginning of a conversion sequence: in case of
|
||||
* CRLF conversion, newlines are converted before encodings are converted.
|
||||
*
|
||||
* @param[out] ec Target converter to decorate.
|
||||
* @param[in] decorator_name Name of decorator to prepend.
|
||||
* @retval 0 Success.
|
||||
* @retval -1 Failure (no such decorator etc.).
|
||||
* @post Decorator works before encoding conversion happens.
|
||||
*
|
||||
* @internal
|
||||
*
|
||||
* The possible values of `decorator_name` are not public. You have
|
||||
* to read through `transcode.c` carefully.
|
||||
*/
|
||||
int rb_econv_decorate_at_first(rb_econv_t *ec, const char *decorator_name);
|
||||
|
||||
/**
|
||||
* Identical to rb_econv_decorate_at_first(), except it adds to the opposite
|
||||
* direction. For instance CRLF conversion would run _after_ encodings are
|
||||
* converted.
|
||||
*
|
||||
* @param[out] ec Target converter to decorate.
|
||||
* @param[in] decorator_name Name of decorator to append.
|
||||
* @retval 0 Success.
|
||||
* @retval -1 Failure (no such decorator etc.).
|
||||
* @post Decorator works after encoding conversion happens.
|
||||
*/
|
||||
int rb_econv_decorate_at_last(rb_econv_t *ec, const char *decorator_name);
|
||||
|
||||
/**
|
||||
* Creates a `rb_eConverterNotFoundError` exception object (but does not
|
||||
* raise).
|
||||
*
|
||||
* @param[in] senc Name of source encoding.
|
||||
* @param[in] denc Name of destination encoding.
|
||||
* @param[in] ecflags A set of enum ::ruby_econv_flag_type.
|
||||
* @return An instance of `rb_eConverterNotFoundError`.
|
||||
*/
|
||||
VALUE rb_econv_open_exc(const char *senc, const char *denc, int ecflags);
|
||||
|
||||
/**
|
||||
* Appends the passed string to the passed converter's output buffer. This can
|
||||
* be handy when an encoding needs bytes out of thin air; for instance
|
||||
* ISO-2022-JP has "shift function" which does not correspond to any
|
||||
* characters.
|
||||
*
|
||||
* @param[out] ec Target converter.
|
||||
* @param[in] str String to insert.
|
||||
* @param[in] len Number of bytes of `str`.
|
||||
* @param[in] str_encoding Encoding of `str`.
|
||||
* @retval 0 Success.
|
||||
* @retval -1 Failure (conversion error etc.).
|
||||
* @note `str_encoding` can be anything, and `str` itself is converted
|
||||
* when necessary.
|
||||
*/
|
||||
int rb_econv_insert_output(rb_econv_t *ec,
|
||||
const unsigned char *str, size_t len, const char *str_encoding);
|
||||
|
||||
/**
|
||||
* Queries an encoding name which best suits rb_econv_insert_output()'s
|
||||
* last parameter. Strings in this encoding need no conversion when inserted;
|
||||
* can be both time/space efficient.
|
||||
*
|
||||
* @param[in] ec Target converter.
|
||||
* @return Its encoding for insertion.
|
||||
*/
|
||||
const char *rb_econv_encoding_to_insert_output(rb_econv_t *ec);
|
||||
|
||||
/**
|
||||
* This is a rb_econv_make_exception() + rb_exc_raise() combo.
|
||||
*
|
||||
* @param[in] ec (Possibly failed) conversion.
|
||||
* @exception rb_eInvalidByteSequenceError Invalid byte sequence.
|
||||
* @exception rb_eUndefinedConversionError Conversion undefined.
|
||||
* @note This function returns normally when there is no error.
|
||||
*/
|
||||
void rb_econv_check_error(rb_econv_t *ec);
|
||||
|
||||
/**
|
||||
* This function makes sense right after rb_econv_convert() returns. As listed
|
||||
* in ::rb_econv_result_t, rb_econv_convert() can bail out for various reasons.
|
||||
* This function checks the passed converter's internal state and converts it to
|
||||
* an appropriate exception object.
|
||||
*
|
||||
* @param[in] ec Target converter.
|
||||
* @retval RUBY_Qnil The converter has no error.
|
||||
* @retval otherwise Conversion error turned into an exception.
|
||||
*/
|
||||
VALUE rb_econv_make_exception(rb_econv_t *ec);
|
||||
|
||||
/**
|
||||
* Queries if rb_econv_putback() makes sense, i.e. whether invalid byte
|
||||
* sequences remain in the buffer.
|
||||
*
|
||||
* @param[in] ec Target converter.
|
||||
* @return Number of bytes that can be pushed back.
|
||||
*/
|
||||
int rb_econv_putbackable(rb_econv_t *ec);
|
||||
|
||||
/**
|
||||
* Puts back the bytes. In case of ::econv_invalid_byte_sequence, some of
|
||||
* those invalid bytes are discarded and the others are buffered to be
|
||||
* converted later. The latter bytes can be put back using this API.
|
||||
*
|
||||
* @param[out] ec Target converter (invalid byte sequence).
|
||||
* @param[out] p Return buffer.
|
||||
* @param[in] n Max number of bytes to put back.
|
||||
* @post At most `n` bytes of what was put back is written to `p`.
|
||||
*/
|
||||
void rb_econv_putback(rb_econv_t *ec, unsigned char *p, int n);
|
||||
|
||||
/**
|
||||
* Queries the passed encoding's corresponding ASCII compatible encoding. "The
|
||||
* corresponding ASCII compatible encoding" in this context is an ASCII
|
||||
* compatible encoding which can represent exactly the same character sets as
|
||||
* the given ASCII incompatible encoding. For instance that of UTF-16LE is
|
||||
* UTF-8.
|
||||
*
|
||||
* @param[in] encname Name of an ASCII incompatible encoding.
|
||||
* @retval NULL `encname` is already ASCII compatible.
|
||||
* @retval otherwise The corresponding ASCII compatible encoding.
|
||||
*/
|
||||
const char *rb_econv_asciicompat_encoding(const char *encname);
|
||||
|
||||
/**
|
||||
* Identical to rb_econv_convert(), except it takes Ruby's string instead of
|
||||
* C's pointer.
|
||||
*
|
||||
* @param[in,out] ec Target converter.
|
||||
* @param[in] src Source string.
|
||||
* @param[in] flags Flags (see rb_econv_convert).
|
||||
* @exception rb_eArgError Converted string is too long.
|
||||
* @exception rb_eInvalidByteSequenceError Invalid byte sequence.
|
||||
* @exception rb_eUndefinedConversionError Conversion undefined.
|
||||
* @return The conversion result.
|
||||
*/
|
||||
VALUE rb_econv_str_convert(rb_econv_t *ec, VALUE src, int flags);
|
||||
|
||||
/**
|
||||
* Identical to rb_econv_str_convert(), except it converts only a part of the
|
||||
* passed string. Can be handy when you for instance want to do line-buffered
|
||||
* conversion.
|
||||
*
|
||||
* @param[in,out] ec Target converter.
|
||||
* @param[in] src Source string.
|
||||
* @param[in] byteoff Number of bytes to seek.
|
||||
* @param[in] bytesize Number of bytes to read.
|
||||
* @param[in] flags Flags (see rb_econv_convert).
|
||||
* @exception rb_eArgError Converted string is too long.
|
||||
* @exception rb_eInvalidByteSequenceError Invalid byte sequence.
|
||||
* @exception rb_eUndefinedConversionError Conversion undefined.
|
||||
* @return The conversion result.
|
||||
*/
|
||||
VALUE rb_econv_substr_convert(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, int flags);
|
||||
|
||||
/**
|
||||
* Identical to rb_econv_str_convert(), except it appends the conversion result
|
||||
* to the additionally passed string instead of creating a new string. It can
|
||||
* also be seen as a routine identical to rb_econv_append(), except it takes a
|
||||
* Ruby's string instead of C's pointer.
|
||||
*
|
||||
* @param[in,out] ec Target converter.
|
||||
* @param[in] src Source string.
|
||||
* @param[out] dst Return buffer.
|
||||
* @param[in] flags Flags (see rb_econv_convert).
|
||||
* @exception rb_eArgError Converted string is too long.
|
||||
* @exception rb_eInvalidByteSequenceError Invalid byte sequence.
|
||||
* @exception rb_eUndefinedConversionError Conversion undefined.
|
||||
* @return The conversion result.
|
||||
*/
|
||||
VALUE rb_econv_str_append(rb_econv_t *ec, VALUE src, VALUE dst, int flags);
|
||||
|
||||
/**
|
||||
* Identical to rb_econv_str_append(), except it appends only a part of the
|
||||
* passed string with conversion. It can also be seen as a routine identical
|
||||
* to rb_econv_substr_convert(), except it appends the conversion result to the
|
||||
* additionally passed string instead of creating a new string.
|
||||
*
|
||||
* @param[in,out] ec Target converter.
|
||||
* @param[in] src Source string.
|
||||
* @param[in] byteoff Number of bytes to seek.
|
||||
* @param[in] bytesize Number of bytes to read.
|
||||
* @param[out] dst Return buffer.
|
||||
* @param[in] flags Flags (see rb_econv_convert).
|
||||
* @exception rb_eArgError Converted string is too long.
|
||||
* @exception rb_eInvalidByteSequenceError Invalid byte sequence.
|
||||
* @exception rb_eUndefinedConversionError Conversion undefined.
|
||||
* @return The conversion result.
|
||||
*/
|
||||
VALUE rb_econv_substr_append(rb_econv_t *ec, VALUE src, long byteoff, long bytesize, VALUE dst, int flags);
|
||||
|
||||
/**
|
||||
* Converts the passed C's pointer according to the passed converter, then
|
||||
* appends the conversion result to the passed Ruby's string. This way buffer
|
||||
* overflow is avoided, because the destination is resized as needed.
|
||||
*
|
||||
* @param[in,out] ec Target converter.
|
||||
* @param[in] bytesrc Target string.
|
||||
* @param[in] bytesize Number of bytes of `bytesrc`.
|
||||
* @param[out] dst Return buffer.
|
||||
* @param[in] flags Flags (see rb_econv_convert).
|
||||
* @exception rb_eArgError Converted string is too long.
|
||||
* @exception rb_eInvalidByteSequenceError Invalid byte sequence.
|
||||
* @exception rb_eUndefinedConversionError Conversion undefined.
|
||||
* @return The conversion result.
|
||||
*/
|
||||
VALUE rb_econv_append(rb_econv_t *ec, const char *bytesrc, long bytesize, VALUE dst, int flags);
|
||||
|
||||
/**
|
||||
* This badly named function does not set the destination encoding to binary,
|
||||
* but instead just nullifies newline conversion decorators if any. Other
|
||||
* ordinary character conversions still happen after this; something non-binary
|
||||
* would still be generated.
|
||||
*
|
||||
* @param[out] ec Target converter to modify.
|
||||
* @post Any newline conversions, if any, would be killed.
|
||||
*/
|
||||
void rb_econv_binmode(rb_econv_t *ec);
|
||||
|
||||
/**
|
||||
* Omnibus enum gathering the flag constants (and their bit masks) for rb_econv_open() and rb_econv_convert().
|
||||
*/
|
||||
enum ruby_econv_flag_type {
|
||||
|
||||
/**
|
||||
* @name Flags for rb_econv_open()
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** Mask for error handling related bits (union of the INVALID and UNDEF masks). */
|
||||
RUBY_ECONV_ERROR_HANDLER_MASK = 0x000000ff,
|
||||
|
||||
/** Mask for the bits that select special handling of invalid sequences. */
|
||||
RUBY_ECONV_INVALID_MASK = 0x0000000f,
|
||||
|
||||
/** Invalid sequences shall be replaced. */
|
||||
RUBY_ECONV_INVALID_REPLACE = 0x00000002,
|
||||
|
||||
/** Mask for the bits that select special handling of undefined conversions. */
|
||||
RUBY_ECONV_UNDEF_MASK = 0x000000f0,
|
||||
|
||||
/** Undefined characters shall be replaced. */
|
||||
RUBY_ECONV_UNDEF_REPLACE = 0x00000020,
|
||||
|
||||
/** Undefined characters shall be escaped (as hex character references, going by the name). */
|
||||
RUBY_ECONV_UNDEF_HEX_CHARREF = 0x00000030,
|
||||
|
||||
/** Mask for decorator bits: covers the newline, XML_TEXT and XML_ATTR_CONTENT decorators. */
|
||||
RUBY_ECONV_DECORATOR_MASK = 0x0001ff00,
|
||||
|
||||
/** Mask for newline decorator bits (union of the READ and WRITE masks). */
|
||||
RUBY_ECONV_NEWLINE_DECORATOR_MASK = 0x00007f00,
|
||||
|
||||
/** (Unclear; seems unused). Covers the UNIVERSAL_NEWLINE bit. */
|
||||
RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK = 0x00000f00,
|
||||
|
||||
/** (Unclear; seems unused). Covers the CRLF, CR and LF newline bits. */
|
||||
RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK = 0x00007000,
|
||||
|
||||
/** Universal newline mode. */
|
||||
RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR = 0x00000100,
|
||||
|
||||
/** CR to CRLF conversion shall happen. NOTE(review): Ruby's Encoding::Converter docs say this decorator converts LF to CRLF -- confirm. */
|
||||
RUBY_ECONV_CRLF_NEWLINE_DECORATOR = 0x00001000,
|
||||
|
||||
/** CRLF to CR conversion shall happen. NOTE(review): Ruby's Encoding::Converter docs say this decorator converts LF to CR -- confirm. */
|
||||
RUBY_ECONV_CR_NEWLINE_DECORATOR = 0x00002000,
|
||||
|
||||
/** CRLF to LF conversion shall happen. */
|
||||
RUBY_ECONV_LF_NEWLINE_DECORATOR = 0x00004000,
|
||||
|
||||
/** Texts shall be XML-escaped. */
|
||||
RUBY_ECONV_XML_TEXT_DECORATOR = 0x00008000,
|
||||
|
||||
/** Texts shall be AttrValue escaped. */
|
||||
RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR = 0x00010000,
|
||||
|
||||
/** (Unclear; seems unused). Covers the XML_ATTR_QUOTE bit. */
|
||||
RUBY_ECONV_STATEFUL_DECORATOR_MASK = 0x00f00000,
|
||||
|
||||
/** Attribute values shall be quoted (presumably wrapped in quotation marks -- confirm in transcode.c). */
|
||||
RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR = 0x00100000,
|
||||
|
||||
/** Newline decorator's default: CRLF when _WIN32 or RUBY_TEST_CRLF_ENVIRONMENT is defined, otherwise none (0). */
|
||||
RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR =
|
||||
#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32)
|
||||
RUBY_ECONV_CRLF_NEWLINE_DECORATOR,
|
||||
#else
|
||||
0,
|
||||
#endif
|
||||
|
||||
#define ECONV_ERROR_HANDLER_MASK RUBY_ECONV_ERROR_HANDLER_MASK /**< @old{RUBY_ECONV_ERROR_HANDLER_MASK} */
|
||||
#define ECONV_INVALID_MASK RUBY_ECONV_INVALID_MASK /**< @old{RUBY_ECONV_INVALID_MASK} */
|
||||
#define ECONV_INVALID_REPLACE RUBY_ECONV_INVALID_REPLACE /**< @old{RUBY_ECONV_INVALID_REPLACE} */
|
||||
#define ECONV_UNDEF_MASK RUBY_ECONV_UNDEF_MASK /**< @old{RUBY_ECONV_UNDEF_MASK} */
|
||||
#define ECONV_UNDEF_REPLACE RUBY_ECONV_UNDEF_REPLACE /**< @old{RUBY_ECONV_UNDEF_REPLACE} */
|
||||
#define ECONV_UNDEF_HEX_CHARREF RUBY_ECONV_UNDEF_HEX_CHARREF /**< @old{RUBY_ECONV_UNDEF_HEX_CHARREF} */
|
||||
#define ECONV_DECORATOR_MASK RUBY_ECONV_DECORATOR_MASK /**< @old{RUBY_ECONV_DECORATOR_MASK} */
|
||||
#define ECONV_NEWLINE_DECORATOR_MASK RUBY_ECONV_NEWLINE_DECORATOR_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_MASK} */
|
||||
#define ECONV_NEWLINE_DECORATOR_READ_MASK RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_READ_MASK} */
|
||||
#define ECONV_NEWLINE_DECORATOR_WRITE_MASK RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK /**< @old{RUBY_ECONV_NEWLINE_DECORATOR_WRITE_MASK} */
|
||||
#define ECONV_UNIVERSAL_NEWLINE_DECORATOR RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_UNIVERSAL_NEWLINE_DECORATOR} */
|
||||
#define ECONV_CRLF_NEWLINE_DECORATOR RUBY_ECONV_CRLF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CRLF_NEWLINE_DECORATOR} */
|
||||
#define ECONV_CR_NEWLINE_DECORATOR RUBY_ECONV_CR_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_CR_NEWLINE_DECORATOR} */
|
||||
#define ECONV_LF_NEWLINE_DECORATOR RUBY_ECONV_LF_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_LF_NEWLINE_DECORATOR} */
|
||||
#define ECONV_XML_TEXT_DECORATOR RUBY_ECONV_XML_TEXT_DECORATOR /**< @old{RUBY_ECONV_XML_TEXT_DECORATOR} */
|
||||
#define ECONV_XML_ATTR_CONTENT_DECORATOR RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_CONTENT_DECORATOR} */
|
||||
#define ECONV_STATEFUL_DECORATOR_MASK RUBY_ECONV_STATEFUL_DECORATOR_MASK /**< @old{RUBY_ECONV_STATEFUL_DECORATOR_MASK} */
|
||||
#define ECONV_XML_ATTR_QUOTE_DECORATOR RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR /**< @old{RUBY_ECONV_XML_ATTR_QUOTE_DECORATOR} */
|
||||
#define ECONV_DEFAULT_NEWLINE_DECORATOR RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR /**< @old{RUBY_ECONV_DEFAULT_NEWLINE_DECORATOR} */
|
||||
/** @} */
|
||||
|
||||
/**
|
||||
* @name Flags for rb_econv_convert()
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** Indicates the input is a part of much larger one. */
|
||||
RUBY_ECONV_PARTIAL_INPUT = 0x00020000,
|
||||
|
||||
/** Instructs the converter to stop after output. */
|
||||
RUBY_ECONV_AFTER_OUTPUT = 0x00040000,
|
||||
#define ECONV_PARTIAL_INPUT RUBY_ECONV_PARTIAL_INPUT /**< @old{RUBY_ECONV_PARTIAL_INPUT} */
|
||||
#define ECONV_AFTER_OUTPUT RUBY_ECONV_AFTER_OUTPUT /**< @old{RUBY_ECONV_AFTER_OUTPUT} */
|
||||
|
||||
RUBY_ECONV_FLAGS_PLACEHOLDER /**< Placeholder (not used); presumably lets the preceding enumerator keep its trailing comma. */
|
||||
};
|
||||
|
||||
RBIMPL_SYMBOL_EXPORT_END()
|
||||
|
||||
#endif /* RUBY_INTERNAL_ENCODING_TRANSCODE_H */
|
||||
Reference in New Issue
Block a user