Open
Description
The grapheme boundaries of "🇷🇸🇮🇴" should be at byte offsets 8 and 16, but when I feed GraphemeCursor
the individual RIS (regional indicator symbol) code points one chunk at a time, I get 8 and 12. Am I using the API incorrectly, or is this a bug?
use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
/// Reproduction: walk the grapheme boundaries of two flag emoji by feeding
/// `GraphemeCursor` one regional indicator symbol (RIS) per chunk.
///
/// The text is four RIS code points (U+1F1F7, U+1F1F8, U+1F1EE, U+1F1F4), each
/// 4 bytes long in UTF-8, so the chunks start at byte offsets 0, 4, 8 and 12.
/// The RIS pair up into two flags, so the expected boundaries are 8 and 16.
/// The final step reports 12 instead of 16, which is what this program
/// demonstrates by panicking.
fn main() {
    let s = "🇷🇸🇮🇴";
    // Guard against the literal being re-encoded: every offset below assumes
    // four 4-byte code points.
    assert_eq!(s.len(), 16, "expected four 4-byte regional indicator symbols");

    let mut cursor = GraphemeCursor::new(0, s.len(), true);

    // ---- First flag: 🇷🇸 (bytes 0..8) ----

    // The first chunk alone cannot decide the boundary; the cursor asks for
    // the next chunk.
    match cursor.next_boundary("🇷", 0) {
        Err(GraphemeIncomplete::NextChunk) => {}
        _ => unreachable!(),
    }
    // Looking at the second RIS, the cursor asks for the chunk that ends at
    // byte 4, i.e. the first RIS.
    match cursor.next_boundary("🇸", 4) {
        Err(GraphemeIncomplete::PreContext(4)) => {
            cursor.provide_context("🇷", 0);
        }
        _ => unreachable!(),
    }
    // With that context the pair is not broken; continue into the next chunk.
    match cursor.next_boundary("🇸", 4) {
        Err(GraphemeIncomplete::NextChunk) => {}
        _ => unreachable!(),
    }
    // At the third RIS the cursor walks backwards through the preceding RIS,
    // one chunk per request.
    match cursor.next_boundary("🇮", 8) {
        Err(GraphemeIncomplete::PreContext(8)) => {
            cursor.provide_context("🇸", 4);
        }
        _ => unreachable!(),
    }
    match cursor.next_boundary("🇮", 8) {
        Err(GraphemeIncomplete::PreContext(4)) => {
            cursor.provide_context("🇷", 0);
        }
        _ => unreachable!(),
    }
    // Two RIS precede offset 8, so a boundary is correctly reported there.
    match cursor.next_boundary("🇮", 8) {
        Ok(Some(8)) => {}
        _ => unreachable!(),
    }

    // ---- Second flag: 🇮🇴 (bytes 8..16) ----

    match cursor.next_boundary("🇮", 8) {
        Err(GraphemeIncomplete::NextChunk) => {}
        _ => unreachable!(),
    }
    // At the fourth RIS the cursor again requests each preceding chunk in
    // turn, all the way back to the start of the string.
    match cursor.next_boundary("🇴", 12) {
        Err(GraphemeIncomplete::PreContext(12)) => {
            cursor.provide_context("🇮", 8);
        }
        _ => unreachable!(),
    }
    match cursor.next_boundary("🇴", 12) {
        Err(GraphemeIncomplete::PreContext(8)) => {
            cursor.provide_context("🇸", 4);
        }
        _ => unreachable!(),
    }
    match cursor.next_boundary("🇴", 12) {
        Err(GraphemeIncomplete::PreContext(4)) => {
            cursor.provide_context("🇷", 0);
        }
        _ => unreachable!(),
    }
    // Expected: three RIS precede offset 12 (an odd count), so 🇮🇴 must stay
    // together and the next boundary is 16. Observed: 12.
    match cursor.next_boundary("🇴", 12) {
        Ok(Some(16)) => {}
        Ok(Some(12)) => panic!("this should be 16"),
        _ => unreachable!(),
    }
}
Metadata
Metadata
Assignees
Labels
No labels