git.seregonwar.com

crates/strato-ui-core/src/text/word_boundaries.rs

StratoSDK / crates / strato-ui-core / src / text / word_boundaries.rs

1	use std::iter::Peekable;
2	use std::{borrow::Cow, collections::HashSet};
3
4	use crate::text_offsets::CharOffset;
5	use itertools::Either;
6
7	use super::point::Point;
8
9	use super::words::is_default_word_boundary;
10	use super::TextBuffer;
11
/// Configures which characters the [`WordBoundaries`] iterator treats as separating words.
///
/// Policies compare by value, so two `Custom` policies are equal exactly when their
/// character sets are equal.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum WordBoundariesPolicy {
    /// Break words wherever `words::is_default_word_boundary` reports a boundary. The exact
    /// character set is owned by that function; nothing is added to it here.
    Default,
    /// Break words on whitespace plus a specific set of provided characters. "Whitespace" is
    /// whatever [`char::is_whitespace`] accepts, i.e. Unicode whitespace, not only `' '`.
    Custom(HashSet<char>),
    /// Break words only on whitespace, as defined by [`char::is_whitespace`]. This is the
    /// Unicode `White_Space` property, so non-ASCII spaces (e.g. U+00A0) also split words.
    OnlyWhitespace,
}
22
/// Selects both the direction of travel and which edge of a word the [`WordBoundaries`]
/// iterator reports.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WordBoundariesApproach {
    /// Walk forwards and report the position where each word begins.
    ForwardWordStarts,
    /// Walk forwards and report the position just past the end of each word.
    ForwardWordEnds,
    /// Walk backwards and report the position where each word begins.
    BackwardWordStarts,
}
29
30	/// Iterator that returns the edges of words from a given offset, based on the selected approach
31	pub struct WordBoundaries<'a, T: TextBuffer + ?Sized> {
32	offset: CharOffset,
33	chars: Peekable<Either<T::Chars<'a>, T::CharsReverse<'a>>>,
34	buffer: &'a T,
35	in_word: bool,
36	approach: WordBoundariesApproach,
37	policy: Cow<'a, WordBoundariesPolicy>,
38	done: bool,
39	}
40
41	impl<'a, T: TextBuffer + ?Sized> WordBoundaries<'a, T> {
42	pub fn with_policy(mut self, policy: impl Into<Cow<'a, WordBoundariesPolicy>>) -> Self {
43	self.policy = policy.into();
44	self
45	}
46
47	/// Create an iterator that will return the starts of words moving forwards
48	pub fn forward_starts(offset: CharOffset, chars: T::Chars<'a>, buffer: &'a T) -> Self {
49	Self {
50	offset,
51	buffer,
52	chars: Either::Left(chars).peekable(),
53	in_word: true,
54	approach: WordBoundariesApproach::ForwardWordStarts,
55	policy: Cow::Owned(WordBoundariesPolicy::Default),
56	done: false,
57	}
58	}
59
60	/// Create an iterator that will return the ends of words moving forwards, exclusive of the
61	/// offset position.
62	///
63	/// Example: For a buffer of "word one two three", with an offset of `4` (immediately after
64	/// the 'word'), this will yield columns [8, 12, 18], the ends of `one`, `two`, and `three`,
65	/// but _excluding_ the initial position at the end of `word`.
66	pub fn forward_ends_exclusive(offset: CharOffset, chars: T::Chars<'a>, buffer: &'a T) -> Self {
67	Self {
68	offset,
69	buffer,
70	chars: Either::Left(chars).peekable(),
71	in_word: false,
72	approach: WordBoundariesApproach::ForwardWordEnds,
73	policy: Cow::Owned(WordBoundariesPolicy::Default),
74	done: false,
75	}
76	}
77
78	/// Create an iterator that will return the ends of words moving forwards, inclusive of the
79	/// offset position.
80	///
81	/// Example: For a buffer of "word one two three", with an offset of `4` (immediately after
82	/// the 'word'), this will yield columns [4, 8, 12, 18], the ends of all four words,
83	/// _including_ the initial position at the end of `word`.
84	pub fn forward_ends_inclusive(offset: CharOffset, chars: T::Chars<'a>, buffer: &'a T) -> Self {
85	Self {
86	offset,
87	buffer,
88	chars: Either::Left(chars).peekable(),
89	in_word: true,
90	approach: WordBoundariesApproach::ForwardWordEnds,
91	policy: Cow::Owned(WordBoundariesPolicy::Default),
92	done: false,
93	}
94	}
95
96	/// Create an iterator that will return the starts of words moving _backwards_, exclusive of
97	/// the offset position
98	///
99	/// Example: For a buffer of "word one two three", with an offset of `13` (immediately before
100	/// the 'three'), this will yield columns [9, 5, 0], the starts of `two`, `one`, and `word`,
101	/// but _excluding_ the initial position at the start of `three`.
102	pub fn backward_starts_exclusive(
103	offset: CharOffset,
104	chars: T::CharsReverse<'a>,
105	buffer: &'a T,
106	) -> Self {
107	Self {
108	offset,
109	buffer,
110	chars: Either::Right(chars).peekable(),
111	in_word: false,
112	approach: WordBoundariesApproach::BackwardWordStarts,
113	policy: Cow::Owned(WordBoundariesPolicy::Default),
114	done: false,
115	}
116	}
117
118	/// Create an iterator that will return the starts of words moving _backwards_, inclusive of
119	/// the offset position
120	///
121	/// Example: For a buffer of "word one two three", with an offset of `13` (immediately before
122	/// the 'three'), this will yield columns [13, 9, 5, 0], the starts of all four words,
123	/// _including_ the initial position at the start of `three`.
124	pub fn backward_starts_inclusive(
125	offset: CharOffset,
126	chars: T::CharsReverse<'a>,
127	buffer: &'a T,
128	) -> Self {
129	Self {
130	offset,
131	buffer,
132	chars: Either::Right(chars).peekable(),
133	in_word: true,
134	approach: WordBoundariesApproach::BackwardWordStarts,
135	policy: Cow::Owned(WordBoundariesPolicy::Default),
136	done: false,
137	}
138	}
139
140	fn step(&mut self) {
141	self.chars.next();
142	match self.approach {
143	WordBoundariesApproach::ForwardWordStarts \| WordBoundariesApproach::ForwardWordEnds => {
144	self.offset += 1;
145	}
146	WordBoundariesApproach::BackwardWordStarts => {
147	self.offset -= 1;
148	}
149	}
150	}
151
152	fn is_word_boundary(&self, c: char) -> bool {
153	match self.policy.as_ref() {
154	WordBoundariesPolicy::Default => is_default_word_boundary(c),
155	WordBoundariesPolicy::Custom(boundary_chars) => {
156	c.is_whitespace() \|\| boundary_chars.contains(&c)
157	}
158	WordBoundariesPolicy::OnlyWhitespace => c.is_whitespace(),
159	}
160	}
161	}
162
163	impl<T: TextBuffer + ?Sized> Iterator for WordBoundaries<'_, T> {
164	type Item = Point;
165
166	fn next(&mut self) -> Option<Self::Item> {
167	while let Some(&c) = self.chars.peek() {
168	match self.approach {
169	// For forward word starts, we look for the transition from not in a word (i.e. in
170	// a separator) to in a word. That boundary is the start of a new word
171	WordBoundariesApproach::ForwardWordStarts => {
172	if self.in_word {
173	self.step();
174
175	if self.is_word_boundary(c) {
176	self.in_word = false;
177	}
178	} else if self.is_word_boundary(c) {
179	self.step();
180	} else {
181	// We are not in a word, but the next character _is_ in a word, so
182	// we've found the start of the next word. We mark ourselves as being
183	// in a word (for the next iteration), then return the point.
184	self.in_word = true;
185	return self.buffer.to_point(self.offset).ok();
186	}
187	}
188	// For forward word ends, we look for the transition from in a word to not in a
189	// word. That boundary is the end of the current word. We also look for the same
190	// boundary for backward starts, since going backwards the transition from in a
191	// word to not in a word represents the _beginning_ of the current word
192	WordBoundariesApproach::ForwardWordEnds
193	\| WordBoundariesApproach::BackwardWordStarts => {
194	if self.in_word {
195	if self.is_word_boundary(c) {
196	// We are in a word, but the next character is _not_ in a word, so we
197	// have found the boundary. We mark ourselves as not being in a word,
198	// then return the point.
199	self.in_word = false;
200	return self.buffer.to_point(self.offset).ok();
201	} else {
202	self.step();
203	}
204	} else {
205	self.step();
206
207	if !self.is_word_boundary(c) {
208	self.in_word = true;
209	}
210	}
211	}
212	}
213	}
214
215	// We have consumed all of the characters in the given direction. However, we should also
216	// treat the end (or beginning if backward) of the buffer as a word boundary. We only want
217	// to return that once, however, so we mark ourselves as done afterwards.
218	if self.done {
219	None
220	} else {
221	self.done = true;
222
223	self.buffer.to_point(self.offset).ok()
224	}
225	}
226	}
227
228	impl From<WordBoundariesPolicy> for Cow<'_, WordBoundariesPolicy> {
229	fn from(policy: WordBoundariesPolicy) -> Self {
230	Cow::Owned(policy)
231	}
232	}
233
234	impl<'a> From<&'a WordBoundariesPolicy> for Cow<'a, WordBoundariesPolicy> {
235	fn from(policy: &'a WordBoundariesPolicy) -> Self {
236	Cow::Borrowed(policy)
237	}
238	}
239
240	#[cfg(test)]
241	#[path = "word_boundaries_tests.rs"]
242	mod tests;
243

Seregon/StratoSDK