Seregon/StratoSDK

StratoSDK is a framework with a declarative approach similar to Flutter/React, designed for and written entirely in Rust.

Rust/27.3 KB/No license
crates/strato-ui-core/src/text/word_boundaries.rs
StratoSDK / crates / strato-ui-core / src / text / word_boundaries.rs
1use std::iter::Peekable;
2use std::{borrow::Cow, collections::HashSet};
3 
4use crate::text_offsets::CharOffset;
5use itertools::Either;
6 
7use super::point::Point;
8 
9use super::words::is_default_word_boundary;
10use super::TextBuffer;
11 
/// Configures which characters the [`WordBoundaries`] iterator treats as separators between
/// words.
///
/// Two policies compare equal when they are the same variant and, for [`Custom`], carry the
/// same set of characters.
///
/// [`Custom`]: WordBoundariesPolicy::Custom
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum WordBoundariesPolicy {
    /// Break words on spaces and the other characters accepted by
    /// `words::is_default_word_boundary`.
    Default,
    /// Break words on whitespace (as reported by [`char::is_whitespace`]) plus the provided set
    /// of extra characters.
    Custom(HashSet<char>),
    /// Break words only on whitespace. The check is [`char::is_whitespace`], so Unicode
    /// whitespace counts as a separator too, not just ASCII whitespace.
    OnlyWhitespace,
}
22 
/// Selects the scan direction of a [`WordBoundaries`] iterator and which edge of each word it
/// reports.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WordBoundariesApproach {
    /// Scan forwards, reporting the positions where a word begins.
    ForwardWordStarts,
    /// Scan forwards, reporting the positions where a word ends.
    ForwardWordEnds,
    /// Scan backwards, reporting the positions where a word begins.
    BackwardWordStarts,
}
29 
30/// Iterator that returns the edges of words from a given offset, based on the selected approach
31pub struct WordBoundaries<'a, T: TextBuffer + ?Sized> {
32 offset: CharOffset,
33 chars: Peekable<Either<T::Chars<'a>, T::CharsReverse<'a>>>,
34 buffer: &'a T,
35 in_word: bool,
36 approach: WordBoundariesApproach,
37 policy: Cow<'a, WordBoundariesPolicy>,
38 done: bool,
39}
40 
41impl<'a, T: TextBuffer + ?Sized> WordBoundaries<'a, T> {
42 pub fn with_policy(mut self, policy: impl Into<Cow<'a, WordBoundariesPolicy>>) -> Self {
43 self.policy = policy.into();
44 self
45 }
46 
47 /// Create an iterator that will return the starts of words moving forwards
48 pub fn forward_starts(offset: CharOffset, chars: T::Chars<'a>, buffer: &'a T) -> Self {
49 Self {
50 offset,
51 buffer,
52 chars: Either::Left(chars).peekable(),
53 in_word: true,
54 approach: WordBoundariesApproach::ForwardWordStarts,
55 policy: Cow::Owned(WordBoundariesPolicy::Default),
56 done: false,
57 }
58 }
59 
60 /// Create an iterator that will return the ends of words moving forwards, exclusive of the
61 /// offset position.
62 ///
63 /// Example: For a buffer of "word one two three", with an offset of `4` (immediately after
64 /// the 'word'), this will yield columns [8, 12, 18], the ends of `one`, `two`, and `three`,
65 /// but _excluding_ the initial position at the end of `word`.
66 pub fn forward_ends_exclusive(offset: CharOffset, chars: T::Chars<'a>, buffer: &'a T) -> Self {
67 Self {
68 offset,
69 buffer,
70 chars: Either::Left(chars).peekable(),
71 in_word: false,
72 approach: WordBoundariesApproach::ForwardWordEnds,
73 policy: Cow::Owned(WordBoundariesPolicy::Default),
74 done: false,
75 }
76 }
77 
78 /// Create an iterator that will return the ends of words moving forwards, inclusive of the
79 /// offset position.
80 ///
81 /// Example: For a buffer of "word one two three", with an offset of `4` (immediately after
82 /// the 'word'), this will yield columns [4, 8, 12, 18], the ends of all four words,
83 /// _including_ the initial position at the end of `word`.
84 pub fn forward_ends_inclusive(offset: CharOffset, chars: T::Chars<'a>, buffer: &'a T) -> Self {
85 Self {
86 offset,
87 buffer,
88 chars: Either::Left(chars).peekable(),
89 in_word: true,
90 approach: WordBoundariesApproach::ForwardWordEnds,
91 policy: Cow::Owned(WordBoundariesPolicy::Default),
92 done: false,
93 }
94 }
95 
96 /// Create an iterator that will return the starts of words moving _backwards_, exclusive of
97 /// the offset position
98 ///
99 /// Example: For a buffer of "word one two three", with an offset of `13` (immediately before
100 /// the 'three'), this will yield columns [9, 5, 0], the starts of `two`, `one`, and `word`,
101 /// but _excluding_ the initial position at the start of `three`.
102 pub fn backward_starts_exclusive(
103 offset: CharOffset,
104 chars: T::CharsReverse<'a>,
105 buffer: &'a T,
106 ) -> Self {
107 Self {
108 offset,
109 buffer,
110 chars: Either::Right(chars).peekable(),
111 in_word: false,
112 approach: WordBoundariesApproach::BackwardWordStarts,
113 policy: Cow::Owned(WordBoundariesPolicy::Default),
114 done: false,
115 }
116 }
117 
118 /// Create an iterator that will return the starts of words moving _backwards_, inclusive of
119 /// the offset position
120 ///
121 /// Example: For a buffer of "word one two three", with an offset of `13` (immediately before
122 /// the 'three'), this will yield columns [13, 9, 5, 0], the starts of all four words,
123 /// _including_ the initial position at the start of `three`.
124 pub fn backward_starts_inclusive(
125 offset: CharOffset,
126 chars: T::CharsReverse<'a>,
127 buffer: &'a T,
128 ) -> Self {
129 Self {
130 offset,
131 buffer,
132 chars: Either::Right(chars).peekable(),
133 in_word: true,
134 approach: WordBoundariesApproach::BackwardWordStarts,
135 policy: Cow::Owned(WordBoundariesPolicy::Default),
136 done: false,
137 }
138 }
139 
140 fn step(&mut self) {
141 self.chars.next();
142 match self.approach {
143 WordBoundariesApproach::ForwardWordStarts | WordBoundariesApproach::ForwardWordEnds => {
144 self.offset += 1;
145 }
146 WordBoundariesApproach::BackwardWordStarts => {
147 self.offset -= 1;
148 }
149 }
150 }
151 
152 fn is_word_boundary(&self, c: char) -> bool {
153 match self.policy.as_ref() {
154 WordBoundariesPolicy::Default => is_default_word_boundary(c),
155 WordBoundariesPolicy::Custom(boundary_chars) => {
156 c.is_whitespace() || boundary_chars.contains(&c)
157 }
158 WordBoundariesPolicy::OnlyWhitespace => c.is_whitespace(),
159 }
160 }
161}
162 
163impl<T: TextBuffer + ?Sized> Iterator for WordBoundaries<'_, T> {
164 type Item = Point;
165 
166 fn next(&mut self) -> Option<Self::Item> {
167 while let Some(&c) = self.chars.peek() {
168 match self.approach {
169 // For forward word starts, we look for the transition from not in a word (i.e. in
170 // a separator) to in a word. That boundary is the start of a new word
171 WordBoundariesApproach::ForwardWordStarts => {
172 if self.in_word {
173 self.step();
174 
175 if self.is_word_boundary(c) {
176 self.in_word = false;
177 }
178 } else if self.is_word_boundary(c) {
179 self.step();
180 } else {
181 // We are not in a word, but the next character _is_ in a word, so
182 // we've found the start of the next word. We mark ourselves as being
183 // in a word (for the next iteration), then return the point.
184 self.in_word = true;
185 return self.buffer.to_point(self.offset).ok();
186 }
187 }
188 // For forward word ends, we look for the transition from in a word to not in a
189 // word. That boundary is the end of the current word. We also look for the same
190 // boundary for backward starts, since going backwards the transition from in a
191 // word to not in a word represents the _beginning_ of the current word
192 WordBoundariesApproach::ForwardWordEnds
193 | WordBoundariesApproach::BackwardWordStarts => {
194 if self.in_word {
195 if self.is_word_boundary(c) {
196 // We are in a word, but the next character is _not_ in a word, so we
197 // have found the boundary. We mark ourselves as not being in a word,
198 // then return the point.
199 self.in_word = false;
200 return self.buffer.to_point(self.offset).ok();
201 } else {
202 self.step();
203 }
204 } else {
205 self.step();
206 
207 if !self.is_word_boundary(c) {
208 self.in_word = true;
209 }
210 }
211 }
212 }
213 }
214 
215 // We have consumed all of the characters in the given direction. However, we should also
216 // treat the end (or beginning if backward) of the buffer as a word boundary. We only want
217 // to return that once, however, so we mark ourselves as done afterwards.
218 if self.done {
219 None
220 } else {
221 self.done = true;
222 
223 self.buffer.to_point(self.offset).ok()
224 }
225 }
226}
227 
228impl From<WordBoundariesPolicy> for Cow<'_, WordBoundariesPolicy> {
229 fn from(policy: WordBoundariesPolicy) -> Self {
230 Cow::Owned(policy)
231 }
232}
233 
234impl<'a> From<&'a WordBoundariesPolicy> for Cow<'a, WordBoundariesPolicy> {
235 fn from(policy: &'a WordBoundariesPolicy) -> Self {
236 Cow::Borrowed(policy)
237 }
238}
239 
240#[cfg(test)]
241#[path = "word_boundaries_tests.rs"]
242mod tests;
243