StratoSDK is a framework with a declarative approach similar to Flutter/React, written in and designed entirely for Rust.
| 1 | use std::iter::Peekable; |
| 2 | use std::{borrow::Cow, collections::HashSet}; |
| 3 | |
| 4 | use crate::text_offsets::CharOffset; |
| 5 | use itertools::Either; |
| 6 | |
| 7 | use super::point::Point; |
| 8 | |
| 9 | use super::words::is_default_word_boundary; |
| 10 | use super::TextBuffer; |
| 11 | |
/// This enum configures how the `WordBoundaries` iterator defines a "word".
///
/// Policies can be compared with `==`; two `Custom` policies are equal when they hold the
/// same set of characters.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum WordBoundariesPolicy {
    /// Break words on spaces and the characters specified in `words::is_default_word_boundary`.
    Default,
    /// Break words on whitespace (as defined by [`char::is_whitespace`]) plus a specific set
    /// of provided characters.
    Custom(HashSet<char>),
    /// Break words only on whitespace. Whitespace is decided by [`char::is_whitespace`], so
    /// this covers every Unicode `White_Space` character, not just the ASCII ones.
    OnlyWhitespace,
}
| 22 | |
/// Selects which edge of a word the `WordBoundaries` iterator reports and the direction in
/// which it walks the buffer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WordBoundariesApproach {
    /// Walk forwards and report the position where each word starts.
    ForwardWordStarts,
    /// Walk forwards and report the position where each word ends.
    ForwardWordEnds,
    /// Walk backwards and report the position where each word starts.
    BackwardWordStarts,
}
| 29 | |
/// Iterator that returns the edges of words from a given offset, based on the selected approach
///
/// Each yielded item is the current character offset converted to a `Point` via the buffer.
/// After the underlying characters are exhausted, the final offset is yielded one more time
/// so that the end (or beginning, when walking backwards) of the input counts as a boundary.
pub struct WordBoundaries<'a, T: TextBuffer + ?Sized> {
    /// Character offset of the current scan position; moved by one each time a character is
    /// consumed (up when walking forwards, down when walking backwards).
    offset: CharOffset,
    /// The characters still to be examined: the `Left` side holds a forward iterator, the
    /// `Right` side a reverse one. Peekable so a character can be inspected before it is
    /// consumed.
    chars: Peekable<Either<T::Chars<'a>, T::CharsReverse<'a>>>,
    /// Buffer used to translate `offset` into the `Point` that is yielded.
    buffer: &'a T,
    /// Whether the scan currently considers itself inside a word. The constructors seed this
    /// differently to get inclusive vs. exclusive behaviour at the starting offset.
    in_word: bool,
    /// Which word edge is reported and in which direction the scan moves.
    approach: WordBoundariesApproach,
    /// Rule deciding which characters separate words.
    policy: Cow<'a, WordBoundariesPolicy>,
    /// Set once the boundary at the end of the input has been yielded, so it is only
    /// reported a single time.
    done: bool,
}
| 40 | |
| 41 | impl<'a, T: TextBuffer + ?Sized> WordBoundaries<'a, T> { |
| 42 | pub fn with_policy(mut self, policy: impl Into<Cow<'a, WordBoundariesPolicy>>) -> Self { |
| 43 | self.policy = policy.into(); |
| 44 | self |
| 45 | } |
| 46 | |
| 47 | /// Create an iterator that will return the starts of words moving forwards |
| 48 | pub fn forward_starts(offset: CharOffset, chars: T::Chars<'a>, buffer: &'a T) -> Self { |
| 49 | Self { |
| 50 | offset, |
| 51 | buffer, |
| 52 | chars: Either::Left(chars).peekable(), |
| 53 | in_word: true, |
| 54 | approach: WordBoundariesApproach::ForwardWordStarts, |
| 55 | policy: Cow::Owned(WordBoundariesPolicy::Default), |
| 56 | done: false, |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | /// Create an iterator that will return the ends of words moving forwards, exclusive of the |
| 61 | /// offset position. |
| 62 | /// |
| 63 | /// Example: For a buffer of "word one two three", with an offset of `4` (immediately after |
| 64 | /// the 'word'), this will yield columns [8, 12, 18], the ends of `one`, `two`, and `three`, |
| 65 | /// but _excluding_ the initial position at the end of `word`. |
| 66 | pub fn forward_ends_exclusive(offset: CharOffset, chars: T::Chars<'a>, buffer: &'a T) -> Self { |
| 67 | Self { |
| 68 | offset, |
| 69 | buffer, |
| 70 | chars: Either::Left(chars).peekable(), |
| 71 | in_word: false, |
| 72 | approach: WordBoundariesApproach::ForwardWordEnds, |
| 73 | policy: Cow::Owned(WordBoundariesPolicy::Default), |
| 74 | done: false, |
| 75 | } |
| 76 | } |
| 77 | |
| 78 | /// Create an iterator that will return the ends of words moving forwards, inclusive of the |
| 79 | /// offset position. |
| 80 | /// |
| 81 | /// Example: For a buffer of "word one two three", with an offset of `4` (immediately after |
| 82 | /// the 'word'), this will yield columns [4, 8, 12, 18], the ends of all four words, |
| 83 | /// _including_ the initial position at the end of `word`. |
| 84 | pub fn forward_ends_inclusive(offset: CharOffset, chars: T::Chars<'a>, buffer: &'a T) -> Self { |
| 85 | Self { |
| 86 | offset, |
| 87 | buffer, |
| 88 | chars: Either::Left(chars).peekable(), |
| 89 | in_word: true, |
| 90 | approach: WordBoundariesApproach::ForwardWordEnds, |
| 91 | policy: Cow::Owned(WordBoundariesPolicy::Default), |
| 92 | done: false, |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | /// Create an iterator that will return the starts of words moving _backwards_, exclusive of |
| 97 | /// the offset position |
| 98 | /// |
| 99 | /// Example: For a buffer of "word one two three", with an offset of `13` (immediately before |
| 100 | /// the 'three'), this will yield columns [9, 5, 0], the starts of `two`, `one`, and `word`, |
| 101 | /// but _excluding_ the initial position at the start of `three`. |
| 102 | pub fn backward_starts_exclusive( |
| 103 | offset: CharOffset, |
| 104 | chars: T::CharsReverse<'a>, |
| 105 | buffer: &'a T, |
| 106 | ) -> Self { |
| 107 | Self { |
| 108 | offset, |
| 109 | buffer, |
| 110 | chars: Either::Right(chars).peekable(), |
| 111 | in_word: false, |
| 112 | approach: WordBoundariesApproach::BackwardWordStarts, |
| 113 | policy: Cow::Owned(WordBoundariesPolicy::Default), |
| 114 | done: false, |
| 115 | } |
| 116 | } |
| 117 | |
| 118 | /// Create an iterator that will return the starts of words moving _backwards_, inclusive of |
| 119 | /// the offset position |
| 120 | /// |
| 121 | /// Example: For a buffer of "word one two three", with an offset of `13` (immediately before |
| 122 | /// the 'three'), this will yield columns [13, 9, 5, 0], the starts of all four words, |
| 123 | /// _including_ the initial position at the start of `three`. |
| 124 | pub fn backward_starts_inclusive( |
| 125 | offset: CharOffset, |
| 126 | chars: T::CharsReverse<'a>, |
| 127 | buffer: &'a T, |
| 128 | ) -> Self { |
| 129 | Self { |
| 130 | offset, |
| 131 | buffer, |
| 132 | chars: Either::Right(chars).peekable(), |
| 133 | in_word: true, |
| 134 | approach: WordBoundariesApproach::BackwardWordStarts, |
| 135 | policy: Cow::Owned(WordBoundariesPolicy::Default), |
| 136 | done: false, |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | fn step(&mut self) { |
| 141 | self.chars.next(); |
| 142 | match self.approach { |
| 143 | WordBoundariesApproach::ForwardWordStarts | WordBoundariesApproach::ForwardWordEnds => { |
| 144 | self.offset += 1; |
| 145 | } |
| 146 | WordBoundariesApproach::BackwardWordStarts => { |
| 147 | self.offset -= 1; |
| 148 | } |
| 149 | } |
| 150 | } |
| 151 | |
| 152 | fn is_word_boundary(&self, c: char) -> bool { |
| 153 | match self.policy.as_ref() { |
| 154 | WordBoundariesPolicy::Default => is_default_word_boundary(c), |
| 155 | WordBoundariesPolicy::Custom(boundary_chars) => { |
| 156 | c.is_whitespace() || boundary_chars.contains(&c) |
| 157 | } |
| 158 | WordBoundariesPolicy::OnlyWhitespace => c.is_whitespace(), |
| 159 | } |
| 160 | } |
| 161 | } |
| 162 | |
| 163 | impl<T: TextBuffer + ?Sized> Iterator for WordBoundaries<'_, T> { |
| 164 | type Item = Point; |
| 165 | |
| 166 | fn next(&mut self) -> Option<Self::Item> { |
| 167 | while let Some(&c) = self.chars.peek() { |
| 168 | match self.approach { |
| 169 | // For forward word starts, we look for the transition from not in a word (i.e. in |
| 170 | // a separator) to in a word. That boundary is the start of a new word |
| 171 | WordBoundariesApproach::ForwardWordStarts => { |
| 172 | if self.in_word { |
| 173 | self.step(); |
| 174 | |
| 175 | if self.is_word_boundary(c) { |
| 176 | self.in_word = false; |
| 177 | } |
| 178 | } else if self.is_word_boundary(c) { |
| 179 | self.step(); |
| 180 | } else { |
| 181 | // We are not in a word, but the next character _is_ in a word, so |
| 182 | // we've found the start of the next word. We mark ourselves as being |
| 183 | // in a word (for the next iteration), then return the point. |
| 184 | self.in_word = true; |
| 185 | return self.buffer.to_point(self.offset).ok(); |
| 186 | } |
| 187 | } |
| 188 | // For forward word ends, we look for the transition from in a word to not in a |
| 189 | // word. That boundary is the end of the current word. We also look for the same |
| 190 | // boundary for backward starts, since going backwards the transition from in a |
| 191 | // word to not in a word represents the _beginning_ of the current word |
| 192 | WordBoundariesApproach::ForwardWordEnds |
| 193 | | WordBoundariesApproach::BackwardWordStarts => { |
| 194 | if self.in_word { |
| 195 | if self.is_word_boundary(c) { |
| 196 | // We are in a word, but the next character is _not_ in a word, so we |
| 197 | // have found the boundary. We mark ourselves as not being in a word, |
| 198 | // then return the point. |
| 199 | self.in_word = false; |
| 200 | return self.buffer.to_point(self.offset).ok(); |
| 201 | } else { |
| 202 | self.step(); |
| 203 | } |
| 204 | } else { |
| 205 | self.step(); |
| 206 | |
| 207 | if !self.is_word_boundary(c) { |
| 208 | self.in_word = true; |
| 209 | } |
| 210 | } |
| 211 | } |
| 212 | } |
| 213 | } |
| 214 | |
| 215 | // We have consumed all of the characters in the given direction. However, we should also |
| 216 | // treat the end (or beginning if backward) of the buffer as a word boundary. We only want |
| 217 | // to return that once, however, so we mark ourselves as done afterwards. |
| 218 | if self.done { |
| 219 | None |
| 220 | } else { |
| 221 | self.done = true; |
| 222 | |
| 223 | self.buffer.to_point(self.offset).ok() |
| 224 | } |
| 225 | } |
| 226 | } |
| 227 | |
| 228 | impl From<WordBoundariesPolicy> for Cow<'_, WordBoundariesPolicy> { |
| 229 | fn from(policy: WordBoundariesPolicy) -> Self { |
| 230 | Cow::Owned(policy) |
| 231 | } |
| 232 | } |
| 233 | |
| 234 | impl<'a> From<&'a WordBoundariesPolicy> for Cow<'a, WordBoundariesPolicy> { |
| 235 | fn from(policy: &'a WordBoundariesPolicy) -> Self { |
| 236 | Cow::Borrowed(policy) |
| 237 | } |
| 238 | } |
| 239 | |
| 240 | #[cfg(test)] |
| 241 | #[path = "word_boundaries_tests.rs"] |
| 242 | mod tests; |
| 243 |