git.seregonwar.com

crates/strato-ui-core/src/text/mod.rs

StratoSDK / crates / strato-ui-core / src / text / mod.rs

1	use crate::text_offsets::{ByteOffset, CharCounter, CharOffset};
2	use anyhow::{anyhow, Result};
3	use itertools::Itertools;
4
5	use crate::event::ModifiersState;
6
7	use self::point::Point;
8
9	use self::word_boundaries::WordBoundaries;
10
11	pub mod header;
12	pub mod point;
13	pub mod word_boundaries;
14	pub mod words;
15
16	pub use header::BlockHeaderSize;
17
/// The kind of text selection being made.
///
/// The variant is normally derived from the click count and held modifier keys; see
/// [`SelectionType::from_click_count`] and [`SelectionType::from_mouse_event`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum SelectionType {
    /// The default selection; produced by zero or one click.
    #[default]
    Simple,
    /// Produced by a double click.
    Semantic,
    /// Produced by three or more clicks.
    Lines,
    /// Rectangular selection; produced when the platform's modifier chord is held.
    Rect,
}
26
27	impl SelectionType {
28	pub fn from_click_count(click_count: u32) -> Self {
29	match click_count {
30	0 => SelectionType::Simple,
31	1 => SelectionType::Simple,
32	2 => SelectionType::Semantic,
33	3 => SelectionType::Lines,
34	_ => SelectionType::Lines,
35	}
36	}
37
38	pub fn from_mouse_event(modifiers: ModifiersState, click_count: u32) -> Self {
39	let is_rect = if cfg!(target_os = "macos") {
40	modifiers.cmd && modifiers.alt
41	} else {
42	modifiers.ctrl && modifiers.alt
43	};
44
45	if is_rect {
46	return SelectionType::Rect;
47	}
48
49	SelectionType::from_click_count(click_count)
50	}
51	}
52
53	impl From<SelectionType> for IsRect {
54	fn from(selection_type: SelectionType) -> Self {
55	match selection_type {
56	SelectionType::Rect => IsRect::True,
57	_ => IsRect::False,
58	}
59	}
60	}
61
/// Two-state flag recording whether a selection is rectangular.
///
/// Defaults to [`IsRect::False`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum IsRect {
    /// The selection is rectangular.
    True,
    /// The selection is not rectangular.
    #[default]
    False,
}
68
/// The direction of a selection.
///
/// Defaults to [`SelectionDirection::Forward`].
#[derive(Clone, Copy, Debug, Default)]
pub enum SelectionDirection {
    /// The default direction.
    #[default]
    Forward,
    /// The opposite of [`SelectionDirection::Forward`].
    Backward,
}
75
76	/// A buffer of text characters. This trait acts as a base layer to implement text segmentation
77	/// on top of. Currently, it supports word navigation.
78	pub trait TextBuffer {
79	type Chars<'a>: Iterator<Item = char> + 'a
80	where
81	Self: 'a;
82
83	type CharsReverse<'a>: Iterator<Item = char> + 'a
84	where
85	Self: 'a;
86
87	/// Begin iterating over the characters at `offset`, continuing to the end of the buffer.
88	///
89	/// The maximum valid `offset` is the length of the buffer (not 1 less than the length). This
90	/// allows starting just after the last character.
91	fn chars_at(&self, offset: CharOffset) -> Result<Self::Chars<'_>>;
92
93	/// Begin iterating backwards over the characters from `offset` to the start of the buffer.
94	///
95	/// Note that this is _different_ from the semantics of `Iterator::rev`, which would instead
96	/// start at the very end of the buffer.
97	///
98	/// The maximum valid `offset` is the length of the buffer (not 1 less than the length). This
99	/// allows starting just after the last character.
100	fn chars_rev_at(&self, offset: CharOffset) -> Result<Self::CharsReverse<'_>>;
101
102	/// Converts a character offset to a buffer [`Point`], if it is in bounds.
103	fn to_point(&self, offset: CharOffset) -> Result<Point>;
104
105	/// Convert a point to its offset within the buffer.
106	fn to_offset(&self, point: Point) -> Result<CharOffset>;
107
108	/// Get an iterator of word starting points forward from the given offset
109	fn word_starts_from_offset<T: BufferIndex>(
110	&self,
111	position: T,
112	) -> Result<WordBoundaries<'_, Self>> {
113	let offset = position.to_char_offset(self)?;
114	Ok(WordBoundaries::forward_starts(
115	offset,
116	self.chars_at(offset)?,
117	self,
118	))
119	}
120
121	/// Get an iterator of word ending points forward from the given offset, excluding the current
122	/// location if it is a word boundary.
123	///
124	/// Example: For a buffer of "word one two three", with an offset of `4` (immediately after
125	/// the 'word'), this will yield columns [8, 12, 18], the ends of `one`, `two`, and `three`,
126	/// but _excluding_ the initial position at the end of `word`.
127	fn word_ends_from_offset_exclusive<T: BufferIndex>(
128	&self,
129	position: T,
130	) -> Result<WordBoundaries<'_, Self>> {
131	let offset = position.to_char_offset(self)?;
132	Ok(WordBoundaries::forward_ends_exclusive(
133	offset,
134	self.chars_at(offset)?,
135	self,
136	))
137	}
138
139	/// Get an iterator of word ending points forward from the given offset, including the current
140	/// location if appropriate.
141	///
142	/// Example: For a buffer of "word one two three", with an offset of `4` (immediately after
143	/// the 'word'), this will yield columns [4, 8, 12, 18], the ends of all four words,
144	/// _including_ the initial position at the end of `word`.
145	fn word_ends_from_offset_inclusive<T: BufferIndex>(
146	&self,
147	position: T,
148	) -> Result<WordBoundaries<'_, Self>> {
149	let offset = position.to_char_offset(self)?;
150	Ok(WordBoundaries::forward_ends_inclusive(
151	offset,
152	self.chars_at(offset)?,
153	self,
154	))
155	}
156
157	/// Get an iterator of word starting points backwards from the given offset, excluding the
158	/// current location if it is a word boundary.
159	///
160	/// Example: For a buffer of "word one two three", with an offset of `13` (immediately before
161	/// the 'three'), this will yield columns [9, 5, 0], the starts of `two`, `one`, and `word`,
162	/// but _excluding_ the initial position at the start of `three`.
163	fn word_starts_backward_from_offset_exclusive<T: BufferIndex>(
164	&self,
165	position: T,
166	) -> Result<WordBoundaries<'_, Self>> {
167	let offset = position.to_char_offset(self)?;
168	Ok(WordBoundaries::backward_starts_exclusive(
169	offset,
170	self.chars_rev_at(offset)?,
171	self,
172	))
173	}
174
175	/// Get an iterator of word starting points backwards from the given offset, including the
176	/// current location if appropriate.
177	///
178	/// Example: For a buffer of "word one two three", with an offset of `13` (immediately before
179	/// the 'three'), this will yield columns [13, 9, 5, 0], the starts of all four words,
180	/// _including_ the initial position at the start of `three`.
181	fn word_starts_backward_from_offset_inclusive<T: BufferIndex>(
182	&self,
183	position: T,
184	) -> Result<WordBoundaries<'_, Self>> {
185	let offset = position.to_char_offset(self)?;
186	Ok(WordBoundaries::backward_starts_inclusive(
187	offset,
188	self.chars_rev_at(offset)?,
189	self,
190	))
191	}
192	}
193
194	/// A type which can index into a text buffer.
195	pub trait BufferIndex {
196	fn to_char_offset<B: TextBuffer + ?Sized>(&self, buffer: &B) -> Result<CharOffset>;
197	}
198
199	impl BufferIndex for CharOffset {
200	fn to_char_offset<B: TextBuffer + ?Sized>(&self, _: &B) -> Result<CharOffset> {
201	Ok(*self)
202	}
203	}
204
205	impl BufferIndex for Point {
206	fn to_char_offset<B: TextBuffer + ?Sized>(&self, buffer: &B) -> Result<CharOffset> {
207	buffer.to_offset(*self)
208	}
209	}
210
211	impl TextBuffer for str {
212	type Chars<'a> = std::str::Chars<'a>;
213	type CharsReverse<'a> = std::iter::Rev<std::str::Chars<'a>>;
214
215	fn chars_at(&self, offset: CharOffset) -> Result<Self::Chars<'_>> {
216	let chars = self.chars().count();
217	if offset.as_usize() <= chars {
218	Ok(self.chars().dropping(offset.as_usize()))
219	} else {
220	Err(anyhow!(
221	"Offset {offset} out of bounds; char length is {chars}"
222	))
223	}
224	}
225
226	fn chars_rev_at(&self, offset: CharOffset) -> Result<Self::CharsReverse<'_>> {
227	let chars = self.chars().count();
228	if offset.as_usize() <= chars {
229	Ok(self.chars().rev().dropping(chars - offset.as_usize()))
230	} else {
231	Err(anyhow!(
232	"Offset {offset} out of bounds; char length is {chars}"
233	))
234	}
235	}
236
237	fn to_point(&self, offset: CharOffset) -> Result<Point> {
238	let chars = self.chars().count();
239	if offset.as_usize() <= chars {
240	Ok(Point::new(0, offset.as_usize() as u32))
241	} else {
242	Err(anyhow!(
243	"Offset {offset} out of bounds; char length is {chars}"
244	))
245	}
246	}
247
248	fn to_offset(&self, point: Point) -> Result<CharOffset> {
249	if point.row == 0 {
250	let chars = self.chars().count();
251	if (point.column as usize) <= chars {
252	Ok(CharOffset::from(point.column as usize))
253	} else {
254	Err(anyhow!(
255	"Column {} out of bounds; char length is {chars}",
256	point.column
257	))
258	}
259	} else {
260	Err(anyhow!(
261	"Row {} out of bounds; str only has 1 row",
262	point.row
263	))
264	}
265	}
266	}
267
/// Copies the UTF-8 bytes of `text` into a newly allocated `Vec<u8>`.
///
/// The returned vector has exactly `text.len()` bytes, in the same order as in `text`.
pub fn str_to_byte_vec(text: &str) -> Vec<u8> {
    // `to_vec` copies the byte slice directly; no per-byte iterator chain is needed.
    text.as_bytes().to_vec()
}
272
/// Takes a sub-slice of `s` addressed by [`char`] positions instead of byte positions.
///
/// `start` is inclusive and `end` is exclusive. Returns `None` when `end < start` or when a
/// non-empty range reaches past the last char of `s`. An empty range (`start == end`) always
/// yields `Some("")`, without checking `start` against the length of `s`.
pub fn char_slice(s: &str, start: usize, end: usize) -> Option<&str> {
    // `None` here means `end < start`.
    let span = end.checked_sub(start)?;
    if span == 0 {
        return Some("");
    }

    // Byte positions of every char boundary: the start of each char, followed by the end of
    // the string. The trailing sentinel is what lets `end` equal the char count while any
    // larger `end` still runs off the iterator and produces `None`.
    let mut boundaries = s
        .char_indices()
        .map(|(byte_index, _)| byte_index)
        .chain(std::iter::once(s.len()));

    let first_byte = boundaries.nth(start)?;
    // `nth(start)` consumed boundaries `0..=start`, so the boundary for `end` is now
    // `span - 1` steps ahead.
    let last_byte = boundaries.nth(span - 1)?;

    s.get(first_byte..last_byte)
}
303
304	pub fn count_chars_up_to_byte(text: &str, byte_offset: ByteOffset) -> Option<CharOffset> {
305	if byte_offset.as_usize() == text.len() {
306	return Some(CharOffset::from(text.chars().count()));
307	}
308	let mut counter = CharCounter::new(text);
309	counter.char_offset(byte_offset)
310	}
311
312	#[cfg(test)]
313	#[path = "mod_tests.rs"]
314	mod tests;
315

Seregon/StratoSDK