Skip to main content

Mountain/ApplicationState/DTO/
DocumentStateDTO.rs

1//! # DocumentStateDTO
2//!
3//! # RESPONSIBILITY
4//! - Data transfer object for text document state
5//! - Serializable format for gRPC/IPC transmission
6//! - Used by Mountain to track document lifecycle and sync with Air
7//!
8//! # FIELDS
9//! - URI: Unique document resource identifier
10//! - LanguageIdentifier: Language ID for syntax highlighting
11//! - Version: Client-side version for change tracking
12//! - Lines: Document content split into lines
13//! - EOL: End-of-line sequence (\n or \r\n)
14//! - IsDirty: Indicates unsaved changes
15//! - Encoding: File encoding (e.g., utf8)
16//! - VersionIdentifier: Internal version for host tracking
17//!
18//! TODO (Mountain→Air Split): If Air implements a background document sync
19//! service, consider delegating delta change validation or conflict resolution
20//! to Air. For now, Mountain handles this synchronously to ensure UI
21//! responsiveness.
22
23use CommonLibrary::{Error::CommonError::CommonError, Utility::Serialization::URLSerializationHelper};
24use serde::{Deserialize, Serialize};
25use serde_json::Value;
26use url::Url;
27
28use crate::{ApplicationState::Internal::TextProcessing::AnalyzeTextLinesAndEOL::AnalyzeTextLinesAndEOL, dev_log};
29use super::{RPCModelContentChangeDTO::RPCModelContentChangeDTO, RPCRangeDTO::RPCRangeDTO};
30
/// Maximum number of lines a document may contain. `DocumentStateDTO::Create`
/// rejects content that splits into more lines than this, to prevent memory
/// exhaustion.
const MAX_DOCUMENT_LINES:usize = 1_000_000;

/// Maximum length of a single line, measured in bytes (it is compared against
/// `String::len`). `DocumentStateDTO::Create` rejects content containing a
/// longer line, to prevent line-based denial of service.
const MAX_LINE_LENGTH:usize = 100_000;

/// Maximum length of a language identifier string, measured in bytes.
/// Enforced by `DocumentStateDTO::Create`.
const MAX_LANGUAGE_ID_LENGTH:usize = 128;
39
40/// Represents the complete in-memory state of a single text document.
41#[derive(Serialize, Deserialize, Clone, Debug)]
42#[serde(rename_all = "camelCase")]
43pub struct DocumentStateDTO {
44	/// The unique resource identifier for this document.
45	#[serde(rename = "uri", with = "URLSerializationHelper")]
46	pub URI:Url,
47
48	/// The VS Code language identifier (e.g., "rust", "typescript").
49	#[serde(skip_serializing_if = "String::is_empty")]
50	pub LanguageIdentifier:String,
51
52	/// The version number, incremented on each change from the client.
53	pub Version:i64,
54
55	/// The content of the document, split into lines.
56	pub Lines:Vec<String>,
57
58	/// The detected end-of-line sequence (e.g., `\n` or `\r\n`).
59	#[serde(rename = "eol")]
60	pub EOL:String,
61
62	/// A flag indicating if the in-memory version has unsaved changes.
63	pub IsDirty:bool,
64
65	/// The detected file encoding (e.g., "utf8").
66	pub Encoding:String,
67
68	/// An internal version number, used for tracking changes within the host.
69	pub VersionIdentifier:i64,
70}
71
72impl DocumentStateDTO {
73	/// Creates a new `DocumentStateDTO` from its initial content with
74	/// validation.
75	///
76	/// # Arguments
77	/// * `URI` - The document resource URI
78	/// * `LanguageIdentifier` - Optional language ID for syntax highlighting
79	/// * `Content` - The initial document content
80	///
81	/// # Returns
82	/// Result containing the DTO or an error if validation fails
83	///
84	/// # Errors
85	/// Returns `CommonError` if:
86	/// - Language identifier exceeds maximum length
87	/// - Document exceeds maximum line count
88	/// - Any line exceeds maximum length
89	/// - URI is empty
90	pub fn Create(URI:Url, LanguageIdentifier:Option<String>, Content:String) -> Result<Self, CommonError> {
91		// Validate URI is not empty
92		if URI.as_str().is_empty() {
93			return Err(CommonError::InvalidArgument {
94				ArgumentName:"URI".into(),
95				Reason:"URI cannot be empty".into(),
96			});
97		}
98
99		let LanguageID = LanguageIdentifier.unwrap_or_else(|| "plaintext".to_string());
100
101		// Validate language identifier length
102		if LanguageID.len() > MAX_LANGUAGE_ID_LENGTH {
103			return Err(CommonError::InvalidArgument {
104				ArgumentName:"LanguageIdentifier".into(),
105				Reason:format!("Language identifier exceeds maximum length of {} bytes", MAX_LANGUAGE_ID_LENGTH),
106			});
107		}
108
109		let (Lines, EOL) = AnalyzeTextLinesAndEOL(&Content);
110
111		// Validate document line count
112		if Lines.len() > MAX_DOCUMENT_LINES {
113			return Err(CommonError::InvalidArgument {
114				ArgumentName:"Content".into(),
115				Reason:format!("Document exceeds maximum line count of {}", MAX_DOCUMENT_LINES),
116			});
117		}
118
119		// Validate individual line lengths
120		for (Index, Line) in Lines.iter().enumerate() {
121			if Line.len() > MAX_LINE_LENGTH {
122				return Err(CommonError::InvalidArgument {
123					ArgumentName:"Content".into(),
124					Reason:format!("Line {} exceeds maximum length of {} bytes", Index + 1, MAX_LINE_LENGTH),
125				});
126			}
127		}
128
129		let Encoding = "utf8".to_string();
130
131		Ok(Self {
132			URI,
133
134			LanguageIdentifier:LanguageID,
135
136			Version:1,
137
138			Lines,
139
140			EOL,
141
142			IsDirty:false,
143
144			Encoding,
145
146			VersionIdentifier:1,
147		})
148	}
149
150	/// Creates a new `DocumentStateDTO` without validation for internal use.
151	/// This should only be called with trusted data sources.
152	pub fn CreateUnsafe(
153		URI:Url,
154
155		LanguageIdentifier:String,
156
157		Lines:Vec<String>,
158
159		EOL:String,
160
161		IsDirty:bool,
162
163		Encoding:String,
164
165		Version:i64,
166
167		VersionIdentifier:i64,
168	) -> Self {
169		Self {
170			URI,
171
172			LanguageIdentifier,
173
174			Version,
175
176			Lines,
177
178			EOL,
179
180			IsDirty,
181
182			Encoding,
183
184			VersionIdentifier,
185		}
186	}
187
188	/// Reconstructs the full text content of the document from its lines.
189	pub fn GetText(&self) -> String { self.Lines.join(&self.EOL) }
190
191	/// Converts the struct to a `serde_json::Value`, useful for notifications.
192	pub fn ToDTO(&self) -> Result<Value, CommonError> {
193		serde_json::to_value(self).map_err(|Error| CommonError::SerializationError { Description:Error.to_string() })
194	}
195
196	/// Applies a set of changes to the document. This can be a full text
197	/// replacement or a collection of delta changes.
198	pub fn ApplyChanges(&mut self, NewVersion:i64, ChangesValue:&Value) -> Result<(), CommonError> {
199		// Ignore stale changes.
200		if NewVersion <= self.Version {
201			return Ok(());
202		}
203
204		// Attempt to deserialize as an array of delta changes first.
205		if let Ok(RPCChange) = serde_json::from_value::<Vec<RPCModelContentChangeDTO>>(ChangesValue.clone()) {
206			dev_log!("model", "applying {} delta change(s) to document {}", RPCChange.len(), self.URI);
207
208			self.Lines = ApplyDeltaChanges(&self.Lines, &self.EOL, &RPCChange);
209		} else if let Some(FullText) = ChangesValue.as_str() {
210			// If it's not deltas, check if it's a full text replacement.
211			let (NewLines, NewEOL) = AnalyzeTextLinesAndEOL(FullText);
212
213			self.Lines = NewLines;
214
215			self.EOL = NewEOL;
216		} else {
217			return Err(CommonError::InvalidArgument {
218				ArgumentName:"ChangesValue".into(),
219
220				Reason:format!(
221					"Invalid change format for {}: expected string or RPCModelContentChangeDTO array.",
222					self.URI
223				),
224			});
225		}
226
227		// Update metadata after changes have been applied.
228		self.Version = NewVersion;
229
230		self.VersionIdentifier += 1;
231
232		self.IsDirty = true;
233
234		Ok(())
235	}
236}
237
238/// Applies delta changes to the document text and returns the updated lines.
239///
240/// This function:
241/// 1. Sorts changes in reverse order (by start position) to prevent offset
242///    corruption
243/// 2. Converts line/column positions to byte offsets in the full text
244/// 3. Applies each change (delete range + insert new text)
245/// 4. Splits the result back into lines
246///
247/// # Arguments
248/// * `Lines` - The current document lines
249/// * `EOL` - The end-of-line sequence to use
250/// * `RPCChange` - Array of changes to apply
251///
252/// # Returns
253/// Updated lines vector after applying all changes
254fn ApplyDeltaChanges(Lines:&[String], EOL:&str, RPCChange:&[RPCModelContentChangeDTO]) -> Vec<String> {
255	// Join lines into full text for offset-based manipulation
256	let mut ResultText = Lines.join(EOL);
257
258	// If no changes, return original lines
259	if RPCChange.is_empty() {
260		return Lines.to_vec();
261	}
262
263	// Sort changes in reverse order of position to prevent offset corruption
264	// When applying multiple changes, earlier changes shift positions for later
265	// changes. By applying from end to beginning, all offsets remain valid.
266	let mut SortedChanges:Vec<&RPCModelContentChangeDTO> = RPCChange.iter().collect();
267
268	SortedChanges.sort_by(|a, b| CMP_Range_Position(&b.Range, &a.Range));
269
270	// Apply each change to the text
271	for Change in SortedChanges {
272		// Convert (line, column) to byte offset
273		let StartOffset = PositionToOffset(&ResultText, EOL, &Change.Range.StartLineNumber, &Change.Range.StartColumn);
274
275		let EndOffset = PositionToOffset(&ResultText, EOL, &Change.Range.EndLineNumber, &Change.Range.EndColumn);
276
277		// Validate offsets
278		if StartOffset > EndOffset {
279			dev_log!(
280				"model",
281				"error: invalid range: start ({}) > end ({}) for text length {}",
282				StartOffset,
283				EndOffset,
284				ResultText.len()
285			);
286
287			continue;
288		}
289
290		let TextLength = ResultText.len();
291
292		if StartOffset > TextLength || EndOffset > TextLength {
293			dev_log!(
294				"model",
295				"error: out of bounds: start ({}) or end ({}) exceeds text length {}",
296				StartOffset,
297				EndOffset,
298				TextLength
299			);
300
301			continue;
302		}
303
304		// Remove old text and insert new text
305		// Safe slice operation: validated offsets above
306		let OldText = ResultText.as_bytes();
307
308		ResultText =
309			String::from_utf8_lossy(&[&OldText[..StartOffset], Change.Text.as_bytes(), &OldText[EndOffset..]].concat())
310				.into_owned();
311	}
312
313	// Re-split the result into lines
314	AnalyzeTextLinesAndEOL(&ResultText).0
315}
316
/// Converts a line/column position to a byte offset in `Text`.
///
/// Both `LineNumber` and `Column` are treated as 0-based. `Column` counts
/// Rust `char`s (Unicode scalar values) within the line and is translated to
/// the matching byte offset; a column past the end of the line clamps to the
/// end of that line.
///
/// Lines are found by splitting `Text` on `EOL`. If `LineNumber` is at or
/// beyond the number of lines, the result is the summed length of every line
/// plus one `EOL` per line, which is larger than `Text.len()` for a non-empty
/// `EOL`, so callers can detect it as out of range.
///
/// The text is walked lazily and the walk stops as soon as the lines run out,
/// so the cost is bounded by the size of `Text` even for an arbitrarily large
/// `LineNumber`.
///
/// NOTE(review): editors in the VS Code family commonly express columns in
/// UTF-16 code units and some ranges as 1-based - confirm that callers
/// already send 0-based, `char`-counted positions.
fn PositionToOffset(Text:&str, EOL:&str, LineNumber:&usize, Column:&usize) -> usize {
	let EOLLength = EOL.len();

	let mut Segments = Text.split(EOL);

	let mut Offset = 0;

	// Add length of all preceding lines plus their EOL markers
	for _ in 0..*LineNumber {
		match Segments.next() {
			Some(Segment) => Offset += Segment.len() + EOLLength,

			// Ran out of lines before reaching the target: nothing more to add.
			None => return Offset,
		}
	}

	// Add column offset within the target line, if that line exists
	if let Some(CurrentLine) = Segments.next() {
		// Column is in character positions, convert to byte offset
		Offset += CurrentLine
			.char_indices()
			.nth(*Column)
			.map_or(CurrentLine.len(), |(ByteIndex, _)| ByteIndex);
	}

	Offset
}
350
351/// Compares two RPC ranges to determine their order in the document.
352/// Returns negative if a comes before b, zero if equal, positive if a comes
353/// after b.
354fn CMP_Range_Position(A:&RPCRangeDTO, B:&RPCRangeDTO) -> std::cmp::Ordering {
355	A.StartLineNumber
356		.cmp(&B.StartLineNumber)
357		.then_with(|| A.StartColumn.cmp(&B.StartColumn))
358}