deno/cli/lsp/text.rs
2021-01-11 18:13:41 +01:00

516 lines
11 KiB
Rust

// Copyright 2018-2021 the Deno authors. All rights reserved. MIT license.
use deno_core::serde_json::json;
use deno_core::serde_json::Value;
use dissimilar::diff;
use dissimilar::Chunk;
use lspower::lsp_types;
use lspower::lsp_types::TextEdit;
use std::ops::Bound;
use std::ops::Range;
use std::ops::RangeBounds;
// TODO(@kitson) in general, none of these text handling routines treat string
// encoding the way JavaScript does (they count Rust `char`s, while JavaScript
// counts UTF-16 code units), which is likely to cause issues when trying to
// arbitrate between chars and Unicode graphemes. There be dragons.
/// Build the table of character offsets at which each line of `text` starts.
///
/// The first entry is always `0`; every `\n` in the text contributes the
/// offset of the character that follows it. Offsets count Rust `char`s, not
/// bytes. For example:
///
/// ```rust
/// let actual = index_lines("a\nb\n");
/// assert_eq!(actual, vec![0, 2, 4]);
/// ```
///
pub fn index_lines(text: &str) -> Vec<u32> {
  // Offset of the character following each newline.
  let after_newlines = text
    .chars()
    .enumerate()
    .filter(|&(_, ch)| ch == '\n')
    .map(|(offset, _)| (offset + 1) as u32);
  std::iter::once(0_u32).chain(after_newlines).collect()
}
/// Describes how much of a previously computed line index may still be used.
enum IndexValid {
  /// Every line in the index is usable.
  All,
  /// Only lines strictly before the contained line number are usable.
  UpTo(u32),
}

impl IndexValid {
  /// Returns `true` when the index entry for `line` may still be used.
  fn covers(&self, line: u32) -> bool {
    match self {
      IndexValid::All => true,
      IndexValid::UpTo(limit) => line < *limit,
    }
  }
}
fn to_range(line_index: &[u32], range: lsp_types::Range) -> Range<usize> {
let start =
(line_index[range.start.line as usize] + range.start.character) as usize;
let end =
(line_index[range.end.line as usize] + range.end.character) as usize;
Range { start, end }
}
/// Convert a character offset into an LSP line/character position.
///
/// The line chosen is the last leading entry of `line_index` whose start
/// offset does not exceed `char_pos`; `character` is the distance from that
/// start. The scan stops at the first entry greater than `char_pos`, so the
/// index is assumed to be in ascending order, as produced by `index_lines`.
/// When no entry qualifies, line `0` with a line start of `0` is used.
pub fn to_position(line_index: &[u32], char_pos: u32) -> lsp_types::Position {
  let (line, line_start) = line_index
    .iter()
    .enumerate()
    .take_while(|&(_, &start)| start <= char_pos)
    .last()
    .map_or((0_usize, 0_u32), |(line, &start)| (line, start));
  lsp_types::Position {
    line: line as u32,
    character: char_pos - line_start,
  }
}
/// Convert an LSP position into a character offset.
///
/// The offset is the start of the position's line, taken from `line_index`,
/// plus the position's `character`. A line with no entry in `line_index`
/// yields `0`.
pub fn to_char_pos(line_index: &[u32], position: lsp_types::Position) -> u32 {
  line_index
    .get(position.line as usize)
    .map_or(0_u32, |line_start| line_start + position.character)
}
/// Apply a vector of document changes to the supplied string.
///
/// Changes are applied in order. A change without a range replaces the whole
/// document; a change with a range replaces only the characters that range
/// covers in the document as it stands when that change is applied.
///
/// Range positions are interpreted as character (`char`) offsets within a
/// line and are translated to byte offsets before the string is edited, so
/// documents containing multi-byte characters are edited at the right place.
/// Offsets past the end of the document are clamped to its end.
///
/// # Panics
///
/// Panics if a range refers to a line that does not exist in the document, or
/// if a range starts after it ends.
pub fn apply_content_changes(
  content: &mut String,
  content_changes: Vec<lsp_types::TextDocumentContentChangeEvent>,
) {
  /// Byte offset of the `char_offset`-th character of `text`, or the length
  /// of `text` when it has fewer characters than that.
  fn byte_offset(text: &str, char_offset: usize) -> usize {
    text
      .char_indices()
      .nth(char_offset)
      .map_or(text.len(), |(byte, _)| byte)
  }

  let mut line_index = index_lines(content.as_str());
  let mut index_valid = IndexValid::All;
  for change in content_changes {
    if let Some(range) = change.range {
      let first_line = range.start.line;
      let last_line = range.start.line.max(range.end.line);
      // Earlier edits may have shifted line starts; rebuild the index unless
      // every line this range touches is still known to be accurate.
      if !index_valid.covers(last_line) {
        line_index = index_lines(content.as_str());
      }
      let chars = to_range(&line_index, range);
      // `String::replace_range` works on byte offsets, not character offsets.
      let start = byte_offset(content.as_str(), chars.start);
      let end = byte_offset(content.as_str(), chars.end);
      content.replace_range(start..end, &change.text);
      // Only the lines before the edited region keep their start offsets.
      index_valid = IndexValid::UpTo(first_line);
    } else {
      *content = change.text;
      // The whole document was replaced, so no index entry can be reused.
      index_valid = IndexValid::UpTo(0);
    }
  }
}
/// Diff `a` against `b` and describe the difference as a list of text edits
/// supported by the Language Server Protocol.
///
/// Every edit range is expressed as positions within `a`. A deleted chunk
/// that is immediately followed by an inserted chunk is reported as a single
/// replacement edit; a lone deletion carries empty replacement text, and a
/// lone insertion uses an empty range at the insertion point.
pub fn get_edits(a: &str, b: &str) -> Vec<TextEdit> {
  let line_index = index_lines(a);
  let chunks = diff(a, b);
  let mut remaining = chunks.iter().peekable();
  let mut edits = Vec::new();
  // Character offset into `a` reached so far.
  let mut offset = 0_u32;
  while let Some(chunk) = remaining.next() {
    match chunk {
      Chunk::Equal(same) => offset += same.chars().count() as u32,
      Chunk::Delete(removed) => {
        let start = to_position(&line_index, offset);
        offset += removed.chars().count() as u32;
        let end = to_position(&line_index, offset);
        // Fold a directly following insertion into this edit and consume it
        // so it is not reported a second time.
        let new_text = match remaining.peek() {
          Some(Chunk::Insert(added)) => {
            let replacement = added.to_string();
            remaining.next();
            replacement
          }
          _ => "".to_string(),
        };
        edits.push(TextEdit {
          range: lsp_types::Range { start, end },
          new_text,
        });
      }
      Chunk::Insert(added) => {
        let at = to_position(&line_index, offset);
        edits.push(TextEdit {
          range: lsp_types::Range { start: at, end: at },
          new_text: added.to_string(),
        });
      }
    }
  }
  edits
}
/// Summarise the difference between `a` and `b` as a single change range of
/// the kind used by the TypeScript Language Service.
///
/// The returned JSON has the form
/// `{ "span": { "start", "length" }, "newLength" }`, where `span` covers `a`
/// from the first changed character to the end of the last changed chunk, and
/// `newLength` counts the inserted characters plus any unchanged characters
/// lying between changes. All values count Rust `char`s. Identical inputs
/// produce a zero-length span at `0` with a `newLength` of `0`.
pub fn get_range_change(a: &str, b: &str) -> Value {
  // Offset in `a` of the first change, once one has been seen.
  let mut first_change = None;
  // Offset in `a` just past the most recent change.
  let mut span_end = 0;
  let mut new_length = 0;
  // Unchanged characters seen since the most recent change.
  let mut unchanged = 0;
  // Current offset in `a`.
  let mut cursor = 0;
  for chunk in diff(a, b) {
    let (removed, added) = match chunk {
      Chunk::Equal(same) => {
        let count = same.chars().count();
        cursor += count;
        unchanged += count;
        continue;
      }
      Chunk::Delete(gone) => (gone.chars().count(), 0),
      Chunk::Insert(fresh) => (0, fresh.chars().count()),
    };
    if first_change.is_none() {
      first_change = Some(cursor);
      // Text ahead of the first change is not part of the replacement.
      unchanged = 0;
    }
    // Unchanged text between two changes is part of the replacement text.
    new_length += unchanged + added;
    unchanged = 0;
    cursor += removed;
    span_end = cursor;
  }
  let start = first_change.unwrap_or(0);
  json!({
    "span": {
      "start": start,
      "length": span_end - start,
    },
    "newLength": new_length,
  })
}
/// Provide a slice of a string based on a character range.
///
/// Bounds are character (`char`) indexes rather than byte offsets, and each
/// bound is honoured as inclusive or exclusive:
///
/// - an excluded start bound begins one character after the bound;
/// - an included end bound finishes one character after the bound;
/// - an unbounded end runs to the end of the string.
///
/// A range whose end lies before its start yields an empty string.
pub fn slice(s: &str, range: impl RangeBounds<usize>) -> &str {
  let start = match range.start_bound() {
    Bound::Included(&bound) => bound,
    Bound::Excluded(&bound) => bound.saturating_add(1),
    Bound::Unbounded => 0,
  };
  let end = match range.end_bound() {
    Bound::Included(&bound) => bound.saturating_add(1),
    Bound::Excluded(&bound) => bound,
    // The byte length is never smaller than the character count, and
    // `substring` stops at the end of the string, so this selects everything
    // from `start` onwards.
    Bound::Unbounded => s.len(),
  };
  // Never hand `substring` an end that precedes the start.
  substring(s, start, end.max(start))
}
/// Provide a substring based on the start and end character index positions.
///
/// `start` is inclusive and `end` is exclusive; both count Rust `char`s, not
/// bytes. Indexes beyond the end of the string are clamped to the end, and an
/// `end` that is not greater than `start` yields an empty string.
pub fn substring(s: &str, start: usize, end: usize) -> &str {
  let mut chars = s.char_indices();
  // Byte offset of the `start`-th character, or the end of the string when
  // there are not that many characters.
  let byte_start = chars.nth(start).map_or(s.len(), |(byte, _)| byte);
  // `saturating_sub` keeps an inverted range from underflowing.
  let byte_end = match end.saturating_sub(start) {
    0 => byte_start,
    // `chars` now sits just after character `start`, so character `end` is
    // `len - 1` further items along.
    len => chars.nth(len - 1).map_or(s.len(), |(byte, _)| byte),
  };
  &s[byte_start..byte_end]
}
#[cfg(test)]
mod tests {
  use super::*;

  /// Shorthand for building an LSP position in assertions.
  fn pos(line: u32, character: u32) -> lsp_types::Position {
    lsp_types::Position { line, character }
  }

  /// Build the JSON value `get_range_change` is expected to return.
  fn range_change(start: u32, length: u32, new_length: u32) -> Value {
    json!({
      "span": {
        "start": start,
        "length": length,
      },
      "newLength": new_length
    })
  }

  #[test]
  fn test_apply_content_changes() {
    let mut content = "a\nb\nc\nd".to_string();
    let content_changes = vec![lsp_types::TextDocumentContentChangeEvent {
      range: Some(lsp_types::Range {
        start: pos(1, 0),
        end: pos(1, 1),
      }),
      range_length: Some(1),
      text: "e".to_string(),
    }];
    apply_content_changes(&mut content, content_changes);
    assert_eq!(content, "a\ne\nc\nd");
  }

  #[test]
  fn test_get_edits() {
    let a = "abcdefg";
    let b = "a\nb\nchije\nfg\n";
    let actual = get_edits(a, b);
    assert_eq!(
      actual,
      vec![
        TextEdit {
          range: lsp_types::Range {
            start: pos(0, 1),
            end: pos(0, 5),
          },
          new_text: "\nb\nchije\n".to_string()
        },
        TextEdit {
          range: lsp_types::Range {
            start: pos(0, 7),
            end: pos(0, 7),
          },
          new_text: "\n".to_string()
        },
      ]
    );
  }

  #[test]
  fn test_get_range_change() {
    assert_eq!(
      get_range_change("abcdefg", "abedcfg"),
      range_change(2, 3, 3)
    );
    assert_eq!(get_range_change("abfg", "abcdefg"), range_change(2, 0, 3));
    assert_eq!(get_range_change("abcdefg", "abfg"), range_change(2, 3, 0));
    assert_eq!(
      get_range_change("abcdefg", "abfghij"),
      range_change(2, 5, 5)
    );
    assert_eq!(
      get_range_change("abcdefghijk", "axcxexfxixk"),
      range_change(1, 9, 9)
    );
    assert_eq!(get_range_change("abcde", "ab(c)de"), range_change(2, 1, 3));
  }

  #[test]
  fn test_index_lines() {
    let actual = index_lines("a\nb\r\nc");
    assert_eq!(actual, vec![0, 2, 5]);
  }

  #[test]
  fn test_to_position() {
    let line_index = index_lines("a\nb\r\nc\n");
    assert_eq!(to_position(&line_index, 6), pos(2, 1));
    assert_eq!(to_position(&line_index, 0), pos(0, 0));
    assert_eq!(to_position(&line_index, 3), pos(1, 1));
  }

  #[test]
  fn test_to_position_mbc() {
    // The first line is "y" followed by a combining breve (U+0306), which is
    // two `char`s, so the lines start at character offsets 0, 3, 6 and 8.
    // The combining mark is written as an escape so it cannot be lost.
    let line_index = index_lines("y\u{306}\n😱🦕\n🤯\n");
    assert_eq!(to_position(&line_index, 0), pos(0, 0));
    assert_eq!(to_position(&line_index, 2), pos(0, 2));
    assert_eq!(to_position(&line_index, 3), pos(1, 0));
    assert_eq!(to_position(&line_index, 4), pos(1, 1));
    assert_eq!(to_position(&line_index, 5), pos(1, 2));
    assert_eq!(to_position(&line_index, 6), pos(2, 0));
    assert_eq!(to_position(&line_index, 7), pos(2, 1));
    assert_eq!(to_position(&line_index, 8), pos(3, 0));
  }

  #[test]
  fn test_substring() {
    assert_eq!(substring("Deno", 1, 3), "en");
    // "y" + combining breve is two `char`s, so characters 2..4 are the
    // second "y̆".
    assert_eq!(substring("y\u{306}y\u{306}", 2, 4), "y\u{306}");
    // this doesn't work like JavaScript, as 🦕 is treated as a single char in
    // Rust, but as two chars in JavaScript.
    // assert_eq!(substring("🦕🦕", 2, 4), "🦕");
  }

  #[test]
  fn test_slice() {
    assert_eq!(slice("Deno", 1..3), "en");
    assert_eq!(slice("Deno", 1..=3), "eno");
    assert_eq!(slice("Deno Land", 1..), "eno Land");
    assert_eq!(slice("Deno", ..3), "Den");
  }
}