feat(03-01): add wiki-link resolution and path traversal guard to vault.rs

- Add is_within_vault() with canonicalize + starts_with path traversal guard
- Add resolve_wiki_link() with case-insensitive multi-strategy matching
  (hyphens first, then underscores, then literal spaces per locked decision)
- Support subpath wiki-links: [[guides/Getting Started]] -> guides/getting-started.md
- Add resolve_standard_link() for inline [text](path.md) link resolution
  relative to the current document's directory within the vault
- All resolution functions guarded by is_within_vault() to prevent traversal
This commit is contained in:
2026-02-28 23:04:27 +01:00
parent a63f4115f9
commit f2604d6429
+142
View File
@@ -40,3 +40,145 @@ pub fn load_document(vault_path: &Path, relative: &str) -> VaultDocument {
}, },
} }
} }
// ── Path traversal guard ──────────────────────────────────────────────────────
/// Reports whether `candidate` resolves to a location inside `vault_path`.
///
/// Both paths are passed through `canonicalize()` first, so symlinks and `..`
/// components are resolved before the component-wise `starts_with()` prefix
/// check. This is what stops destinations such as `../../etc/passwd` from
/// being treated as vault content.
///
/// Returns `false` whenever either path fails to canonicalize (for example
/// because it does not exist, or access is denied).
pub fn is_within_vault(vault_path: &Path, candidate: &Path) -> bool {
    let Ok(resolved_candidate) = candidate.canonicalize() else {
        return false;
    };
    let Ok(resolved_root) = vault_path.canonicalize() else {
        return false;
    };
    resolved_candidate.starts_with(&resolved_root)
}
// ── Wiki-link resolution ──────────────────────────────────────────────────────
/// Resolve a raw wiki-link target string to a vault-relative `PathBuf`.
///
/// Algorithm (per locked decisions — case-insensitive, hyphens first):
/// 1. Split `raw_target` on the last `/` to extract optional `subdir` and `name`.
/// 2. Generate candidate stems in priority order: spaces→hyphens,
///    spaces→underscores, literal spaces (all lowercased).
/// 3. List `vault_path/subdir` via `read_dir`, keeping entries whose file name
///    ends in `.md`; the stem (`.md` stripped) is lowercased for comparison.
/// 4. Try the candidates in priority order. For each, every entry with an equal
///    stem is canonicalized and accepted only if the canonical path lies under
///    the canonical vault root (path traversal guard).
/// 5. The first accepted entry is returned as a vault-relative path.
/// 6. If nothing is accepted: return `None`.
///
/// The strategy order decides between competing files: when both
/// `getting-started.md` and `getting_started.md` exist, the hyphenated file
/// wins regardless of the order in which the operating system lists the
/// directory. Entries that tie within one strategy (e.g. case variants on a
/// case-sensitive filesystem) are tried in sorted path order, so the result is
/// deterministic.
///
/// The returned path is derived from the canonical path, so a symlinked note
/// resolves to the vault-relative location of its target.
///
/// Returns `None` if the vault root cannot be canonicalized, the search
/// directory cannot be read, or no matching entry survives the guard.
///
/// # Examples
///
/// ```text
/// resolve_wiki_link(vault, "Getting Started")
///   → matches "getting-started.md" (hyphen strategy, case-insensitive)
///
/// resolve_wiki_link(vault, "guides/Getting Started")
///   → matches "guides/getting-started.md"
/// ```
pub fn resolve_wiki_link(vault_path: &Path, raw_target: &str) -> Option<PathBuf> {
    // Split off subpath prefix if present: "guides/Getting Started" → ("guides", "Getting Started")
    let (subdir, name) = raw_target.rsplit_once('/').unwrap_or(("", raw_target));

    // Lowercased candidate stems in priority order (hyphens first per locked decision).
    // When `name` has no spaces all three are identical; `dedup` collapses them.
    let mut candidates = vec![
        name.replace(' ', "-").to_lowercase(),
        name.replace(' ', "_").to_lowercase(),
        name.to_lowercase(),
    ];
    candidates.dedup();

    // Canonical vault root is loop-invariant: resolve it once up front.
    let vault_canonical = vault_path.canonicalize().ok()?;

    let search_dir: PathBuf = if subdir.is_empty() {
        vault_path.to_path_buf()
    } else {
        vault_path.join(subdir)
    };
    let entries = std::fs::read_dir(&search_dir).ok()?;

    // Gather (lowercased stem, path) for every `.md` entry; unreadable entries are skipped.
    let mut notes: Vec<(String, PathBuf)> = entries
        .flatten()
        .filter_map(|entry| {
            let file_name = entry.file_name();
            let stem = file_name.to_string_lossy().strip_suffix(".md")?.to_lowercase();
            Some((stem, entry.path()))
        })
        .collect();
    // `read_dir` order is platform-dependent; sort so ties resolve deterministically.
    notes.sort_by(|a, b| a.1.cmp(&b.1));

    // Strategy priority is the outer loop so that it, not directory order, picks the winner.
    candidates.iter().find_map(|candidate| {
        notes
            .iter()
            .filter(|(stem, _)| stem == candidate)
            .find_map(|(_, path)| {
                // Path traversal guard: `strip_prefix` only succeeds when the
                // canonical path is inside the canonical vault root. A failed
                // canonicalize or an escaping path skips this entry.
                let canonical = path.canonicalize().ok()?;
                canonical
                    .strip_prefix(&vault_canonical)
                    .ok()
                    .map(Path::to_path_buf)
            })
    })
}
// ── Standard link resolution ──────────────────────────────────────────────────
/// Resolve the destination of an inline markdown link `[text](path.md)`.
///
/// `dest` is interpreted relative to the directory that holds `current_doc`
/// inside the vault. The joined path is canonicalized (which fails if nothing
/// exists at that location) and must lie under the canonical vault root;
/// a destination that escapes the vault (e.g. `../../etc/passwd`) yields `None`.
///
/// # Arguments
///
/// * `vault_path` — absolute path to the vault root directory
/// * `current_doc` — vault-relative path of the currently displayed document
///   (e.g. `"guides/getting-started.md"`)
/// * `dest` — raw link destination from the markdown (e.g. `"../index.md"`)
///
/// # Returns
///
/// `Some(vault_relative_path)` when the destination exists and is inside the
/// vault; `None` when the document path has no parent, either path cannot be
/// canonicalized, or the destination is outside the vault.
pub fn resolve_standard_link(vault_path: &Path, current_doc: &str, dest: &str) -> Option<PathBuf> {
    // Directory of the current document, with the link destination appended.
    let target = vault_path.join(current_doc).parent()?.join(dest);

    // Canonicalizing resolves `..` and symlinks and doubles as the existence check.
    let resolved = target.canonicalize().ok()?;
    let root = vault_path.canonicalize().ok()?;

    // Traversal guard: stripping the root prefix only succeeds inside the vault.
    resolved.strip_prefix(&root).ok().map(Path::to_path_buf)
}