feat(quick-3): add ureq dep, allowed_remote_domains config, and fetch_remote_markdown

- Add ureq = "2.12" to Cargo.toml for synchronous HTTP fetching
- Add allowed_remote_domains: Vec<String> field to Config struct with serde default
- Add RemoteDocument enum with Loaded/DomainNotAllowed/FetchError/NotMarkdown variants
- Add fetch_remote_markdown() with domain whitelist, 10s timeout, content-type validation, 5MB body limit
This commit is contained in:
2026-03-01 13:12:27 +01:00
parent eb1c7866ce
commit 5759ec83e6
3 changed files with 155 additions and 1 deletions
+1
View File
@@ -15,3 +15,4 @@ syntect-tui = "3.0"
notify = "6.1"
ansi-to-tui = "8.0"
walkdir = "2.5"
ureq = "2.12"
+8
View File
@@ -12,6 +12,12 @@ pub struct Config {
#[serde(default = "default_theme")]
#[allow(dead_code)]
pub theme: String,
#[serde(default)]
pub margin: u16,
#[serde(default)]
pub allowed_remote_domains: Vec<String>,
}
fn default_vault_path() -> PathBuf {
@@ -27,6 +33,8 @@ impl Default for Config {
Config {
vault_path: default_vault_path(),
theme: default_theme(),
margin: 0,
allowed_remote_domains: Vec::new(),
}
}
}
+146 -1
View File
@@ -1,5 +1,6 @@
use std::io::{self, BufRead};
use std::io::{self, BufRead, Read};
use std::path::{Path, PathBuf};
use std::time::Duration;
use walkdir::WalkDir;
// ── VaultDocument ─────────────────────────────────────────────────────────────
@@ -189,6 +190,150 @@ pub fn resolve_standard_link(vault_path: &Path, current_doc: &str, dest: &str) -
None
}
// ── Remote document fetching ──────────────────────────────────────────────────
/// Outcome of attempting to fetch a remote markdown document.
///
/// Every variant owns its data as `String`s, so values can be cloned,
/// compared and debug-printed freely (useful in tests and log output).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RemoteDocument {
    /// Successfully fetched and content appears to be markdown.
    Loaded { url: String, content: String },
    /// Domain is not in the whitelist.
    DomainNotAllowed { domain: String },
    /// HTTP request failed (network error, timeout, non-2xx status).
    FetchError { url: String, reason: String },
    /// Response content does not appear to be markdown (e.g. HTML page, binary).
    NotMarkdown { url: String, content_type: String },
}
/// Extract the lowercased host name from an absolute URL string.
///
/// The result feeds the domain whitelist, so it must be the host the HTTP
/// client will actually connect to. The authority component is therefore cut
/// at the first `/`, `?`, `#` or `\` (URL parsers treat a backslash like a
/// slash for `http`/`https`), any `userinfo@` prefix is dropped, and a
/// trailing numeric `:port` is removed.
///
/// Returns `None` when the string has no valid `scheme://` prefix or the host
/// is empty.
///
/// Examples:
/// - `https://example.com/foo` → `example.com`
/// - `https://sub.example.com:8080/bar` → `sub.example.com`
/// - `https://user:pw@example.com/x` → `example.com`
/// - `https://evil.com?x=.example.com` → `evil.com`
fn extract_domain(url: &str) -> Option<String> {
    let (scheme, rest) = url.split_once("://")?;

    // A scheme is a letter followed by letters, digits, '+', '-' or '.'.
    // Rejecting anything else stops a "://" that appears later in the string
    // (e.g. inside a query value) from being mistaken for the scheme separator.
    let mut scheme_chars = scheme.chars();
    let starts_with_letter = scheme_chars
        .next()
        .map_or(false, |c| c.is_ascii_alphabetic());
    let scheme_is_valid = starts_with_letter
        && scheme_chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'));
    if !scheme_is_valid {
        return None;
    }

    // The authority ends at the first path, query or fragment delimiter.
    // Stopping only at '/' would let "evil.com?x=.example.com" masquerade as a
    // subdomain of example.com.
    let authority = rest
        .split(|c| matches!(c, '/' | '?' | '#' | '\\'))
        .next()?;

    // Drop "user:password@" credentials; the host follows the last '@'.
    let host_port = match authority.rfind('@') {
        Some(at) => &authority[at + 1..],
        None => authority,
    };

    // Strip the port only when everything after the last ':' is numeric, so
    // bracketed IPv6 literals such as "[::1]" are left intact.
    let host = match host_port.rfind(':') {
        Some(colon) if host_port[colon + 1..].chars().all(|c| c.is_ascii_digit()) => {
            &host_port[..colon]
        }
        _ => host_port,
    };

    if host.is_empty() {
        return None;
    }
    Some(host.to_lowercase())
}
/// Check whether `domain` is permitted by the `allowed_domains` list.
///
/// Matching rules (case-insensitive on both sides):
/// - Exact match: `example.com` matches `example.com`
/// - Subdomain match: `sub.example.com` matches whitelist entry `example.com`
///
/// Whitelist entries are trimmed of surrounding whitespace, and blank entries
/// are ignored: an empty entry would otherwise match an empty domain and, via
/// the subdomain rule, any host name that ends in a dot.
fn domain_is_allowed(domain: &str, allowed_domains: &[String]) -> bool {
    let domain = domain.to_lowercase();

    allowed_domains.iter().any(|entry| {
        let allowed = entry.trim().to_lowercase();
        if allowed.is_empty() {
            return false;
        }
        match domain.strip_suffix(allowed.as_str()) {
            // Nothing left over: exact match.
            Some("") => true,
            // Something left over: it must end at a label boundary, so that
            // "notexample.com" does not match "example.com".
            Some(prefix) => prefix.ends_with('.'),
            None => false,
        }
    })
}
/// Fetch a remote markdown document from `url` and validate its content.
///
/// Steps:
/// 1. Extract domain from URL and check against `allowed_domains` whitelist.
/// 2. Issue a GET request with a 10-second timeout via ureq.
/// 3. Re-check the whitelist against the URL the response actually came from,
///    so a redirect cannot deliver content from a non-whitelisted host.
/// 4. Validate the Content-Type header (accept markdown/plain text; reject HTML/binary).
///    A URL whose path ends in `.md` is accepted for any non-HTML content type.
/// 5. Read the response body; bodies larger than 5 MB are rejected rather than
///    silently truncated.
/// 6. Return the appropriate `RemoteDocument` variant.
///
/// Returns:
/// - `Loaded` with the body text on success.
/// - `DomainNotAllowed` when the requested or final host is not whitelisted.
/// - `NotMarkdown` when the content type is HTML or otherwise unacceptable.
/// - `FetchError` for an unparseable URL, network/HTTP failure, an oversized
///   body, or a body that is not valid UTF-8.
pub fn fetch_remote_markdown(url: &str, allowed_domains: &[String]) -> RemoteDocument {
    // Upper bound on the duration of the whole request.
    const REQUEST_TIMEOUT: Duration = Duration::from_secs(10);
    // Largest response body accepted, in bytes.
    const MAX_BODY_BYTES: u64 = 5_000_000;

    let fetch_error = |reason: String| RemoteDocument::FetchError {
        url: url.to_string(),
        reason,
    };

    // Step 1: Domain whitelist check
    let domain = match extract_domain(url) {
        Some(d) => d,
        None => return fetch_error("Could not parse domain from URL".to_string()),
    };
    if !domain_is_allowed(&domain, allowed_domains) {
        return RemoteDocument::DomainNotAllowed { domain };
    }

    // Step 2: HTTP GET with timeout
    let response = match ureq::get(url).timeout(REQUEST_TIMEOUT).call() {
        Ok(resp) => resp,
        Err(ureq::Error::Status(code, resp)) => {
            return fetch_error(format!("HTTP {} {}", code, resp.status_text()));
        }
        Err(e) => return fetch_error(e.to_string()),
    };

    // Step 3: the HTTP client may have followed redirects, so validate the host
    // that actually served the response.
    // NOTE(review): the redirected request has already been sent at this point;
    // this check only prevents its content from being loaded.
    match extract_domain(response.get_url()) {
        Some(final_domain) => {
            if !domain_is_allowed(&final_domain, allowed_domains) {
                return RemoteDocument::DomainNotAllowed {
                    domain: final_domain,
                };
            }
        }
        None => return fetch_error("Could not parse domain from final URL".to_string()),
    }

    // Step 4: Content-Type validation
    let content_type = response
        .header("Content-Type")
        .unwrap_or("application/octet-stream")
        .to_lowercase();
    // Strip parameters like "; charset=utf-8"
    let ct_base = content_type.split(';').next().unwrap_or("").trim().to_string();

    // Ignore both the query string and the fragment when looking at the
    // extension, so links such as "notes.md#section" are still recognised.
    let url_path = url
        .split(|c| c == '?' || c == '#')
        .next()
        .unwrap_or(url);
    let url_path_is_md = url_path.ends_with(".md");

    let is_text_type = matches!(
        ct_base.as_str(),
        "text/markdown" | "text/plain" | "text/x-markdown"
    );
    // HTML is always rejected, even for ".md" URLs (typically a rendered page).
    if ct_base == "text/html" || !(is_text_type || url_path_is_md) {
        return RemoteDocument::NotMarkdown {
            url: url.to_string(),
            content_type: ct_base,
        };
    }

    // Step 5: Read body with 5 MB limit. Reading one byte past the limit lets
    // us distinguish "exactly at the limit" from "too large" without buffering
    // an unbounded response.
    let mut bytes = Vec::new();
    let mut reader = response.into_reader().take(MAX_BODY_BYTES + 1);
    if let Err(e) = reader.read_to_end(&mut bytes) {
        return fetch_error(format!("Failed to read response body: {}", e));
    }
    // usize -> u64 is a widening conversion on all supported targets.
    if bytes.len() as u64 > MAX_BODY_BYTES {
        return fetch_error(format!(
            "Response body exceeds the {} byte limit",
            MAX_BODY_BYTES
        ));
    }

    // Decode after the size check so an oversized body is never misreported
    // as an encoding problem caused by cutting a multi-byte character.
    match String::from_utf8(bytes) {
        Ok(content) => RemoteDocument::Loaded {
            url: url.to_string(),
            content,
        },
        Err(e) => fetch_error(format!("Failed to read response body: {}", e)),
    }
}
// ── Directory listing ─────────────────────────────────────────────────────────
/// Entry in the vault directory listing.