feat(quick-3): add ureq dep, allowed_remote_domains config, and fetch_remote_markdown
- Add ureq = "2.12" to Cargo.toml for synchronous HTTP fetching
- Add allowed_remote_domains: Vec<String> field to Config struct with serde default
- Add RemoteDocument enum with Loaded/DomainNotAllowed/FetchError/NotMarkdown variants
- Add fetch_remote_markdown() with domain whitelist, 10s timeout, content-type validation, 5MB body limit
This commit is contained in:
@@ -15,3 +15,4 @@ syntect-tui = "3.0"
|
||||
notify = "6.1"
|
||||
ansi-to-tui = "8.0"
|
||||
walkdir = "2.5"
|
||||
ureq = "2.12"
|
||||
|
||||
@@ -12,6 +12,12 @@ pub struct Config {
|
||||
#[serde(default = "default_theme")]
|
||||
#[allow(dead_code)]
|
||||
pub theme: String,
|
||||
|
||||
#[serde(default)]
|
||||
pub margin: u16,
|
||||
|
||||
#[serde(default)]
|
||||
pub allowed_remote_domains: Vec<String>,
|
||||
}
|
||||
|
||||
fn default_vault_path() -> PathBuf {
|
||||
@@ -27,6 +33,8 @@ impl Default for Config {
|
||||
Config {
|
||||
vault_path: default_vault_path(),
|
||||
theme: default_theme(),
|
||||
margin: 0,
|
||||
allowed_remote_domains: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+146
-1
@@ -1,5 +1,6 @@
|
||||
use std::io::{self, BufRead};
|
||||
use std::io::{self, BufRead, Read};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Duration;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
// ── VaultDocument ─────────────────────────────────────────────────────────────
|
||||
@@ -189,6 +190,150 @@ pub fn resolve_standard_link(vault_path: &Path, current_doc: &str, dest: &str) -
|
||||
None
|
||||
}
|
||||
|
||||
// ── Remote document fetching ──────────────────────────────────────────────────
|
||||
|
||||
/// Result of attempting to fetch a remote markdown document.
///
/// Every outcome of [`fetch_remote_markdown`] — success or failure — is
/// reported through one of these variants rather than through a `Result`,
/// so callers can render a specific message for each case.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RemoteDocument {
    /// Successfully fetched and content appears to be markdown.
    ///
    /// `url` is the URL that was requested; `content` is the response body.
    Loaded { url: String, content: String },
    /// Domain is not in the whitelist.
    ///
    /// `domain` is the host that was rejected.
    DomainNotAllowed { domain: String },
    /// HTTP request failed (network error, timeout, non-2xx status).
    ///
    /// `reason` is a human-readable description of the failure.
    FetchError { url: String, reason: String },
    /// Response content does not appear to be markdown (e.g. HTML page, binary).
    ///
    /// `content_type` is the media type reported by the server, without
    /// parameters such as `charset`.
    NotMarkdown { url: String, content_type: String },
}
|
||||
|
||||
/// Extract the lowercase host name from an absolute URL string.
///
/// Returns `None` when the URL has no `://` separator or when the host part
/// is empty.
///
/// The host is isolated the way an HTTP client would see it, so that the
/// whitelist check cannot be fooled by text that merely *looks* like a host:
/// - the authority ends at the first `/`, `\`, `?` or `#`;
/// - any `user:password@` prefix is discarded;
/// - a trailing `:port` (digits only) is discarded.
///
/// Examples:
/// - `https://example.com/foo` → `example.com`
/// - `https://sub.example.com:8080/bar` → `sub.example.com`
/// - `https://user:pw@example.com/` → `example.com`
/// - `https://evil.com?.example.com` → `evil.com`
fn extract_domain(url: &str) -> Option<String> {
    // Split off the scheme: "https://example.com/..." → "example.com/..."
    let (_scheme, after_scheme) = url.split_once("://")?;

    // The authority stops at the first path, query or fragment delimiter.
    // A backslash is included because URL parsers treat it like '/' for
    // http(s) URLs; stopping only at '/' would let "evil.com?.allowed.com"
    // pass a suffix-based whitelist check.
    let authority = after_scheme
        .split(|c| matches!(c, '/' | '\\' | '?' | '#'))
        .next()?;

    // Everything up to the last '@' is userinfo, not part of the host.
    let host_port = authority.rsplit('@').next()?;

    // Strip the port only when what follows the last ':' is all digits, so
    // bracketed IPv6 literals such as "[::1]" are left intact.
    let host = match host_port.rsplit_once(':') {
        Some((host, port)) if port.chars().all(|c| c.is_ascii_digit()) => host,
        _ => host_port,
    };

    if host.is_empty() {
        return None;
    }
    Some(host.to_lowercase())
}
|
||||
|
||||
/// Check whether `domain` is permitted by the `allowed_domains` list.
///
/// Matching rules (case-insensitive on both sides):
/// - Exact match: `example.com` matches `example.com`
/// - Subdomain match: `sub.example.com` matches whitelist entry `example.com`
///
/// Whitelist entries are trimmed of surrounding whitespace. Empty entries
/// are ignored, so a stray `""` in the configuration never grants access.
/// An empty `allowed_domains` list permits nothing.
fn domain_is_allowed(domain: &str, allowed_domains: &[String]) -> bool {
    let domain = domain.to_lowercase();

    allowed_domains.iter().any(|entry| {
        let allowed = entry.trim().to_lowercase();
        if allowed.is_empty() {
            return false;
        }

        // A subdomain must be separated from the allowed suffix by a dot, so
        // "notexample.com" does not match "example.com".
        domain == allowed
            || domain
                .strip_suffix(allowed.as_str())
                .map_or(false, |prefix| prefix.ends_with('.'))
    })
}
|
||||
|
||||
/// Fetch a remote markdown document from `url` and validate its content.
|
||||
///
|
||||
/// Steps:
|
||||
/// 1. Extract domain from URL and check against `allowed_domains` whitelist.
|
||||
/// 2. Issue a GET request with a 10-second timeout via ureq.
|
||||
/// 3. Validate the Content-Type header (accept markdown/plain text; reject HTML/binary).
|
||||
/// 4. Read the response body (capped at 5 MB to prevent memory exhaustion).
|
||||
/// 5. Return the appropriate `RemoteDocument` variant.
|
||||
pub fn fetch_remote_markdown(url: &str, allowed_domains: &[String]) -> RemoteDocument {
|
||||
// Step 1: Domain whitelist check
|
||||
let domain = match extract_domain(url) {
|
||||
Some(d) => d,
|
||||
None => {
|
||||
return RemoteDocument::FetchError {
|
||||
url: url.to_string(),
|
||||
reason: "Could not parse domain from URL".to_string(),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
if !domain_is_allowed(&domain, allowed_domains) {
|
||||
return RemoteDocument::DomainNotAllowed { domain };
|
||||
}
|
||||
|
||||
// Step 2: HTTP GET with timeout
|
||||
let response = match ureq::get(url)
|
||||
.timeout(Duration::from_secs(10))
|
||||
.call()
|
||||
{
|
||||
Ok(resp) => resp,
|
||||
Err(ureq::Error::Status(code, resp)) => {
|
||||
return RemoteDocument::FetchError {
|
||||
url: url.to_string(),
|
||||
reason: format!("HTTP {} {}", code, resp.status_text().to_string()),
|
||||
};
|
||||
}
|
||||
Err(e) => {
|
||||
return RemoteDocument::FetchError {
|
||||
url: url.to_string(),
|
||||
reason: e.to_string(),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
// Step 3: Content-Type validation
|
||||
let content_type = response
|
||||
.header("Content-Type")
|
||||
.unwrap_or("application/octet-stream")
|
||||
.to_lowercase();
|
||||
|
||||
// Strip parameters like "; charset=utf-8"
|
||||
let ct_base = content_type.split(';').next().unwrap_or("").trim().to_string();
|
||||
|
||||
let url_path_is_md = url.split('?').next().unwrap_or(url).ends_with(".md");
|
||||
|
||||
let is_acceptable = matches!(
|
||||
ct_base.as_str(),
|
||||
"text/markdown" | "text/plain" | "text/x-markdown"
|
||||
) || url_path_is_md;
|
||||
|
||||
let is_html = ct_base == "text/html";
|
||||
|
||||
if is_html || (!is_acceptable && !url_path_is_md) {
|
||||
return RemoteDocument::NotMarkdown {
|
||||
url: url.to_string(),
|
||||
content_type: ct_base,
|
||||
};
|
||||
}
|
||||
|
||||
// Step 4: Read body with 5 MB limit
|
||||
let mut body = String::new();
|
||||
let mut reader = response.into_reader().take(5_000_000);
|
||||
if let Err(e) = reader.read_to_string(&mut body) {
|
||||
return RemoteDocument::FetchError {
|
||||
url: url.to_string(),
|
||||
reason: format!("Failed to read response body: {}", e),
|
||||
};
|
||||
}
|
||||
|
||||
RemoteDocument::Loaded {
|
||||
url: url.to_string(),
|
||||
content: body,
|
||||
}
|
||||
}
|
||||
|
||||
// ── Directory listing ─────────────────────────────────────────────────────────
|
||||
|
||||
/// Entry in the vault directory listing.
|
||||
|
||||
Reference in New Issue
Block a user