feat(quick-3): add ureq dep, allowed_remote_domains config, and fetch_remote_markdown
- Add ureq = "2.12" to Cargo.toml for synchronous HTTP fetching - Add allowed_remote_domains: Vec<String> field to Config struct with serde default - Add RemoteDocument enum with Loaded/DomainNotAllowed/FetchError/NotMarkdown variants - Add fetch_remote_markdown() with domain whitelist, 10s timeout, content-type validation, 5MB body limit
This commit is contained in:
@@ -15,3 +15,4 @@ syntect-tui = "3.0"
|
|||||||
notify = "6.1"
|
notify = "6.1"
|
||||||
ansi-to-tui = "8.0"
|
ansi-to-tui = "8.0"
|
||||||
walkdir = "2.5"
|
walkdir = "2.5"
|
||||||
|
ureq = "2.12"
|
||||||
|
|||||||
@@ -12,6 +12,12 @@ pub struct Config {
|
|||||||
#[serde(default = "default_theme")]
|
#[serde(default = "default_theme")]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub theme: String,
|
pub theme: String,
|
||||||
|
|
||||||
|
#[serde(default)]
|
||||||
|
pub margin: u16,
|
||||||
|
|
||||||
|
#[serde(default)]
|
||||||
|
pub allowed_remote_domains: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn default_vault_path() -> PathBuf {
|
fn default_vault_path() -> PathBuf {
|
||||||
@@ -27,6 +33,8 @@ impl Default for Config {
|
|||||||
Config {
|
Config {
|
||||||
vault_path: default_vault_path(),
|
vault_path: default_vault_path(),
|
||||||
theme: default_theme(),
|
theme: default_theme(),
|
||||||
|
margin: 0,
|
||||||
|
allowed_remote_domains: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+146
-1
@@ -1,5 +1,6 @@
|
|||||||
use std::io::{self, BufRead};
|
use std::io::{self, BufRead, Read};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::time::Duration;
|
||||||
use walkdir::WalkDir;
|
use walkdir::WalkDir;
|
||||||
|
|
||||||
// ── VaultDocument ─────────────────────────────────────────────────────────────
|
// ── VaultDocument ─────────────────────────────────────────────────────────────
|
||||||
@@ -189,6 +190,150 @@ pub fn resolve_standard_link(vault_path: &Path, current_doc: &str, dest: &str) -
|
|||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Remote document fetching ──────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Outcome of attempting to fetch a remote markdown document.
///
/// Every variant carries owned strings only, so the value can be freely
/// cloned, compared and debug-printed by callers and tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RemoteDocument {
    /// Successfully fetched and the content appears to be markdown.
    Loaded {
        /// URL the document was requested from.
        url: String,
        /// Response body as text.
        content: String,
    },
    /// Domain is not in the whitelist.
    DomainNotAllowed {
        /// The rejected domain.
        domain: String,
    },
    /// HTTP request failed (network error, timeout, non-2xx status).
    FetchError {
        /// URL the request was made for.
        url: String,
        /// Human-readable description of the failure.
        reason: String,
    },
    /// Response content does not appear to be markdown (e.g. HTML page, binary).
    NotMarkdown {
        /// URL the request was made for.
        url: String,
        /// Content type reported by the server, without parameters.
        content_type: String,
    },
}
|
||||||
|
|
||||||
|
/// Extract the lowercased host name from an absolute URL string.
///
/// The authority component is everything after `://` up to the first `/`,
/// `?`, `#` or `\`. Any `userinfo@` prefix and any all-digit `:port` suffix
/// are removed from it.
///
/// Returns `None` when the string has no `://` separator or the host is empty.
///
/// Examples:
/// - `https://example.com/foo` → `example.com`
/// - `https://sub.example.com:8080/bar` → `sub.example.com`
/// - `https://user@example.com/foo` → `example.com`
/// - `https://evil.com?.example.com` → `evil.com`
fn extract_domain(url: &str) -> Option<String> {
    // Split off the scheme: "https://example.com/..." → "example.com/..."
    let (_, after_scheme) = url.split_once("://")?;

    // The authority ends at the first path, query or fragment delimiter.
    // Stopping only at '/' would let "evil.com?.example.com" masquerade as a
    // subdomain of "example.com". '\' is included because URL parsers treat it
    // like '/' for http(s) URLs.
    let authority = after_scheme
        .split(|c: char| matches!(c, '/' | '?' | '#' | '\\'))
        .next()?;

    // Drop "user:password@"; the host follows the LAST '@' in the authority.
    let host_port = authority.rsplit_once('@').map_or(authority, |(_, rest)| rest);

    // Strip the port only when everything after the last ':' is digits, so a
    // bracketed IPv6 literal such as "[::1]" is left intact.
    let host = match host_port.rsplit_once(':') {
        Some((name, port)) if port.bytes().all(|b| b.is_ascii_digit()) => name,
        _ => host_port,
    };

    if host.is_empty() {
        return None;
    }
    Some(host.to_lowercase())
}
|
||||||
|
|
||||||
|
/// Check whether `domain` is permitted by the `allowed_domains` list.
///
/// Both `domain` and every whitelist entry are normalised before comparison:
/// lowercased, with a trailing root dot removed (entries are also trimmed of
/// surrounding whitespace). Blank entries never match anything.
///
/// Matching rules (case-insensitive):
/// - Exact match: `example.com` matches `example.com`
/// - Subdomain match: `sub.example.com` matches whitelist entry `example.com`
///
/// Returns `false` for an empty `domain` or an empty whitelist.
fn domain_is_allowed(domain: &str, allowed_domains: &[String]) -> bool {
    let domain = domain.trim_end_matches('.').to_lowercase();
    if domain.is_empty() {
        return false;
    }

    allowed_domains.iter().any(|entry| {
        let allowed = entry.trim().trim_end_matches('.').to_lowercase();
        // A blank entry would otherwise turn into the suffix "." and match
        // unrelated hosts; treat it as "no entry".
        if allowed.is_empty() {
            return false;
        }
        // Subdomain rule: the part before the whitelisted name must end on a
        // label boundary, so "notexample.com" does not match "example.com".
        domain == allowed
            || domain
                .strip_suffix(allowed.as_str())
                .map_or(false, |prefix| prefix.ends_with('.'))
    })
}
|
||||||
|
|
||||||
|
/// Fetch a remote markdown document from `url` and validate its content.
|
||||||
|
///
|
||||||
|
/// Steps:
|
||||||
|
/// 1. Extract domain from URL and check against `allowed_domains` whitelist.
|
||||||
|
/// 2. Issue a GET request with a 10-second timeout via ureq.
|
||||||
|
/// 3. Validate the Content-Type header (accept markdown/plain text; reject HTML/binary).
|
||||||
|
/// 4. Read the response body (capped at 5 MB to prevent memory exhaustion).
|
||||||
|
/// 5. Return the appropriate `RemoteDocument` variant.
|
||||||
|
pub fn fetch_remote_markdown(url: &str, allowed_domains: &[String]) -> RemoteDocument {
|
||||||
|
// Step 1: Domain whitelist check
|
||||||
|
let domain = match extract_domain(url) {
|
||||||
|
Some(d) => d,
|
||||||
|
None => {
|
||||||
|
return RemoteDocument::FetchError {
|
||||||
|
url: url.to_string(),
|
||||||
|
reason: "Could not parse domain from URL".to_string(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if !domain_is_allowed(&domain, allowed_domains) {
|
||||||
|
return RemoteDocument::DomainNotAllowed { domain };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2: HTTP GET with timeout
|
||||||
|
let response = match ureq::get(url)
|
||||||
|
.timeout(Duration::from_secs(10))
|
||||||
|
.call()
|
||||||
|
{
|
||||||
|
Ok(resp) => resp,
|
||||||
|
Err(ureq::Error::Status(code, resp)) => {
|
||||||
|
return RemoteDocument::FetchError {
|
||||||
|
url: url.to_string(),
|
||||||
|
reason: format!("HTTP {} {}", code, resp.status_text().to_string()),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
return RemoteDocument::FetchError {
|
||||||
|
url: url.to_string(),
|
||||||
|
reason: e.to_string(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Step 3: Content-Type validation
|
||||||
|
let content_type = response
|
||||||
|
.header("Content-Type")
|
||||||
|
.unwrap_or("application/octet-stream")
|
||||||
|
.to_lowercase();
|
||||||
|
|
||||||
|
// Strip parameters like "; charset=utf-8"
|
||||||
|
let ct_base = content_type.split(';').next().unwrap_or("").trim().to_string();
|
||||||
|
|
||||||
|
let url_path_is_md = url.split('?').next().unwrap_or(url).ends_with(".md");
|
||||||
|
|
||||||
|
let is_acceptable = matches!(
|
||||||
|
ct_base.as_str(),
|
||||||
|
"text/markdown" | "text/plain" | "text/x-markdown"
|
||||||
|
) || url_path_is_md;
|
||||||
|
|
||||||
|
let is_html = ct_base == "text/html";
|
||||||
|
|
||||||
|
if is_html || (!is_acceptable && !url_path_is_md) {
|
||||||
|
return RemoteDocument::NotMarkdown {
|
||||||
|
url: url.to_string(),
|
||||||
|
content_type: ct_base,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 4: Read body with 5 MB limit
|
||||||
|
let mut body = String::new();
|
||||||
|
let mut reader = response.into_reader().take(5_000_000);
|
||||||
|
if let Err(e) = reader.read_to_string(&mut body) {
|
||||||
|
return RemoteDocument::FetchError {
|
||||||
|
url: url.to_string(),
|
||||||
|
reason: format!("Failed to read response body: {}", e),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
RemoteDocument::Loaded {
|
||||||
|
url: url.to_string(),
|
||||||
|
content: body,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ── Directory listing ─────────────────────────────────────────────────────────
|
// ── Directory listing ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
/// Entry in the vault directory listing.
|
/// Entry in the vault directory listing.
|
||||||
|
|||||||
Reference in New Issue
Block a user