Compare commits
3 Commits
6ebe505a07
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 2b66132d0d | |||
| d8c59dacc1 | |||
| 5de27503c5 |
@@ -0,0 +1,165 @@
|
||||
# AGENTS.md
|
||||
|
||||
## Project Summary
|
||||
|
||||
`ebookm` is a Rust workspace for compiling a set of Substack posts and local HTML files into a single EPUB.
|
||||
|
||||
Current workspace layout:
|
||||
|
||||
- `ebookm-core`
|
||||
Core library: manifest parsing, source loading, extraction, normalization, TOC/link logic, EPUB generation.
|
||||
- `ebookm-cli`
|
||||
Thin CLI wrapper around `ebookm-core`.
|
||||
|
||||
Primary user workflow:
|
||||
|
||||
```bash
|
||||
cargo run -p ebookm-cli -- build -m <manifest>
|
||||
```
|
||||
|
||||
## Key Files
|
||||
|
||||
- `Cargo.toml`
|
||||
Workspace manifest.
|
||||
- `ebookm-core/src/manifest.rs`
|
||||
YAML manifest schema and defaults.
|
||||
- `ebookm-core/src/source.rs`
|
||||
Source loading for Substack URLs and local HTML files.
|
||||
- `ebookm-core/src/extract.rs`
|
||||
Metadata/body extraction, including Substack-specific selectors.
|
||||
- `ebookm-core/src/normalize.rs`
|
||||
HTML cleanup, local/remote image bundling, link rewriting, XHTML-safe output conversion.
|
||||
- `ebookm-core/src/pipeline.rs`
|
||||
Main build orchestration and chapter generation.
|
||||
- `ebookm-core/src/epub.rs`
|
||||
EPUB packaging, nav.xhtml and toc.ncx generation.
|
||||
- `ebookm-core/src/template.rs`
|
||||
Starter manifest template used by `ebookm init`.
|
||||
- `README.md`
|
||||
User-facing docs and manifest reference.
|
||||
|
||||
## Current Manifest Semantics
|
||||
|
||||
Top-level manifest keys:
|
||||
|
||||
- `book`
|
||||
- `output`
|
||||
- `defaults`
|
||||
- `sections`
|
||||
- `entries`
|
||||
- `link_rules`
|
||||
|
||||
Supported source kinds:
|
||||
|
||||
- `substack`
|
||||
Public Substack post URL.
|
||||
- `html`
|
||||
Local HTML file path, resolved relative to the manifest.
|
||||
|
||||
Important processing options:
|
||||
|
||||
- `defaults.processing.include_author`
|
||||
- `defaults.processing.include_date`
|
||||
- `defaults.processing.include_source_url`
|
||||
- `defaults.processing.skip_first_paragraphs`
|
||||
- per-entry overrides under `entries.<id>.processing`
|
||||
|
||||
Current defaults:
|
||||
|
||||
- `include_author: true`
|
||||
- `include_date: true`
|
||||
- `include_source_url: true`
|
||||
- `skip_first_paragraphs: 0`
|
||||
|
||||
## Current EPUB Behavior
|
||||
|
||||
- Section structure is emitted into both `nav.xhtml` and `toc.ncx`.
|
||||
- Chapter header content is configurable:
|
||||
author, date, and canonical URL can each be independently shown/hidden.
|
||||
- Local HTML images are bundled when `fetch_images: true`.
|
||||
- Local image paths are resolved relative to the HTML file, not the manifest.
|
||||
- Remote images from Substack pages are also bundled when `fetch_images: true`.
|
||||
- Generated chapter XHTML is post-processed to self-close HTML void tags like `img`, `hr`, and `br` for EPUB/XML compatibility.
|
||||
|
||||
## Known Implementation Boundaries
|
||||
|
||||
- Substack handling is tuned to current public page structure, especially:
|
||||
`.available-content .body.markup`
|
||||
`.post-title`
|
||||
JSON-LD `datePublished`
|
||||
- Subscriber-only/authenticated Substack content is not implemented.
|
||||
- CSS background images are not bundled.
|
||||
- Manifest fields like `subtitle`, `summary`, and `tags` are parsed but only partially used.
|
||||
- `rewrite_external_substack_links` and `preserve_other_external_links` exist in the manifest schema but are not deeply wired into behavior yet.
|
||||
|
||||
## Validation and Debugging
|
||||
|
||||
Run tests:
|
||||
|
||||
```bash
|
||||
cargo test
|
||||
```
|
||||
|
||||
Build a manifest:
|
||||
|
||||
```bash
|
||||
cargo run -p ebookm-cli -- build -m ageofpeace/ageofpeace.yaml
|
||||
```
|
||||
|
||||
Inspect extracted source metadata:
|
||||
|
||||
```bash
|
||||
cargo run -p ebookm-cli -- inspect <url-or-file>
|
||||
```
|
||||
|
||||
Validate generated XHTML quickly:
|
||||
|
||||
```bash
|
||||
unzip -p path/to/book.epub OEBPS/text/chapter.xhtml | xmllint --noout -
|
||||
```
|
||||
|
||||
Validate the full EPUB package:
|
||||
|
||||
```bash
|
||||
epubcheck path/to/book.epub
|
||||
```
|
||||
|
||||
Useful inspection commands:
|
||||
|
||||
```bash
|
||||
unzip -l path/to/book.epub
|
||||
unzip -p path/to/book.epub OEBPS/nav.xhtml
|
||||
unzip -p path/to/book.epub OEBPS/toc.ncx
|
||||
unzip -p path/to/book.epub OEBPS/text/<entry>.xhtml
|
||||
```
|
||||
|
||||
## Existing Real Example
|
||||
|
||||
The repository contains a real working manifest:
|
||||
|
||||
- `ageofpeace/ageofpeace.yaml`
|
||||
|
||||
Related local content/assets:
|
||||
|
||||
- `ageofpeace/introduction.html`
|
||||
- `ageofpeace/johngu.jpg`
|
||||
- `ageofpeace/age_of_peace_cover.jpg`
|
||||
|
||||
This is the best regression case for:
|
||||
|
||||
- mixed local HTML + Substack sources
|
||||
- cover image handling
|
||||
- local image bundling
|
||||
- section TOC nesting
|
||||
- chapter-header processing options
|
||||
|
||||
## Guidance For Future Agents
|
||||
|
||||
- Preserve manifest backward compatibility unless there is a strong reason not to.
|
||||
- If TOC behavior changes, verify both `nav.xhtml` and `toc.ncx`.
|
||||
- If HTML normalization changes, verify generated XHTML with `xmllint`.
|
||||
- If image handling changes, test both:
|
||||
local HTML image references
|
||||
remote Substack image references
|
||||
- Prefer extending `ebookm-core` behavior and keeping `ebookm-cli` thin.
|
||||
- Update `README.md` whenever user-facing manifest fields or behavior change.
|
||||
@@ -2,7 +2,7 @@ book:
|
||||
title: "Age of Peace"
|
||||
author: "John Gu"
|
||||
language: "en"
|
||||
identifier: "urn:uuid:ageofpeace:johngu"
|
||||
identifier: "urn:book:ageofpeace:johngu"
|
||||
description: "Age of Peace: a novel"
|
||||
|
||||
output:
|
||||
@@ -36,14 +36,16 @@ sections:
|
||||
- "biridana"
|
||||
- id: "part-3"
|
||||
title: "Nameless Country"
|
||||
entries: []
|
||||
entries:
|
||||
- "in_the_east"
|
||||
- "finale"
|
||||
|
||||
|
||||
entries:
|
||||
intro:
|
||||
source:
|
||||
kind: "html"
|
||||
path: "ageofpeace/introduction.html"
|
||||
path: "introduction.html"
|
||||
contested_island:
|
||||
source:
|
||||
kind: "substack"
|
||||
@@ -70,6 +72,21 @@ entries:
|
||||
url: "https://ageofpeace.substack.com/p/biridana"
|
||||
toc:
|
||||
title: "Biridana"
|
||||
in_the_east:
|
||||
source:
|
||||
kind: "substack"
|
||||
url: "https://ageofpeace.substack.com/p/in-the-east"
|
||||
toc:
|
||||
title: "In the East"
|
||||
finale:
|
||||
source:
|
||||
kind: "substack"
|
||||
url: "https://ageofpeace.substack.com/p/age-of-peace"
|
||||
toc:
|
||||
title: "Finale"
|
||||
processing:
|
||||
skip_first_paragraphs: 1
|
||||
|
||||
|
||||
|
||||
link_rules:
|
||||
|
||||
+52
-9
@@ -3,6 +3,7 @@ use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
use chrono::Utc;
|
||||
use quick_xml::escape::escape;
|
||||
use zip::CompressionMethod;
|
||||
use zip::write::{SimpleFileOptions, ZipWriter};
|
||||
@@ -181,11 +182,16 @@ fn build_ncx(manifest: &crate::manifest::Manifest, built: &[BuiltEntry]) -> Stri
|
||||
let section_play_order = play_order;
|
||||
play_order += 1;
|
||||
|
||||
let section_target = section_entries[0]
|
||||
.section_anchor
|
||||
.as_ref()
|
||||
.map(|anchor| format!("text/{}.xhtml#{}", section_entries[0].id, anchor))
|
||||
.unwrap_or_else(|| format!("text/{}.xhtml", section_entries[0].id));
|
||||
let mut child_points = String::new();
|
||||
for entry in &section_entries {
|
||||
child_points.push_str(&format!(
|
||||
"<navPoint id=\"nav-{}\" playOrder=\"{}\"><navLabel><text>{}</text></navLabel><content src=\"text/{}.xhtml\"/></navPoint>",
|
||||
escape(&entry.id),
|
||||
escape(&xml_id("nav", &entry.id)),
|
||||
play_order,
|
||||
escape(&entry.chapter.nav_title),
|
||||
escape(&entry.id)
|
||||
@@ -194,11 +200,11 @@ fn build_ncx(manifest: &crate::manifest::Manifest, built: &[BuiltEntry]) -> Stri
|
||||
}
|
||||
|
||||
nav_points.push_str(&format!(
|
||||
"<navPoint id=\"section-{}\" playOrder=\"{}\"><navLabel><text>{}</text></navLabel><content src=\"text/{}.xhtml\"/>{}</navPoint>",
|
||||
escape(&section.id),
|
||||
"<navPoint id=\"{}\" playOrder=\"{}\"><navLabel><text>{}</text></navLabel><content src=\"{}\"/>{}</navPoint>",
|
||||
escape(&xml_id("section", &section.id)),
|
||||
section_play_order,
|
||||
escape(&section.title),
|
||||
escape(&section_entries[0].id),
|
||||
escape(&section_target),
|
||||
child_points
|
||||
));
|
||||
}
|
||||
@@ -233,14 +239,17 @@ fn build_opf(
|
||||
for entry in built {
|
||||
manifest_items.push_str(&format!(
|
||||
"<item id=\"{}\" href=\"text/{}.xhtml\" media-type=\"application/xhtml+xml\"/>",
|
||||
escape(&entry.id),
|
||||
escape(&xml_id("entry", &entry.id)),
|
||||
escape(&entry.id)
|
||||
));
|
||||
spine_items.push_str(&format!("<itemref idref=\"{}\"/>", escape(&entry.id)));
|
||||
spine_items.push_str(&format!(
|
||||
"<itemref idref=\"{}\"/>",
|
||||
escape(&xml_id("entry", &entry.id))
|
||||
));
|
||||
for asset in &entry.assets {
|
||||
manifest_items.push_str(&format!(
|
||||
"<item id=\"{}\" href=\"{}\" media-type=\"{}\"/>",
|
||||
escape(&asset.id),
|
||||
escape(&xml_id("asset", &asset.id)),
|
||||
escape(&asset.href),
|
||||
escape(&asset.media_type)
|
||||
));
|
||||
@@ -249,8 +258,9 @@ fn build_opf(
|
||||
|
||||
if let Some(cover_href) = cover_href {
|
||||
manifest_items.push_str(&format!(
|
||||
"<item id=\"cover\" href=\"{}\" media-type=\"image/jpeg\" properties=\"cover-image\"/>",
|
||||
escape(cover_href)
|
||||
"<item id=\"cover\" href=\"{}\" media-type=\"{}\" properties=\"cover-image\"/>",
|
||||
escape(cover_href),
|
||||
escape(&media_type_from_href(cover_href))
|
||||
));
|
||||
}
|
||||
|
||||
@@ -260,6 +270,7 @@ fn build_opf(
|
||||
.clone()
|
||||
.unwrap_or_else(|| "Unknown".to_string());
|
||||
let description = manifest.book.description.clone().unwrap_or_default();
|
||||
let modified = Utc::now().format("%Y-%m-%dT%H:%M:%SZ").to_string();
|
||||
format!(
|
||||
r#"<?xml version="1.0" encoding="UTF-8"?>
|
||||
<package version="3.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="bookid">
|
||||
@@ -269,6 +280,7 @@ fn build_opf(
|
||||
<dc:creator>{}</dc:creator>
|
||||
<dc:language>{}</dc:language>
|
||||
<dc:description>{}</dc:description>
|
||||
<meta property="dcterms:modified">{}</meta>
|
||||
</metadata>
|
||||
<manifest>{}</manifest>
|
||||
<spine toc="ncx">{}</spine>
|
||||
@@ -278,11 +290,42 @@ fn build_opf(
|
||||
escape(&author),
|
||||
escape(&manifest.book.language),
|
||||
escape(&description),
|
||||
escape(&modified),
|
||||
manifest_items,
|
||||
spine_items
|
||||
)
|
||||
}
|
||||
|
||||
/// Builds an identifier of the form `<prefix>-<sanitized value>`.
///
/// ASCII letters, ASCII digits, `_`, `-` and `.` in `value` are kept as-is;
/// every other character (including any non-ASCII character) is replaced by a
/// single `-`. The mapping is therefore lossy: two different values such as
/// `"a b"` and `"a-b"` produce the same identifier.
fn xml_id(prefix: &str, value: &str) -> String {
    let sanitized = value.chars().map(|ch| match ch {
        'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '.' => ch,
        _ => '-',
    });

    // Upper bound on the final byte length: replaced characters never grow.
    let mut out = String::with_capacity(prefix.len() + value.len() + 1);
    out.push_str(prefix);
    out.push('-');
    out.extend(sanitized);
    out
}
|
||||
|
||||
/// Guesses an image media type from the file extension of `href`.
///
/// The extension is the text after the last `.` in `href`, compared
/// case-insensitively. An `href` that contains no `.` at all has no extension
/// and maps to `application/octet-stream`, as does any unrecognized extension.
///
/// Returns the media type as an owned string.
fn media_type_from_href(href: &str) -> String {
    // `rsplit_once` yields `None` when there is no dot, so a bare name such as
    // "png" is not mistaken for a ".png" file.
    let extension = href
        .rsplit_once('.')
        .map(|(_, ext)| ext.to_ascii_lowercase());

    let media_type = match extension.as_deref() {
        Some("jpg" | "jpeg") => "image/jpeg",
        Some("png") => "image/png",
        Some("gif") => "image/gif",
        Some("svg") => "image/svg+xml",
        Some("webp") => "image/webp",
        Some("avif") => "image/avif",
        _ => "application/octet-stream",
    };
    media_type.to_string()
}
|
||||
|
||||
const CONTAINER_XML: &str = r#"<?xml version="1.0" encoding="UTF-8"?>
|
||||
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
|
||||
<rootfiles>
|
||||
|
||||
+21
-21
@@ -26,25 +26,7 @@ pub struct InspectResult {
|
||||
pub fn extract_article(loaded: &LoadedSource) -> Result<ExtractedArticle> {
|
||||
let document = Html::parse_document(&loaded.html);
|
||||
let json_ld = extract_primary_json_ld(&document);
|
||||
let title = select_content(
|
||||
&document,
|
||||
&[
|
||||
r#"meta[property="og:title"]"#,
|
||||
r#"article .post-title"#,
|
||||
".post-title",
|
||||
"h1",
|
||||
"title",
|
||||
],
|
||||
"content",
|
||||
)
|
||||
.or_else(|| {
|
||||
select_text(
|
||||
&document,
|
||||
&[r#"article .post-title"#, ".post-title", "h1", "title"],
|
||||
)
|
||||
})
|
||||
.or_else(|| json_ld_string(&json_ld, "headline"))
|
||||
.ok_or_else(|| EbookmError::Extraction {
|
||||
let title = extract_title(&document, &json_ld).ok_or_else(|| EbookmError::Extraction {
|
||||
input: origin_label(&loaded.origin),
|
||||
})?;
|
||||
|
||||
@@ -131,6 +113,20 @@ fn select_content(document: &Html, selectors: &[&str], attr: &str) -> Option<Str
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_title(document: &Html, json_ld: &Option<Value>) -> Option<String> {
|
||||
select_text(
|
||||
document,
|
||||
&[
|
||||
"h1.post-title",
|
||||
".post-title",
|
||||
r#"article .post-title"#,
|
||||
"title",
|
||||
"h1",
|
||||
],
|
||||
)
|
||||
.or_else(|| json_ld_string(json_ld, "headline"))
|
||||
}
|
||||
|
||||
fn select_text(document: &Html, selectors: &[&str]) -> Option<String> {
|
||||
selectors.iter().find_map(|selector| {
|
||||
let selector = Selector::parse(selector).ok()?;
|
||||
@@ -205,7 +201,11 @@ fn json_ld_string(json_ld: &Option<Value>, key: &str) -> Option<String> {
|
||||
|
||||
fn json_ld_author(json_ld: &Option<Value>) -> Option<String> {
|
||||
let author = json_ld.as_ref()?.get("author")?;
|
||||
if let Some(author_name) = author.get(0).and_then(|entry| entry.get("name")).and_then(Value::as_str) {
|
||||
if let Some(author_name) = author
|
||||
.get(0)
|
||||
.and_then(|entry| entry.get("name"))
|
||||
.and_then(Value::as_str)
|
||||
{
|
||||
return Some(author_name.to_string());
|
||||
}
|
||||
if let Some(author_name) = author.get("name").and_then(Value::as_str) {
|
||||
@@ -255,7 +255,7 @@ mod tests {
|
||||
};
|
||||
|
||||
let extracted = extract_article(&loaded).expect("extract article");
|
||||
assert_eq!(extracted.title, "A Contested Island");
|
||||
assert_eq!(extracted.title, "Chapter 1: A Contested Island");
|
||||
assert_eq!(extracted.author.as_deref(), Some("John Gu"));
|
||||
assert_eq!(
|
||||
extracted.published,
|
||||
|
||||
+105
-10
@@ -3,6 +3,7 @@ use std::path::Path;
|
||||
|
||||
use kuchiki::traits::*;
|
||||
use regex::Regex;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use sha1::{Digest, Sha1};
|
||||
use url::Url;
|
||||
|
||||
@@ -159,12 +160,18 @@ fn fetch_local_asset(base_path: &Path, src: &str) -> Result<Asset> {
|
||||
}
|
||||
|
||||
fn fetch_remote_asset(url: &Url) -> Result<Asset> {
|
||||
let bytes = reqwest::blocking::get(url.clone())
|
||||
let response = reqwest::blocking::get(url.clone())
|
||||
.and_then(|response| response.error_for_status())
|
||||
.map_err(|source| EbookmError::Request {
|
||||
url: url.to_string(),
|
||||
source,
|
||||
})?
|
||||
})?;
|
||||
let content_type = response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|value| value.to_str().ok())
|
||||
.map(|value| value.to_string());
|
||||
let bytes = response
|
||||
.bytes()
|
||||
.map_err(|source| EbookmError::Request {
|
||||
url: url.to_string(),
|
||||
@@ -172,7 +179,7 @@ fn fetch_remote_asset(url: &Url) -> Result<Asset> {
|
||||
})?
|
||||
.to_vec();
|
||||
|
||||
let extension = infer_extension_from_str(url.path());
|
||||
let extension = infer_extension_from_response(url.path(), content_type.as_deref(), &bytes);
|
||||
let media_type = infer_media_type(&extension);
|
||||
let digest = Sha1::digest(url.as_str().as_bytes());
|
||||
let id = format!("{:x}", digest);
|
||||
@@ -189,7 +196,8 @@ fn build_asset_from_path(path: &Path) -> Result<Asset> {
|
||||
path: path.display().to_string(),
|
||||
source,
|
||||
})?;
|
||||
let extension = infer_extension_from_path(path);
|
||||
let path_extension = infer_extension_from_path(path);
|
||||
let extension = infer_extension_from_bytes(Some(path_extension.as_str()), &bytes);
|
||||
let media_type = infer_media_type(&extension);
|
||||
let digest = Sha1::digest(path.display().to_string().as_bytes());
|
||||
let id = format!("{:x}", digest);
|
||||
@@ -292,8 +300,8 @@ fn infer_extension_from_path(path: &Path) -> String {
|
||||
path.extension()
|
||||
.and_then(|value| value.to_str())
|
||||
.filter(|value| !value.is_empty())
|
||||
.unwrap_or("bin")
|
||||
.to_string()
|
||||
.map(|value| value.to_ascii_lowercase())
|
||||
.unwrap_or_else(|| "bin".to_string())
|
||||
}
|
||||
|
||||
fn infer_extension_from_str(path: &str) -> String {
|
||||
@@ -301,8 +309,59 @@ fn infer_extension_from_str(path: &str) -> String {
|
||||
.extension()
|
||||
.and_then(|value| value.to_str())
|
||||
.filter(|value| !value.is_empty())
|
||||
.unwrap_or("bin")
|
||||
.to_string()
|
||||
.map(|value| value.to_ascii_lowercase())
|
||||
.unwrap_or_else(|| "bin".to_string())
|
||||
}
|
||||
|
||||
fn infer_extension_from_response(path: &str, content_type: Option<&str>, bytes: &[u8]) -> String {
|
||||
let path_extension = infer_extension_from_str(path);
|
||||
if let Some(extension) = extension_from_content_type(content_type) {
|
||||
return extension;
|
||||
}
|
||||
infer_extension_from_bytes(Some(path_extension.as_str()), bytes)
|
||||
}
|
||||
|
||||
fn infer_extension_from_bytes(path_extension: Option<&str>, bytes: &[u8]) -> String {
|
||||
if let Some(extension) = sniff_image_extension(bytes) {
|
||||
return extension.to_string();
|
||||
}
|
||||
path_extension.unwrap_or("bin").to_string()
|
||||
}
|
||||
|
||||
/// Maps a `Content-Type` header value to a file extension.
///
/// Anything after the first `;` (parameters) is ignored, surrounding
/// whitespace is trimmed, and the comparison is case-insensitive. Returns
/// `None` when the header is absent or names a type outside the table below.
fn extension_from_content_type(content_type: Option<&str>) -> Option<String> {
    const KNOWN_TYPES: [(&str, &str); 6] = [
        ("image/jpeg", "jpeg"),
        ("image/png", "png"),
        ("image/gif", "gif"),
        ("image/svg+xml", "svg"),
        ("image/webp", "webp"),
        ("image/avif", "avif"),
    ];

    let header = content_type?;
    let essence = header.split(';').next()?.trim().to_ascii_lowercase();

    KNOWN_TYPES
        .iter()
        .find(|&&(mime, _)| mime == essence.as_str())
        .map(|&(_, extension)| extension.to_string())
}
|
||||
|
||||
/// Detects a known image format from the leading bytes of a file.
///
/// Returns the canonical extension (`"jpeg"`, `"png"`, `"gif"`, `"webp"` or
/// `"avif"`) when a signature matches, and `None` otherwise, including for
/// empty or truncated input.
fn sniff_image_extension(bytes: &[u8]) -> Option<&'static str> {
    const PNG_SIGNATURE: [u8; 8] = [0x89, b'P', b'N', b'G', 0x0d, 0x0a, 0x1a, 0x0a];

    // `starts_with` is already false for inputs shorter than the prefix, so no
    // separate length guards are needed.
    if bytes.starts_with(&[0xff, 0xd8, 0xff]) {
        return Some("jpeg");
    }
    if bytes.starts_with(&PNG_SIGNATURE) {
        return Some("png");
    }
    if bytes.starts_with(b"GIF87a") || bytes.starts_with(b"GIF89a") {
        return Some("gif");
    }
    // RIFF container: the form type sits after the 4-byte chunk size.
    if bytes.starts_with(b"RIFF") && bytes.get(8..12) == Some(&b"WEBP"[..]) {
        return Some("webp");
    }
    if ftyp_declares_avif(bytes) {
        return Some("avif");
    }
    None
}

/// Reports whether `bytes` begins with an ISO-BMFF `ftyp` box whose major or
/// compatible brands include an AVIF brand (`avif` or `avis`).
///
/// Only the `ftyp` box itself is inspected. Scanning the whole payload would
/// misclassify other ISO-BMFF files (e.g. HEIC) that merely contain the bytes
/// `avif` somewhere in their data.
fn ftyp_declares_avif(bytes: &[u8]) -> bool {
    if bytes.len() < 12 || &bytes[4..8] != b"ftyp" {
        return false;
    }

    // Big-endian box size, counted from the start of the box. Widening cast on
    // 32- and 64-bit targets.
    let declared_len = u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as usize;
    // Never read past the data we have. Sizes too small to hold compatible
    // brands (including the special values 0 and 1) leave only the major brand.
    let box_end = declared_len.clamp(12, bytes.len());

    let major_brand = &bytes[8..12];
    // Compatible brands follow the 4-byte minor version at offset 12.
    let compatible_brands = bytes.get(16..box_end).unwrap_or(&[]);

    std::iter::once(major_brand)
        .chain(compatible_brands.chunks_exact(4))
        .any(|brand| brand == b"avif" || brand == b"avis")
}
|
||||
|
||||
fn infer_media_type(extension: &str) -> String {
|
||||
@@ -312,6 +371,7 @@ fn infer_media_type(extension: &str) -> String {
|
||||
"gif" => "image/gif",
|
||||
"svg" => "image/svg+xml",
|
||||
"webp" => "image/webp",
|
||||
"avif" => "image/avif",
|
||||
_ => "application/octet-stream",
|
||||
}
|
||||
.to_string()
|
||||
@@ -329,9 +389,12 @@ fn to_xhtml_fragment(html: &str) -> String {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::to_xhtml_fragment;
|
||||
use quick_xml::events::Event;
|
||||
use super::{
|
||||
extension_from_content_type, infer_extension_from_bytes, infer_extension_from_response,
|
||||
to_xhtml_fragment,
|
||||
};
|
||||
use quick_xml::Reader;
|
||||
use quick_xml::events::Event;
|
||||
|
||||
#[test]
|
||||
fn converts_void_html_tags_to_xhtml_self_closing_tags() {
|
||||
@@ -354,4 +417,36 @@ mod tests {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn prefers_actual_jpeg_bytes_over_png_path() {
    // Payload starts with the JPEG signature even though the path says `.png`;
    // with no content type supplied, the sniffed format must win.
    let payload: [u8; 4] = [0xff, 0xd8, 0xff, 0xdb];

    let inferred = infer_extension_from_response("image.png", None, &payload);

    assert_eq!(inferred, "jpeg");
}
|
||||
|
||||
#[test]
fn prefers_content_type_for_remote_assets() {
    // The header carries a parameter and disagrees with the path extension;
    // the header's media type decides the result.
    let payload = [0x89, b'P', b'N', b'G', 0x0d, 0x0a, 0x1a, 0x0a];
    let from_response =
        infer_extension_from_response("image.jpeg", Some("image/png; charset=binary"), &payload);
    assert_eq!(from_response, "png");

    let from_header = extension_from_content_type(Some("image/avif"));
    assert_eq!(from_header, Some("avif".to_string()));
}
|
||||
|
||||
#[test]
fn preserves_path_extension_when_bytes_are_unknown() {
    // No known signature in the payload, so the supplied extension is returned
    // unchanged.
    let payload = b"not an image";

    let inferred = infer_extension_from_bytes(Some("jpg"), payload);

    assert_eq!(inferred, "jpg");
}
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@ struct ChapterHeaderOptions {
|
||||
/// One manifest entry after it has been built into a chapter, as pushed by
/// `build_epub`.
pub struct BuiltEntry {
|
||||
/// Entry id from the manifest (a clone of `entry_id`).
pub id: String,
|
||||
/// Copied from the entry's `toc.hidden` setting.
pub hidden_from_toc: bool,
|
||||
/// `Some("section-<section id>")` when this entry is the first entry of its
/// section, `None` otherwise. `build_chapter` emits it as an empty
/// `<div id="...">` and `build_ncx` uses it as the section's link fragment.
pub section_anchor: Option<String>,
|
||||
/// Chapter produced by `build_chapter` for this entry.
pub chapter: BuiltChapter,
|
||||
/// Assets taken from the normalized document for this entry.
pub assets: Vec<Asset>,
|
||||
}
|
||||
@@ -186,10 +187,23 @@ pub fn build_epub(manifest: &Manifest, manifest_path: &Path) -> Result<()> {
|
||||
&metadata,
|
||||
)?;
|
||||
|
||||
let section_anchor = section
|
||||
.entries
|
||||
.first()
|
||||
.filter(|first_entry_id| *first_entry_id == entry_id)
|
||||
.map(|_| format!("section-{}", section.id));
|
||||
|
||||
built_entries.push(BuiltEntry {
|
||||
id: entry_id.clone(),
|
||||
hidden_from_toc: entry.toc.hidden,
|
||||
chapter: build_chapter(entry_id, entry, &manifest.defaults, &normalized),
|
||||
section_anchor: section_anchor.clone(),
|
||||
chapter: build_chapter(
|
||||
entry_id,
|
||||
entry,
|
||||
&manifest.defaults,
|
||||
&normalized,
|
||||
section_anchor.as_deref(),
|
||||
),
|
||||
assets: normalized.assets,
|
||||
});
|
||||
}
|
||||
@@ -215,6 +229,7 @@ fn build_chapter(
|
||||
entry: &crate::manifest::EntryDefinition,
|
||||
defaults: &crate::manifest::DefaultsConfig,
|
||||
doc: &NormalizedDocument,
|
||||
section_anchor: Option<&str>,
|
||||
) -> BuiltChapter {
|
||||
let nav_title = entry.toc.title.clone().unwrap_or_else(|| doc.title.clone());
|
||||
let header = resolve_header_options(entry, defaults);
|
||||
@@ -243,6 +258,10 @@ fn build_chapter(
|
||||
format!(r#"<div class="chapter-meta">{}</div>"#, meta_lines.join(""))
|
||||
};
|
||||
|
||||
let section_anchor_markup = section_anchor
|
||||
.map(|anchor| format!(r#"<div id="{}"></div>"#, escape_html(anchor)))
|
||||
.unwrap_or_default();
|
||||
|
||||
let xhtml = format!(
|
||||
r#"<?xml version="1.0" encoding="UTF-8"?>
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
@@ -251,6 +270,7 @@ fn build_chapter(
|
||||
<link rel="stylesheet" type="text/css" href="../styles/book.css"/>
|
||||
</head>
|
||||
<body id="{}">
|
||||
{}
|
||||
<h1>{}</h1>
|
||||
{}
|
||||
{}
|
||||
@@ -258,6 +278,7 @@ fn build_chapter(
|
||||
</html>"#,
|
||||
escape_html(&doc.title),
|
||||
escape_html(entry_id),
|
||||
section_anchor_markup,
|
||||
escape_html(&doc.title),
|
||||
meta_block,
|
||||
doc.body_xhtml
|
||||
@@ -425,8 +446,10 @@ link_rules:
|
||||
assert!(!chapter_contents.contains("urn:uuid:test-book"));
|
||||
assert!(chapter_contents.contains("../assets/"));
|
||||
drop(chapter);
|
||||
assert!(archive
|
||||
.file_names()
|
||||
.any(|name| name.starts_with("OEBPS/assets/") && name.ends_with(".jpg")));
|
||||
assert!(
|
||||
archive
|
||||
.file_names()
|
||||
.any(|name| name.starts_with("OEBPS/assets/") && name.ends_with(".jpg"))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user