From ef8ace4f35393d760c46b5809185600fe0ee7adb Mon Sep 17 00:00:00 2001 From: itsjunetime Date: Thu, 6 Mar 2025 10:30:05 -0700 Subject: [PATCH] Use new mupdf search API --- CHANGELOG.md | 1 + Cargo.lock | 38 +++++++++++++++++++------------------- benches/for_profiling.rs | 2 +- benches/rendering.rs | 32 +++++++++++++++++++++++++++++--- benches/utils.rs | 6 +++++- src/renderer.rs | 28 ++++++++-------------------- 6 files changed, 63 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e03d0a..d59b036 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Unreleased - Update ratatui(-image) dependencies +- Use new mupdf search API for slightly better performance # v0.3.0 diff --git a/Cargo.lock b/Cargo.lock index d5f5c75..76b0c9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -322,9 +322,9 @@ checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" [[package]] name = "bytes" -version = "1.10.0" +version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" +checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" [[package]] name = "cassowary" @@ -820,9 +820,9 @@ dependencies = [ [[package]] name = "either" -version = "1.14.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7914353092ddf589ad78f25c5c1c21b7f80b0ff8621e7c814c3485b5306da9d" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "equivalent" @@ -1718,26 +1718,26 @@ dependencies = [ [[package]] name = "mupdf" version = "0.4.4" -source = "git+https://github.com/itsjunetime/mupdf-rs?branch=june%2Fmupdf_1_25#7601c9a96228bf5ab733f374175579363105a211" +source = "git+https://github.com/itsjunetime/mupdf-rs?branch=june%2Fmupdf_1_25#9dab2e5deff2a018fe2bc001e7d97b8ace08c4d7" dependencies = [ "bitflags 2.9.0", "font-kit", "mupdf-sys", "num_enum", "once_cell", - "zerocopy 0.8.21", + "zerocopy 0.8.22", ] [[package]] name = "mupdf-sys" version = "0.4.4" -source = "git+https://github.com/itsjunetime/mupdf-rs?branch=june%2Fmupdf_1_25#7601c9a96228bf5ab733f374175579363105a211" +source = "git+https://github.com/itsjunetime/mupdf-rs?branch=june%2Fmupdf_1_25#9dab2e5deff2a018fe2bc001e7d97b8ace08c4d7" dependencies = [ "bindgen", "cc", "pkg-config", "regex", - "zerocopy 0.8.21", + "zerocopy 0.8.22", ] [[package]] @@ -2181,9 +2181,9 @@ dependencies = [ [[package]] name = "proc-macro-crate" -version = "3.2.0" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" +checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35" dependencies = [ "toml_edit", ] @@ -2970,9 +2970,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.37" +version = "0.3.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" +checksum = "dad298b01a40a23aac4580b67e3dbedb7cc8402f3592d7f49469de2ea4aecdd8" dependencies = [ "deranged", "libc", @@ -2985,9 +2985,9 @@ dependencies = [ [[package]] name = "time-core" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" +checksum = "765c97a5b985b7c11d7bc27fa927dc4fe6af3a6dfb021d28deb60d3bf51e76ef" [[package]] name = "tinytemplate" @@ -3795,11 +3795,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.21" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf01143b2dd5d134f11f545cf9f1431b13b749695cb33bcce051e7568f99478" +checksum = "09612fda0b63f7cb9e0af7e5916fe5a1f8cdcb066829f10f36883207628a4872" dependencies = [ - "zerocopy-derive 0.8.21", + "zerocopy-derive 0.8.22", ] [[package]] @@ -3815,9 +3815,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.21" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712c8386f4f4299382c9abee219bee7084f78fb939d88b6840fcc1320d5f6da2" +checksum = "79f81d38d7a2ed52d8f034e62c568e111df9bf8aba2f7cf19ddc5bf7bd89d520" dependencies = [ "proc-macro2", "quote", diff --git a/benches/for_profiling.rs b/benches/for_profiling.rs index a9d4142..f03a017 100644 --- a/benches/for_profiling.rs +++ b/benches/for_profiling.rs @@ -9,5 +9,5 @@ async fn main() { .nth(1) .expect("Please enter a file to profile"); - utils::render_doc(file).await; + utils::render_doc(file, None).await; } diff --git a/benches/rendering.rs b/benches/rendering.rs index fadea83..35fe380 100644 --- a/benches/rendering.rs +++ b/benches/rendering.rs @@ -10,7 +10,7 @@ use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main, profile use futures_util::StreamExt; use tdf::{ converter::{ConvertedPage, ConverterMsg}, - renderer::{PageInfo, RenderInfo, fill_default} + renderer::{fill_default, PageInfo, RenderInfo} }; use utils::{ RenderState, handle_converter_msg, handle_renderer_msg, render_doc, start_all_rendering, @@ -27,7 +27,7 @@ fn render_full(c: &mut Criterion) { for file in FILES { c.bench_with_input(BenchmarkId::new("render_full", file), &file, |b, &file| { b.to_async(tokio::runtime::Runtime::new().unwrap()) - .iter(|| render_doc(file)) + .iter(|| render_doc(file, None)) }); } } @@ -61,6 +61,32 @@ fn only_converting(c: &mut Criterion) { } } +fn search_short_common(c: &mut Criterion) { + for file in FILES { + c.bench_with_input( + BenchmarkId::new("search_short_common", file), + &file, + |b, &file| { + b.to_async(tokio::runtime::Runtime::new().unwrap()) + .iter(|| render_doc(file, Some("an"))) + } + ); + } +} + +fn search_long_rare(c: &mut Criterion) { + for file in FILES { + c.bench_with_input( + BenchmarkId::new("search_long_rare", file), + &file, + |b, &file| { + b.to_async(tokio::runtime::Runtime::new().unwrap()) + .iter(|| render_doc(file, Some("this is long and rare"))) + } + ); + } +} + pub async fn render_first_page(path: impl AsRef) { let RenderState { mut from_render_rx, @@ -175,6 +201,6 @@ impl Profiler for CpuProfiler { criterion_group!( name = benches; config = Criterion::default().sample_size(40).with_profiler(CpuProfiler); - targets = render_full, render_to_first_page, only_converting + targets = render_full, render_to_first_page, only_converting, search_short_common, search_long_rare ); criterion_main!(benches); diff --git a/benches/utils.rs b/benches/utils.rs index b50e394..b07a94c 100644 --- a/benches/utils.rs +++ b/benches/utils.rs @@ -129,7 +129,7 @@ pub fn start_all_rendering(path: impl AsRef) -> RenderState { } } -pub async fn render_doc(path: impl AsRef) { +pub async fn render_doc(path: impl AsRef, search_term: Option<&str>) { let RenderState { mut from_render_rx, mut from_converter_rx, @@ -138,6 +138,10 @@ pub async fn render_doc(path: impl AsRef) { to_render_tx } = start_all_rendering(path); + if let Some(term) = search_term { + to_render_tx.send(RenderNotif::Search(term.to_owned())).unwrap(); + } + while pages.is_empty() || pages.iter().any(Option::is_none) { tokio::select! { Some(renderer_msg) = from_render_rx.next() => { diff --git a/src/renderer.rs b/src/renderer.rs index 19f2b81..3bc5b48 100644 --- a/src/renderer.rs +++ b/src/renderer.rs @@ -3,7 +3,7 @@ use std::{thread::sleep, time::Duration}; use crossterm::terminal::WindowSize; use flume::{Receiver, SendError, Sender, TryRecvError}; use itertools::Itertools; -use mupdf::{Colorspace, Document, Matrix, Page, Pixmap}; +use mupdf::{Colorspace, Document, Matrix, Page, Pixmap, TextPageOptions}; use ratatui::layout::Rect; pub enum RenderNotif { @@ -328,25 +328,13 @@ fn render_single_page_to_ctx( invert: bool, (area_w, area_h): (f32, f32) ) -> Result, mupdf::error::Error> { - let mut max_hits = 10; - let result_rects = loop { - let rects = search_term - .as_ref() - // mupdf allocates a buffer of the size we give it to try to fill it with results. If we - // pass in u32::MAX, it allocates too much memory to function. If we pass too small of a - // number in, we may miss out on some of the results. Ideally, we'd like to make a better - // interface than this, but we're stuck with this kinda ugly looping until we make sure - // that we've found every instance of it on this page. - .map(|term| page.search(term, max_hits)) - .transpose()? - .unwrap_or_default(); - - if rects.len() < (max_hits as usize) { - break rects; - } - - max_hits *= 2; - }; + let result_rects = search_term + .map(|term| { + page.to_text_page(TextPageOptions::empty()) + .and_then(|page| page.search(term)) + }) + .transpose()? + .unwrap_or_default(); // If there are no search terms on this page, and we've already rendered it with no search // terms, then just return none to avoid this computation