Use new mupdf search API

This commit is contained in:
itsjunetime
2025-03-06 10:30:05 -07:00
parent 6462c09030
commit ef8ace4f35
6 changed files with 63 additions and 44 deletions
+1
View File
@@ -1,6 +1,7 @@
# Unreleased
- Update ratatui(-image) dependencies
- Use new mupdf search API for slightly better performance
# v0.3.0
Generated
+19 -19
View File
@@ -322,9 +322,9 @@ checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
[[package]]
name = "bytes"
version = "1.10.0"
version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
[[package]]
name = "cassowary"
@@ -820,9 +820,9 @@ dependencies = [
[[package]]
name = "either"
version = "1.14.0"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7914353092ddf589ad78f25c5c1c21b7f80b0ff8621e7c814c3485b5306da9d"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "equivalent"
@@ -1718,26 +1718,26 @@ dependencies = [
[[package]]
name = "mupdf"
version = "0.4.4"
source = "git+https://github.com/itsjunetime/mupdf-rs?branch=june%2Fmupdf_1_25#7601c9a96228bf5ab733f374175579363105a211"
source = "git+https://github.com/itsjunetime/mupdf-rs?branch=june%2Fmupdf_1_25#9dab2e5deff2a018fe2bc001e7d97b8ace08c4d7"
dependencies = [
"bitflags 2.9.0",
"font-kit",
"mupdf-sys",
"num_enum",
"once_cell",
"zerocopy 0.8.21",
"zerocopy 0.8.22",
]
[[package]]
name = "mupdf-sys"
version = "0.4.4"
source = "git+https://github.com/itsjunetime/mupdf-rs?branch=june%2Fmupdf_1_25#7601c9a96228bf5ab733f374175579363105a211"
source = "git+https://github.com/itsjunetime/mupdf-rs?branch=june%2Fmupdf_1_25#9dab2e5deff2a018fe2bc001e7d97b8ace08c4d7"
dependencies = [
"bindgen",
"cc",
"pkg-config",
"regex",
"zerocopy 0.8.21",
"zerocopy 0.8.22",
]
[[package]]
@@ -2181,9 +2181,9 @@ dependencies = [
[[package]]
name = "proc-macro-crate"
version = "3.2.0"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b"
checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35"
dependencies = [
"toml_edit",
]
@@ -2970,9 +2970,9 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.37"
version = "0.3.39"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21"
checksum = "dad298b01a40a23aac4580b67e3dbedb7cc8402f3592d7f49469de2ea4aecdd8"
dependencies = [
"deranged",
"libc",
@@ -2985,9 +2985,9 @@ dependencies = [
[[package]]
name = "time-core"
version = "0.1.2"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
checksum = "765c97a5b985b7c11d7bc27fa927dc4fe6af3a6dfb021d28deb60d3bf51e76ef"
[[package]]
name = "tinytemplate"
@@ -3795,11 +3795,11 @@ dependencies = [
[[package]]
name = "zerocopy"
version = "0.8.21"
version = "0.8.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcf01143b2dd5d134f11f545cf9f1431b13b749695cb33bcce051e7568f99478"
checksum = "09612fda0b63f7cb9e0af7e5916fe5a1f8cdcb066829f10f36883207628a4872"
dependencies = [
"zerocopy-derive 0.8.21",
"zerocopy-derive 0.8.22",
]
[[package]]
@@ -3815,9 +3815,9 @@ dependencies = [
[[package]]
name = "zerocopy-derive"
version = "0.8.21"
version = "0.8.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712c8386f4f4299382c9abee219bee7084f78fb939d88b6840fcc1320d5f6da2"
checksum = "79f81d38d7a2ed52d8f034e62c568e111df9bf8aba2f7cf19ddc5bf7bd89d520"
dependencies = [
"proc-macro2",
"quote",
+1 -1
View File
@@ -9,5 +9,5 @@ async fn main() {
.nth(1)
.expect("Please enter a file to profile");
utils::render_doc(file).await;
utils::render_doc(file, None).await;
}
+29 -3
View File
@@ -10,7 +10,7 @@ use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main, profile
use futures_util::StreamExt;
use tdf::{
converter::{ConvertedPage, ConverterMsg},
renderer::{PageInfo, RenderInfo, fill_default}
renderer::{fill_default, PageInfo, RenderInfo}
};
use utils::{
RenderState, handle_converter_msg, handle_renderer_msg, render_doc, start_all_rendering,
@@ -27,7 +27,7 @@ fn render_full(c: &mut Criterion) {
for file in FILES {
c.bench_with_input(BenchmarkId::new("render_full", file), &file, |b, &file| {
b.to_async(tokio::runtime::Runtime::new().unwrap())
.iter(|| render_doc(file))
.iter(|| render_doc(file, None))
});
}
}
@@ -61,6 +61,32 @@ fn only_converting(c: &mut Criterion) {
}
}
/// Benchmarks a full document render with a short search term (`"an"`) supplied to
/// `render_doc`, once for every entry in `FILES`.
fn search_short_common(c: &mut Criterion) {
	for path in FILES {
		let id = BenchmarkId::new("search_short_common", path);
		c.bench_with_input(id, &path, |bencher, &doc| {
			// Each benchmark input gets its own freshly-built tokio runtime to drive the future.
			let runtime = tokio::runtime::Runtime::new().unwrap();
			bencher
				.to_async(runtime)
				.iter(|| render_doc(doc, Some("an")));
		});
	}
}
/// Benchmarks a full document render with a long, multi-word search term
/// (`"this is long and rare"`) supplied to `render_doc`, once for every entry in `FILES`.
fn search_long_rare(c: &mut Criterion) {
	for path in FILES {
		let id = BenchmarkId::new("search_long_rare", path);
		c.bench_with_input(id, &path, |bencher, &doc| {
			// Each benchmark input gets its own freshly-built tokio runtime to drive the future.
			let runtime = tokio::runtime::Runtime::new().unwrap();
			bencher
				.to_async(runtime)
				.iter(|| render_doc(doc, Some("this is long and rare")));
		});
	}
}
pub async fn render_first_page(path: impl AsRef<Path>) {
let RenderState {
mut from_render_rx,
@@ -175,6 +201,6 @@ impl Profiler for CpuProfiler {
criterion_group!(
name = benches;
config = Criterion::default().sample_size(40).with_profiler(CpuProfiler);
targets = render_full, render_to_first_page, only_converting
targets = render_full, render_to_first_page, only_converting, search_short_common, search_long_rare
);
criterion_main!(benches);
+5 -1
View File
@@ -129,7 +129,7 @@ pub fn start_all_rendering(path: impl AsRef<Path>) -> RenderState {
}
}
pub async fn render_doc(path: impl AsRef<Path>) {
pub async fn render_doc(path: impl AsRef<Path>, search_term: Option<&str>) {
let RenderState {
mut from_render_rx,
mut from_converter_rx,
@@ -138,6 +138,10 @@ pub async fn render_doc(path: impl AsRef<Path>) {
to_render_tx
} = start_all_rendering(path);
if let Some(term) = search_term {
to_render_tx.send(RenderNotif::Search(term.to_owned())).unwrap();
}
while pages.is_empty() || pages.iter().any(Option::is_none) {
tokio::select! {
Some(renderer_msg) = from_render_rx.next() => {
+8 -20
View File
@@ -3,7 +3,7 @@ use std::{thread::sleep, time::Duration};
use crossterm::terminal::WindowSize;
use flume::{Receiver, SendError, Sender, TryRecvError};
use itertools::Itertools;
use mupdf::{Colorspace, Document, Matrix, Page, Pixmap};
use mupdf::{Colorspace, Document, Matrix, Page, Pixmap, TextPageOptions};
use ratatui::layout::Rect;
pub enum RenderNotif {
@@ -328,25 +328,13 @@ fn render_single_page_to_ctx(
invert: bool,
(area_w, area_h): (f32, f32)
) -> Result<Option<RenderedContext>, mupdf::error::Error> {
let mut max_hits = 10;
let result_rects = loop {
let rects = search_term
.as_ref()
// mupdf allocates a buffer of the size we give it to try to fill it with results. If we
// pass in u32::MAX, it allocates too much memory to function. If we pass too small of a
// number in, we may miss out on some of the results. Ideally, we'd like to make a better
// interface than this, but we're stuck with this kinda ugly looping until we make sure
// that we've found every instance of it on this page.
.map(|term| page.search(term, max_hits))
.transpose()?
.unwrap_or_default();
if rects.len() < (max_hits as usize) {
break rects;
}
max_hits *= 2;
};
let result_rects = search_term
.map(|term| {
page.to_text_page(TextPageOptions::empty())
.and_then(|page| page.search(term))
})
.transpose()?
.unwrap_or_default();
// If there are no search terms on this page, and we've already rendered it with no search
// terms, then just return none to avoid this computation