From fc10dc8ffe70bf9eac61115a3dc11738c69df0f6 Mon Sep 17 00:00:00 2001 From: itsjunetime Date: Thu, 6 Mar 2025 16:26:56 -0700 Subject: [PATCH] Dramatically improve search performance by pausing rendering every once-in-a-while --- CHANGELOG.md | 1 + benches/rendering.rs | 4 +- benches/utils.rs | 18 +++- src/lib.rs | 7 ++ src/main.rs | 12 ++- src/renderer.rs | 194 +++++++++++++++++++++++++++++++++++-------- 6 files changed, 194 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d59b036..11d48a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ - Update ratatui(-image) dependencies - Use new mupdf search API for slightly better performance +- Pause rendering every once in a while while there's a search term to enable searching across the entire document more quickly # v0.3.0 diff --git a/benches/rendering.rs b/benches/rendering.rs index 35fe380..b7a3457 100644 --- a/benches/rendering.rs +++ b/benches/rendering.rs @@ -10,7 +10,7 @@ use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main, profile use futures_util::StreamExt; use tdf::{ converter::{ConvertedPage, ConverterMsg}, - renderer::{fill_default, PageInfo, RenderInfo} + renderer::{PageInfo, RenderInfo, fill_default} }; use utils::{ RenderState, handle_converter_msg, handle_renderer_msg, render_doc, start_all_rendering, @@ -120,7 +120,7 @@ async fn render_all_files(path: &'static str) -> Vec { while let Some(info) = from_render_rx.next().await { match info.expect("Renderer ran into an error while rendering") { - RenderInfo::Reloaded => (), + RenderInfo::Reloaded | RenderInfo::SearchResults { .. 
} => (), RenderInfo::NumPages(num) => fill_default(&mut pages, num), RenderInfo::Page(page) => { let num = page.page_num; diff --git a/benches/utils.rs b/benches/utils.rs index b07a94c..aba8776 100644 --- a/benches/utils.rs +++ b/benches/utils.rs @@ -21,8 +21,8 @@ pub fn handle_renderer_msg( to_converter_tx.send(ConverterMsg::NumPages(num)).unwrap(); } Ok(RenderInfo::Page(info)) => to_converter_tx.send(ConverterMsg::AddImg(info)).unwrap(), - // We can ignore the `Reloaded` variant 'cause that's only used to send info to the TUI - Ok(RenderInfo::Reloaded) => (), + // We can ignore these variants 'cause they're only used to send info to the TUI + Ok(RenderInfo::Reloaded | RenderInfo::SearchResults { .. }) => (), Err(e) => panic!("Got error from renderer: {e:?}") } } @@ -77,7 +77,15 @@ pub fn start_rendering_loop( width: columns * FONT_SIZE.0 }; - std::thread::spawn(move || start_rendering(&str_path, to_main_tx, from_main_rx, size)); + std::thread::spawn(move || { + start_rendering( + &str_path, + to_main_tx, + from_main_rx, + size, + tdf::PrerenderLimit::All + ) + }); let main_area = Rect { x: 0, @@ -139,7 +147,9 @@ pub async fn render_doc(path: impl AsRef, search_term: Option<&str>) { } = start_all_rendering(path); if let Some(term) = search_term { - to_render_tx.send(RenderNotif::Search(term.to_owned())).unwrap(); + to_render_tx + .send(RenderNotif::Search(term.to_owned())) + .unwrap(); } while pages.is_empty() || pages.iter().any(Option::is_none) { diff --git a/src/lib.rs b/src/lib.rs index d7b9ba6..1da041b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,13 @@ +use std::num::NonZeroUsize; + #[global_allocator] static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; +pub enum PrerenderLimit { + All, + Limited(NonZeroUsize) +} + pub mod converter; pub mod renderer; pub mod skip; diff --git a/src/main.rs b/src/main.rs index c7af9cb..4620d81 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,6 +17,7 @@ use notify::{Event, EventKind, RecursiveMode, Watcher}; use 
ratatui::{Terminal, backend::CrosstermBackend}; use ratatui_image::picker::Picker; use tdf::{ + PrerenderLimit, converter::{ConvertedPage, ConverterMsg, run_conversion_loop}, renderer::{self, RenderError, RenderInfo, RenderNotif}, tui::{BottomMessage, InputAction, MessageSetting, Tui} @@ -47,6 +48,9 @@ async fn main() -> Result<(), Box> { optional -m,--max-wide max_wide: NonZeroUsize /// Fullscreen the pdf (hide document name, page count, etc) optional -f,--fullscreen fullscreen: bool + /// The number of pages to prerender surrounding the currently-shown page; 0 means no + /// limit. By default, there is no limit. + optional -p,--prerender prerender: usize /// PDF file to read required file: PathBuf }; @@ -141,8 +145,12 @@ async fn main() -> Result<(), Box> { // then we want to spawn off the rendering task // We need to use the thread::spawn API so that this exists in a thread not owned by tokio, // since the methods we call in `start_rendering` will panic if called in an async context + let prerender = flags + .prerender + .and_then(NonZeroUsize::new) + .map_or(PrerenderLimit::All, PrerenderLimit::Limited); std::thread::spawn(move || { - renderer::start_rendering(&file_path, render_tx, render_rx, window_size) + renderer::start_rendering(&file_path, render_tx, render_rx, window_size, prerender) }); let mut ev_stream = crossterm::event::EventStream::new(); @@ -211,6 +219,8 @@ async fn main() -> Result<(), Box> { to_converter.send(ConverterMsg::AddImg(info))?; }, RenderInfo::Reloaded => tui.set_msg(MessageSetting::Some(BottomMessage::Reloaded)), + RenderInfo::SearchResults { page_num, num_results } => + tui.got_num_results_on_page(page_num, num_results), }, Err(e) => tui.show_error(e), } diff --git a/src/renderer.rs b/src/renderer.rs index 3bc5b48..3d7354b 100644 --- a/src/renderer.rs +++ b/src/renderer.rs @@ -1,11 +1,15 @@ -use std::{thread::sleep, time::Duration}; +use std::{num::NonZeroUsize, thread::sleep, time::Duration}; use crossterm::terminal::WindowSize; use 
flume::{Receiver, SendError, Sender, TryRecvError}; use itertools::Itertools; -use mupdf::{Colorspace, Document, Matrix, Page, Pixmap, TextPageOptions}; +use mupdf::{ + Colorspace, Document, Matrix, Page, Pixmap, Quad, TextPageOptions, text_page::SearchHitResponse +}; use ratatui::layout::Rect; +use crate::PrerenderLimit; + pub enum RenderNotif { Area(Rect), JumpToPage(usize), @@ -24,6 +28,7 @@ pub enum RenderError { pub enum RenderInfo { NumPages(usize), Page(PageInfo), + SearchResults { page_num: usize, num_results: usize }, Reloaded } @@ -43,7 +48,15 @@ pub struct ImageData { #[derive(Default)] struct PrevRender { successful: bool, - contained_term: Option + contained_term: PageSearchResult +} + +#[derive(Default, PartialEq)] +enum PageSearchResult { + #[default] + Unknown, + DidNotContain, + Contained(NonZeroUsize) } #[inline] @@ -69,7 +82,8 @@ pub fn start_rendering( path: &str, sender: Sender>, receiver: Receiver, - size: WindowSize + size: WindowSize, + prerender: PrerenderLimit ) -> Result<(), SendError>> { // We want this outside of 'reload so that if the doc reloads, the search term that somebody // set will still get highlighted in the reloaded doc @@ -159,7 +173,7 @@ pub fn start_rendering( // what we do with a notif is the same regardless of if we're in the middle of // rendering the list of pages or we're all done macro_rules! handle_notif { - ($notif:ident) => { + ($notif:ident) => {{ match $notif { RenderNotif::Reload => continue 'reload, RenderNotif::Invert => { @@ -184,7 +198,8 @@ pub fn start_rendering( // the pages wherein there were already no search results. So this // is a little optimization to allow that. 
for page in &mut rendered { - if !page.successful || page.contained_term != Some(true) { + if let PageSearchResult::Contained(_) = page.contained_term { + page.contained_term = PageSearchResult::DidNotContain; page.successful = false; } } @@ -195,28 +210,43 @@ pub fn start_rendering( // term, we can render them with the term, but if they don't, we // don't need to re-render and send it over again. for page in &mut rendered { - page.contained_term = None; + page.contained_term = PageSearchResult::Unknown; } search_term = Some(term); } continue 'render_pages; } } - }; + }}; } let (left, right) = rendered.split_at_mut(start_point); + // This is our iterator over all the pages we want to look at and render. It uses this + // weird 'interleave' thing to render pages on *both sides* of the currently-displayed + // page in case they decide to go forward or backwards. let page_iter = right .iter_mut() .enumerate() - .map(|(idx, p)| (idx + start_point, p)) + .map(move |(idx, p)| (idx + start_point, p)) .interleave( left.iter_mut() .rev() .enumerate() - .map(|(idx, p)| (start_point - (idx + 1), p)) - ); + .map(move |(idx, p)| (start_point - (idx + 1), p)) + ) + .take(match (&prerender, &search_term) { + // If the user has limited the amount of pages they want to prerender, then we + // just do what they ask. Nice and easy. 
+ (PrerenderLimit::Limited(l), _) => l.get(), + // If they haven't limited it, but we don't have any search term that we're + // currently looking for, just go for all of it + (PrerenderLimit::All, None) => n_pages, + // If they haven't limited it, and we DO have a search term we need to look + // for, just do 20 so that we don't dramatically slow down the search process + // since they've specifically initiated that and so we want it to take priority + (PrerenderLimit::All, Some(_)) => 20 + }); let area_w = f32::from(area.width) * f32::from(col_w); let area_h = f32::from(area.height) * f32::from(col_h); @@ -225,10 +255,9 @@ pub fn start_rendering( for (num, rendered) in page_iter { // we only want to continue if one of the following is met: // 1. It failed to render last time (we want to retry) - // 2. The `contained_term` is set to None (representing 'Unknown'), meaning that we - // need to at least check if it contains the current term to see if it needs a - // re-render - if rendered.successful && rendered.contained_term.is_some() { + // 2. The `contained_term` is set to Unknown, meaning that we need to at least + // check if it contains the current term to see if it needs a re-render + if rendered.successful && rendered.contained_term != PageSearchResult::Unknown { continue; } @@ -251,14 +280,11 @@ pub fn start_rendering( Ok(p) => p }; - let rendered_with_no_results = - rendered.successful && rendered.contained_term == Some(false); - // render the page match render_single_page_to_ctx( &page, search_term.as_deref(), - rendered_with_no_results, + rendered, invert, (area_w, area_h) ) { @@ -271,7 +297,8 @@ pub fn start_rendering( // we make a potentially incorrect assumption here that writing the context // to a png won't fail, and mark that it all rendered correctly here before // spawning off the thread to do so and send it. 
- rendered.contained_term = Some(ctx.result_rects.is_empty()); + rendered.contained_term = NonZeroUsize::new(ctx.result_rects.len()) + .map_or(PageSearchResult::DidNotContain, PageSearchResult::Contained); rendered.successful = true; let w = ctx.pixmap.width(); @@ -302,6 +329,74 @@ pub fn start_rendering( } } + // Now, if we have a search term, we want to look through the rest of the document past + // what we've just rendered (and looked at the search results of) + if let Some(ref term) = search_term { + let mut search_start = start_point; + loop { + // hmm maybe this would be nice to make configurable but whatever + const SEARCH_AT_TIME: usize = 20; + + // So now we want to look through all the remaining pages, starting after this + // current one (we don't do interleaving here 'cause I'm lazy) + let page_idx = rendered[search_start..] + .iter_mut() + .enumerate() + // And we only want to take max SEARCH_AT_TIME of them since we don't want + // to block on this for *too* long + .take(SEARCH_AT_TIME) + // We want to remove all the ones that we've already determined did not + // contain the current term... + .filter(|(_, r)| r.contained_term != PageSearchResult::DidNotContain) + // And then adjust the index to be correct for the actual page number + .map(|(idx, r)| (idx + search_start, r)); + + // then we go through each... + for (page_num, rendered) in page_idx { + // We get the number of results (using the function that specifically just + // counts them instead of determining the quads of them all) + let num_results = doc + .load_page(page_num as i32) + .and_then(|page| count_search_results(&page, term)) + .unwrap(); + + // Mark the `contained_term` field with this updated value... 
+ rendered.contained_term = NonZeroUsize::new(num_results) + .map_or(PageSearchResult::DidNotContain, PageSearchResult::Contained); + + // And send it over to the tui so that they can know and use it to + // determine what next page to jump to + sender.send(Ok(RenderInfo::SearchResults { + page_num, + num_results + }))?; + } + + // then once we're done with this iteration, we increment search_start to + // prepare for the next iteration + search_start += SEARCH_AT_TIME; + + // now, we want to check if we've gone past the end - if so, we go back to the + // beginning so we can get the pages before the current one. + if search_start > n_pages { + search_start = 0; + } else if ((search_start - SEARCH_AT_TIME) + 1..search_start) + .contains(&start_point) + { + // And if we are back at the place we started, we've looked through all the + // pages. Quit. + break; + } + + match receiver.try_recv() { + // If there are no messages left for us, just continue in this loop + Err(TryRecvError::Empty) => (), + Err(TryRecvError::Disconnected) => return Ok(()), + Ok(msg) => handle_notif!(msg) + } + } + } + // Then once we've rendered all these pages, wait until we get another notification // that this doc needs to be reloaded // This once returned None despite the main thing being still connected (I think, at @@ -309,7 +404,8 @@ pub fn start_rendering( let Ok(msg) = receiver.recv() else { return Ok(()); }; - handle_notif!(msg); + + handle_notif!(msg) } } } @@ -324,23 +420,15 @@ struct RenderedContext { fn render_single_page_to_ctx( page: &Page, search_term: Option<&str>, - already_rendered_no_results: bool, + prev_render: &PrevRender, invert: bool, (area_w, area_h): (f32, f32) ) -> Result, mupdf::error::Error> { - let result_rects = search_term - .map(|term| { - page.to_text_page(TextPageOptions::empty()) - .and_then(|page| page.search(term)) - }) - .transpose()? 
- .unwrap_or_default(); - - // If there are no search terms on this page, and we've already rendered it with no search - // terms, then just return none to avoid this computation - if result_rects.is_empty() && already_rendered_no_results { - return Ok(None); - } + let result_rects = match prev_render.contained_term { + PageSearchResult::Unknown => search_page(page, search_term, None)?, + PageSearchResult::DidNotContain => Vec::new(), + PageSearchResult::Contained(count) => search_page(page, search_term, Some(count))? + }; // then, get the size of the page let bounds = page.bounds()?; @@ -413,3 +501,39 @@ pub struct HighlightRect { pub lr_x: u32, pub lr_y: u32 } + +#[inline] +fn search_page( + page: &Page, + search_term: Option<&str>, + trusted_search_results: Option +) -> Result, mupdf::error::Error> { + search_term + .map(|term| { + page.to_text_page(TextPageOptions::empty()) + .and_then(|page| { + let mut v = + Vec::with_capacity(trusted_search_results.map_or(0, NonZeroUsize::get)); + page.search_cb(term, &mut v, |v, results| { + v.extend(results.iter().cloned()); + SearchHitResponse::ContinueSearch + }) + .map(|_| v) + }) + }) + .transpose() + .map(Option::unwrap_or_default) +} + +#[inline] +fn count_search_results(page: &Page, search_term: &str) -> Result { + page.to_text_page(TextPageOptions::empty()) + .and_then(|page| { + let mut count = 0; + page.search_cb(search_term, &mut count, |count, results| { + *count += results.len(); + SearchHitResponse::ContinueSearch + })?; + Ok(count) + }) +}