Dramatically improve search performance by pausing rendering every once-in-a-while

This commit is contained in:
itsjunetime
2025-03-06 16:26:56 -07:00
parent ef8ace4f35
commit fc10dc8ffe
6 changed files with 194 additions and 42 deletions
+1
View File
@@ -2,6 +2,7 @@
- Update ratatui(-image) dependencies
- Use new mupdf search API for slightly better performance
- Periodically pause rendering while a search term is active so that searching across the entire document completes more quickly
# v0.3.0
+2 -2
View File
@@ -10,7 +10,7 @@ use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main, profile
use futures_util::StreamExt;
use tdf::{
converter::{ConvertedPage, ConverterMsg},
renderer::{fill_default, PageInfo, RenderInfo}
renderer::{PageInfo, RenderInfo, fill_default}
};
use utils::{
RenderState, handle_converter_msg, handle_renderer_msg, render_doc, start_all_rendering,
@@ -120,7 +120,7 @@ async fn render_all_files(path: &'static str) -> Vec<PageInfo> {
while let Some(info) = from_render_rx.next().await {
match info.expect("Renderer ran into an error while rendering") {
RenderInfo::Reloaded => (),
RenderInfo::Reloaded | RenderInfo::SearchResults { .. } => (),
RenderInfo::NumPages(num) => fill_default(&mut pages, num),
RenderInfo::Page(page) => {
let num = page.page_num;
+14 -4
View File
@@ -21,8 +21,8 @@ pub fn handle_renderer_msg(
to_converter_tx.send(ConverterMsg::NumPages(num)).unwrap();
}
Ok(RenderInfo::Page(info)) => to_converter_tx.send(ConverterMsg::AddImg(info)).unwrap(),
// We can ignore the `Reloaded` variant 'cause that's only used to send info to the TUI
Ok(RenderInfo::Reloaded) => (),
// We can ignore these variants 'cause they're only used to send info to the TUI
Ok(RenderInfo::Reloaded | RenderInfo::SearchResults { .. }) => (),
Err(e) => panic!("Got error from renderer: {e:?}")
}
}
@@ -77,7 +77,15 @@ pub fn start_rendering_loop(
width: columns * FONT_SIZE.0
};
std::thread::spawn(move || start_rendering(&str_path, to_main_tx, from_main_rx, size));
std::thread::spawn(move || {
start_rendering(
&str_path,
to_main_tx,
from_main_rx,
size,
tdf::PrerenderLimit::All
)
});
let main_area = Rect {
x: 0,
@@ -139,7 +147,9 @@ pub async fn render_doc(path: impl AsRef<Path>, search_term: Option<&str>) {
} = start_all_rendering(path);
if let Some(term) = search_term {
to_render_tx.send(RenderNotif::Search(term.to_owned())).unwrap();
to_render_tx
.send(RenderNotif::Search(term.to_owned()))
.unwrap();
}
while pages.is_empty() || pages.iter().any(Option::is_none) {
+7
View File
@@ -1,6 +1,13 @@
use std::num::NonZeroUsize;
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
/// How many pages the renderer may eagerly render around the currently-shown page.
///
/// Built from the `--prerender` flag: a value of `0` (or no flag at all) maps to
/// [`PrerenderLimit::All`], any other value to [`PrerenderLimit::Limited`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PrerenderLimit {
    /// The user set no limit on how many pages get prerendered.
    All,
    /// Prerender at most this many pages per rendering pass.
    Limited(NonZeroUsize)
}
pub mod converter;
pub mod renderer;
pub mod skip;
+11 -1
View File
@@ -17,6 +17,7 @@ use notify::{Event, EventKind, RecursiveMode, Watcher};
use ratatui::{Terminal, backend::CrosstermBackend};
use ratatui_image::picker::Picker;
use tdf::{
PrerenderLimit,
converter::{ConvertedPage, ConverterMsg, run_conversion_loop},
renderer::{self, RenderError, RenderInfo, RenderNotif},
tui::{BottomMessage, InputAction, MessageSetting, Tui}
@@ -47,6 +48,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
optional -m,--max-wide max_wide: NonZeroUsize
/// Fullscreen the pdf (hide document name, page count, etc)
optional -f,--fullscreen fullscreen: bool
/// The number of pages to prerender surrounding the currently-shown page; 0 means no
/// limit. By default, there is no limit.
optional -p,--prerender prerender: usize
/// PDF file to read
required file: PathBuf
};
@@ -141,8 +145,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// then we want to spawn off the rendering task
// We need to use the thread::spawn API so that this exists in a thread not owned by tokio,
// since the methods we call in `start_rendering` will panic if called in an async context
let prerender = flags
.prerender
.and_then(NonZeroUsize::new)
.map_or(PrerenderLimit::All, PrerenderLimit::Limited);
std::thread::spawn(move || {
renderer::start_rendering(&file_path, render_tx, render_rx, window_size)
renderer::start_rendering(&file_path, render_tx, render_rx, window_size, prerender)
});
let mut ev_stream = crossterm::event::EventStream::new();
@@ -211,6 +219,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
to_converter.send(ConverterMsg::AddImg(info))?;
},
RenderInfo::Reloaded => tui.set_msg(MessageSetting::Some(BottomMessage::Reloaded)),
RenderInfo::SearchResults { page_num, num_results } =>
tui.got_num_results_on_page(page_num, num_results),
},
Err(e) => tui.show_error(e),
}
+159 -35
View File
@@ -1,11 +1,15 @@
use std::{thread::sleep, time::Duration};
use std::{num::NonZeroUsize, thread::sleep, time::Duration};
use crossterm::terminal::WindowSize;
use flume::{Receiver, SendError, Sender, TryRecvError};
use itertools::Itertools;
use mupdf::{Colorspace, Document, Matrix, Page, Pixmap, TextPageOptions};
use mupdf::{
Colorspace, Document, Matrix, Page, Pixmap, Quad, TextPageOptions, text_page::SearchHitResponse
};
use ratatui::layout::Rect;
use crate::PrerenderLimit;
pub enum RenderNotif {
Area(Rect),
JumpToPage(usize),
@@ -24,6 +28,7 @@ pub enum RenderError {
/// Messages the rendering thread sends back over its `Sender` (wrapped in a `Result`
/// alongside `RenderError`).
pub enum RenderInfo {
/// A page count; consumers forward it to the converter and use it to size their list
/// of pages. Presumably the number of pages in the loaded document - TODO confirm.
NumPages(usize),
/// Info for a single page, which consumers hand to the converter as an image to add.
Page(PageInfo),
/// How many matches of the current search term were counted on page `page_num`, so
/// the TUI can use it to determine which page to jump to next.
SearchResults { page_num: usize, num_results: usize },
/// Only meaningful to the TUI, which shows a "reloaded" message when it receives this.
Reloaded
}
@@ -43,7 +48,15 @@ pub struct ImageData {
#[derive(Default)]
struct PrevRender {
successful: bool,
contained_term: Option<bool>
contained_term: PageSearchResult
}
/// What the renderer currently knows about the active search term on a single page.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
enum PageSearchResult {
    /// The page has not been checked against the current term yet, so it still needs
    /// to be looked at.
    #[default]
    Unknown,
    /// The page was checked and has no matches for the current term.
    DidNotContain,
    /// The page was checked and has this many matches for the current term.
    Contained(NonZeroUsize)
}
#[inline]
@@ -69,7 +82,8 @@ pub fn start_rendering(
path: &str,
sender: Sender<Result<RenderInfo, RenderError>>,
receiver: Receiver<RenderNotif>,
size: WindowSize
size: WindowSize,
prerender: PrerenderLimit
) -> Result<(), SendError<Result<RenderInfo, RenderError>>> {
// We want this outside of 'reload so that if the doc reloads, the search term that somebody
// set will still get highlighted in the reloaded doc
@@ -159,7 +173,7 @@ pub fn start_rendering(
// what we do with a notif is the same regardless of if we're in the middle of
// rendering the list of pages or we're all done
macro_rules! handle_notif {
($notif:ident) => {
($notif:ident) => {{
match $notif {
RenderNotif::Reload => continue 'reload,
RenderNotif::Invert => {
@@ -184,7 +198,8 @@ pub fn start_rendering(
// the pages wherein there were already no search results. So this
// is a little optimization to allow that.
for page in &mut rendered {
if !page.successful || page.contained_term != Some(true) {
if let PageSearchResult::Contained(_) = page.contained_term {
page.contained_term = PageSearchResult::DidNotContain;
page.successful = false;
}
}
@@ -195,28 +210,43 @@ pub fn start_rendering(
// term, we can render them with the term, but if they don't, we
// don't need to re-render and send it over again.
for page in &mut rendered {
page.contained_term = None;
page.contained_term = PageSearchResult::Unknown;
}
search_term = Some(term);
}
continue 'render_pages;
}
}
};
}};
}
let (left, right) = rendered.split_at_mut(start_point);
// This is our iterator over all the pages we want to look at and render. It uses this
// weird 'interleave' thing to render pages on *both sides* of the currently-displayed
// page in case they decide to go forward or backwards.
let page_iter = right
.iter_mut()
.enumerate()
.map(|(idx, p)| (idx + start_point, p))
.map(move |(idx, p)| (idx + start_point, p))
.interleave(
left.iter_mut()
.rev()
.enumerate()
.map(|(idx, p)| (start_point - (idx + 1), p))
);
.map(move |(idx, p)| (start_point - (idx + 1), p))
)
.take(match (&prerender, &search_term) {
// If the user has limited the amount of pages they want to prerender, then we
// just do what they ask. Nice and easy.
(PrerenderLimit::Limited(l), _) => l.get(),
// If they haven't limited it, but we don't have any search term that we're
// currently looking for, just go for all of it
(PrerenderLimit::All, None) => n_pages,
// If they haven't limited it, and we DO have a search term we need to look
// for, just do 20 so that we don't dramatically slow down the search process
// since they've specifically initiated that and so we want it to take priority
(PrerenderLimit::All, Some(_)) => 20
});
let area_w = f32::from(area.width) * f32::from(col_w);
let area_h = f32::from(area.height) * f32::from(col_h);
@@ -225,10 +255,9 @@ pub fn start_rendering(
for (num, rendered) in page_iter {
// we only want to continue if one of the following is met:
// 1. It failed to render last time (we want to retry)
// 2. The `contained_term` is set to None (representing 'Unknown'), meaning that we
// need to at least check if it contains the current term to see if it needs a
// re-render
if rendered.successful && rendered.contained_term.is_some() {
// 2. The `contained_term` is set to Unknown, meaning that we need to at least
// check if it contains the current term to see if it needs a re-render
if rendered.successful && rendered.contained_term != PageSearchResult::Unknown {
continue;
}
@@ -251,14 +280,11 @@ pub fn start_rendering(
Ok(p) => p
};
let rendered_with_no_results =
rendered.successful && rendered.contained_term == Some(false);
// render the page
match render_single_page_to_ctx(
&page,
search_term.as_deref(),
rendered_with_no_results,
rendered,
invert,
(area_w, area_h)
) {
@@ -271,7 +297,8 @@ pub fn start_rendering(
// we make a potentially incorrect assumption here that writing the context
// to a png won't fail, and mark that it all rendered correctly here before
// spawning off the thread to do so and send it.
rendered.contained_term = Some(ctx.result_rects.is_empty());
rendered.contained_term = NonZeroUsize::new(ctx.result_rects.len())
.map_or(PageSearchResult::DidNotContain, PageSearchResult::Contained);
rendered.successful = true;
let w = ctx.pixmap.width();
@@ -302,6 +329,74 @@ pub fn start_rendering(
}
}
// Now, if we have a search term, we want to look through the rest of the document past
// what we've just rendered (and looked at the search results of)
if let Some(ref term) = search_term {
let mut search_start = start_point;
loop {
// hmm maybe this would be nice to make configurable but whatever
const SEARCH_AT_TIME: usize = 20;
// So now we want to look through all the remaining pages, starting after this
// current one (we don't do interleaving here 'cause I'm lazy)
let page_idx = rendered[search_start..]
.iter_mut()
.enumerate()
// And we only want to take max SEARCH_AT_TIME of them since we don't want
// to block on this for *too* long
.take(SEARCH_AT_TIME)
// We want to remove all the ones that we've already determined did not
// contain the current term...
.filter(|(_, r)| r.contained_term != PageSearchResult::DidNotContain)
// And then adjust the index to be correct for the actual page number
.map(|(idx, r)| (idx + search_start, r));
// then we go through each...
for (page_num, rendered) in page_idx {
// We get the number of results (using the function that specifically just
// counts them instead of determining the quads of them all)
let num_results = doc
.load_page(page_num as i32)
.and_then(|page| count_search_results(&page, term))
.unwrap();
// Mark the `contained_term` field with this updated value...
rendered.contained_term = NonZeroUsize::new(num_results)
.map_or(PageSearchResult::DidNotContain, PageSearchResult::Contained);
// And send it over to the tui so that they can know and use it to
// determine what next page to jump to
sender.send(Ok(RenderInfo::SearchResults {
page_num,
num_results
}))?;
}
// then once we're done with this iteration, we increment search_start to
// prepare for the next iteration
search_start += SEARCH_AT_TIME;
// now, we want to check if we've gone past the end - if so, we go back to the
// beginning so we can get the pages before the current one.
if search_start > n_pages {
search_start = 0;
} else if ((search_start - SEARCH_AT_TIME) + 1..search_start)
.contains(&start_point)
{
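// And if we are back at the place we started, we've looked through all the
// pages. Quit.
// NOTE(review): the range checked above excludes both of its chunk boundaries, and
// this branch is skipped entirely on the iteration that wraps `search_start` back to
// 0. It looks like a `start_point` of 0 (or any other multiple of SEARCH_AT_TIME)
// never matches, so the loop would only exit via a notification - confirm and fix.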
break;
}
match receiver.try_recv() {
// If there are no messages left for us, just continue in this loop
Err(TryRecvError::Empty) => (),
Err(TryRecvError::Disconnected) => return Ok(()),
Ok(msg) => handle_notif!(msg)
}
}
}
// Then once we've rendered all these pages, wait until we get another notification
// that this doc needs to be reloaded
// This once returned None despite the main thing being still connected (I think, at
@@ -309,7 +404,8 @@ pub fn start_rendering(
let Ok(msg) = receiver.recv() else {
return Ok(());
};
handle_notif!(msg);
handle_notif!(msg)
}
}
}
@@ -324,23 +420,15 @@ struct RenderedContext {
fn render_single_page_to_ctx(
page: &Page,
search_term: Option<&str>,
already_rendered_no_results: bool,
prev_render: &PrevRender,
invert: bool,
(area_w, area_h): (f32, f32)
) -> Result<Option<RenderedContext>, mupdf::error::Error> {
let result_rects = search_term
.map(|term| {
page.to_text_page(TextPageOptions::empty())
.and_then(|page| page.search(term))
})
.transpose()?
.unwrap_or_default();
// If there are no search terms on this page, and we've already rendered it with no search
// terms, then just return none to avoid this computation
if result_rects.is_empty() && already_rendered_no_results {
return Ok(None);
}
let result_rects = match prev_render.contained_term {
PageSearchResult::Unknown => search_page(page, search_term, None)?,
PageSearchResult::DidNotContain => Vec::new(),
PageSearchResult::Contained(count) => search_page(page, search_term, Some(count))?
};
// then, get the size of the page
let bounds = page.bounds()?;
@@ -413,3 +501,39 @@ pub struct HighlightRect {
pub lr_x: u32,
pub lr_y: u32
}
/// Collects the quad of every match of `search_term` on `page`.
///
/// When `search_term` is `None`, the page is not inspected at all and an empty vector
/// is returned. `trusted_search_results`, when provided, is only used to pre-size the
/// returned vector; it does not limit how many hits are collected.
///
/// # Errors
///
/// Returns the mupdf error if the text page cannot be built or the search fails.
#[inline]
fn search_page(
    page: &Page,
    search_term: Option<&str>,
    trusted_search_results: Option<NonZeroUsize>
) -> Result<Vec<Quad>, mupdf::error::Error> {
    // Nothing to look for means nothing to highlight.
    let Some(term) = search_term else {
        return Ok(Vec::new());
    };

    let text_page = page.to_text_page(TextPageOptions::empty())?;

    // A previously-counted number of hits lets us allocate exactly once.
    let expected_hits = trusted_search_results.map_or(0, NonZeroUsize::get);
    let mut hits = Vec::with_capacity(expected_hits);

    text_page.search_cb(term, &mut hits, |acc, quads| {
        acc.extend(quads.iter().cloned());
        SearchHitResponse::ContinueSearch
    })?;

    Ok(hits)
}
/// Counts how many times `search_term` occurs on `page` without collecting the
/// individual hit quads.
///
/// # Errors
///
/// Returns the mupdf error if the text page cannot be built or the search fails.
#[inline]
fn count_search_results(page: &Page, search_term: &str) -> Result<usize, mupdf::error::Error> {
    let text_page = page.to_text_page(TextPageOptions::empty())?;

    // The callback may run several times; each call adds the hits it was handed.
    let mut total = 0;
    text_page.search_cb(search_term, &mut total, |running, quads| {
        *running += quads.len();
        SearchHitResponse::ContinueSearch
    })?;

    Ok(total)
}