Rewrite with mupdf as a backend (#50)

* Initial implementation of attempted mupdf rewrite

* Change back to no resizing and don't include alpha channel in conversion

* Remove some more dead code

* Make features more modular and call search more easily

* Switch to git dependency for my fixes

* Update deps

* Fix searching hehe

* Remove unnecessary CI steps?

* fontconfig in CI

* perftools in ci

* Final adjustments to conform to mupdf changes
This commit is contained in:
June
2025-02-19 09:59:29 -07:00
committed by GitHub
parent e123351079
commit 524c069b83
13 changed files with 817 additions and 675 deletions
+24 -11
View File
@@ -1,8 +1,9 @@
use flume::{Receiver, SendError, Sender, TryRecvError};
use futures_util::stream::StreamExt;
use image::ImageFormat;
use image::DynamicImage;
use itertools::Itertools;
use ratatui_image::{picker::Picker, protocol::Protocol, Resize};
use rayon::iter::ParallelIterator;
use crate::renderer::{fill_default, PageInfo, RenderError};
@@ -54,13 +55,25 @@ pub async fn run_conversion_loop(
return Ok(None);
};
let img_area = page_info.img_data.area;
let mut dyn_img = image::load_from_memory_with_format(
&page_info.img_data.pixels,
image::ImageFormat::Pnm
)
.map_err(|e| RenderError::Converting(format!("Can't load image: {e}")))?;
let dyn_img =
image::load_from_memory_with_format(&page_info.img_data.data, ImageFormat::Png)
.map_err(|e| {
RenderError::Render(format!("Couldn't convert Vec<u8> to DynamicImage: {e}"))
})?;
match dyn_img {
DynamicImage::ImageRgb8(ref mut img) =>
for quad in &*page_info.result_rects {
img.par_enumerate_pixels_mut()
.filter(|(x, y, _)| {
*x > quad.ul_x && *x < quad.lr_x && *y > quad.ul_y && *y < quad.lr_y
})
.for_each(|(_, _, px)| px.0[2] = px.0[2].saturating_sub(u8::MAX / 2));
},
_ => unreachable!()
};
let img_area = page_info.img_data.cell_area;
// We don't actually want to Crop this image, but we've already
// verified (with the ImageSurface stuff) that the image is the correct
@@ -69,7 +82,7 @@ pub async fn run_conversion_loop(
let txt_img = picker
.new_protocol(dyn_img, img_area, Resize::None)
.map_err(|e| {
RenderError::Render(format!(
RenderError::Converting(format!(
"Couldn't convert DynamicImage to ratatui image: {e}"
))
})?;
@@ -79,15 +92,15 @@ pub async fn run_conversion_loop(
Ok(Some(ConvertedPage {
page: txt_img,
num: page_info.page,
num_results: page_info.search_results
num: page_info.page_num,
num_results: page_info.result_rects.len()
}))
}
fn handle_notif(msg: ConverterMsg, images: &mut Vec<Option<PageInfo>>, page: &mut usize) {
match msg {
ConverterMsg::AddImg(img) => {
let page_num = img.page;
let page_num = img.page_num;
images[page_num] = Some(img);
}
ConverterMsg::NumPages(n_pages) => {
+4 -14
View File
@@ -13,7 +13,6 @@ use crossterm::{
}
};
use futures_util::{stream::StreamExt, FutureExt};
use glib::{LogField, LogLevel, LogWriterOutput};
use notify::{Event, EventKind, RecursiveMode, Watcher};
use ratatui::{backend::CrosstermBackend, Terminal};
use ratatui_image::picker::Picker;
@@ -80,8 +79,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
)?;
// TODO: Handle non-utf8 file names? Maybe by constructing a CString and passing that in to the
// poppler stuff instead of a rust string?
let file_path = format!("file://{}", path.clone().into_os_string().to_string_lossy());
// mupdf stuff instead of a rust string?
let file_path = path.clone().into_os_string().to_string_lossy().to_string();
let mut window_size = window_size()?;
@@ -161,11 +160,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut term = Terminal::new(backend)?;
term.skip_diff(true);
// poppler has some annoying logging (e.g. if you request a page index out-of-bounds of a
// document's pages, then it will return `None`, but still log to stderr with CRITICAL level),
// so we want to just ignore all logging since this is a tui app.
glib::log_set_writer_func(noop);
execute!(
term.backend_mut(),
EnterAlternateScreen,
@@ -208,7 +202,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
to_converter.send(ConverterMsg::NumPages(num))?;
},
RenderInfo::Page(info) => {
tui.got_num_results_on_page(info.page, info.search_results);
tui.got_num_results_on_page(info.page_num, info.result_rects.len());
to_converter.send(ConverterMsg::AddImg(info))?;
},
RenderInfo::Reloaded => tui.set_msg(MessageSetting::Some(BottomMessage::Reloaded)),
@@ -275,7 +269,7 @@ fn on_notify_ev(
match ev.kind {
EventKind::Access(_) => (),
EventKind::Remove(_) => to_tui_tx
.send(Err(RenderError::Render("File was deleted".into())))
.send(Err(RenderError::Converting("File was deleted".into())))
.unwrap(),
// This shouldn't fail to send unless the receiver gets disconnected. If that's
// happened, then like the main thread has panicked or something, so it doesn't matter
@@ -286,7 +280,3 @@ fn on_notify_ev(
}
}
}
/// Log writer that swallows every record.
///
/// Registered through `glib::log_set_writer_func` so that library logging never reaches the
/// terminal while the TUI is drawn; both arguments are ignored and each record is reported as
/// already handled.
fn noop(_level: LogLevel, _fields: &[LogField<'_>]) -> LogWriterOutput {
LogWriterOutput::Handled
}
+114 -142
View File
@@ -1,10 +1,9 @@
use std::thread;
use std::{thread::sleep, time::Duration};
use cairo::{Antialias, Context, Format, Surface};
use crossterm::terminal::WindowSize;
use flume::{Receiver, SendError, Sender, TryRecvError};
use itertools::Itertools;
use poppler::{Color, Document, FindFlags, Page, Rectangle, SelectionStyle};
use mupdf::{Colorspace, Document, Matrix, Page, Pixmap};
use ratatui::layout::Rect;
pub enum RenderNotif {
@@ -17,10 +16,8 @@ pub enum RenderNotif {
#[derive(Debug)]
pub enum RenderError {
Notify(notify::Error),
Doc(glib::Error),
// Don't like storing an error as a string but it needs to be Send to send to the main thread,
// and it's just going to be shown to the user, so whatever
Render(String)
Doc(mupdf::error::Error),
Converting(String)
}
pub enum RenderInfo {
@@ -32,14 +29,14 @@ pub enum RenderInfo {
#[derive(Clone)]
pub struct PageInfo {
pub img_data: ImageData,
pub page: usize,
pub search_results: usize
pub page_num: usize,
pub result_rects: Vec<HighlightRect>
}
#[derive(Clone)]
pub struct ImageData {
pub data: Vec<u8>,
pub area: Rect
pub pixels: Vec<u8>,
pub cell_area: Rect
}
#[derive(Default)]
@@ -56,7 +53,7 @@ pub fn fill_default<T: Default>(vec: &mut Vec<T>, size: usize) {
}
}
// this function has to be sync (non-async) because the poppler::Document needs to be held during
// this function has to be sync (non-async) because the mupdf::Document needs to be held during
// most of it, but that's basically just a wrapper around `*c_void` cause it's just a binding to C
// code, so it's !Send and thus can't be held across await points. So we can't call any of the
// async `send` or `recv` methods in this function body, since those create await points. Which
@@ -71,7 +68,7 @@ pub fn fill_default<T: Default>(vec: &mut Vec<T>, size: usize) {
#[allow(clippy::needless_pass_by_value)]
pub fn start_rendering(
path: &str,
mut sender: Sender<Result<RenderInfo, RenderError>>,
sender: Sender<Result<RenderInfo, RenderError>>,
receiver: Receiver<RenderNotif>,
size: WindowSize
) -> Result<(), SendError<Result<RenderInfo, RenderError>>> {
@@ -95,7 +92,7 @@ pub fn start_rendering(
let mut stored_doc = None;
'reload: loop {
let doc = match Document::from_file(path, None) {
let doc = match Document::open(path) {
Err(e) => {
// if there's an error, tell the main loop
sender.send(Err(RenderError::Doc(e)))?;
@@ -125,7 +122,16 @@ pub fn start_rendering(
}
};
let n_pages = doc.n_pages() as usize;
let n_pages = match doc.page_count() {
Ok(n) => n as usize,
Err(e) => {
sender.send(Err(RenderError::Doc(e)))?;
// Basic backoff: wait a second before retrying the reload.
sleep(Duration::from_secs(1));
continue 'reload;
}
};
sender.send(Ok(RenderInfo::NumPages(n_pages)))?;
// We're using this vec of bools to indicate which page numbers have already been rendered,
@@ -205,8 +211,8 @@ pub fn start_rendering(
.map(|(idx, p)| (start_point - (idx + 1), p))
);
let area_w = f64::from(area.width) * f64::from(col_w);
let area_h = f64::from(area.height) * f64::from(col_h);
let area_w = f32::from(area.width) * f32::from(col_w);
let area_h = f32::from(area.height) * f32::from(col_h);
// we go through each page
for (num, rendered) in page_iter {
@@ -230,12 +236,12 @@ pub fn start_rendering(
// We know this is in range 'cause we're iterating over it but we still just want
// to be safe
let Some(page) = doc.page(num as i32) else {
sender.send(Err(RenderError::Render(format!(
"Couldn't get page {num} ({}) of doc?",
num as i32
))))?;
continue;
let page = match doc.load_page(num as i32) {
Err(e) => {
sender.send(Err(RenderError::Doc(e)))?;
continue;
}
Ok(p) => p
};
let rendered_with_no_results =
@@ -257,26 +263,34 @@ pub fn start_rendering(
// We optimistically mark this page as rendered here, before its image data
// has actually been encoded and sent. That assumption can be wrong: the
// encoding step below may still fail.
rendered.contained_term = Some(ctx.num_results > 0);
rendered.contained_term = Some(ctx.result_rects.is_empty());
rendered.successful = true;
// if this is the page that the user is currently trying to look at, don't
// bother spawning off a thread to render it to a png - it'll only slow
// down the time til the user can see it (due to the overhead of creating a
// thread), but we still want to spawn threads to render the other pages
// since the effects of parallelizing that will be noticeable if the user
// tries to move through pages more quickly
if num == start_point {
render_ctx_to_png(&ctx, &mut sender, (col_w, col_h), num)?;
} else {
let mut sender = sender.clone();
thread::spawn(move || {
render_ctx_to_png(&ctx, &mut sender, (col_w, col_h), num)
});
}
let cap = (ctx.pixmap.width()
* ctx.pixmap.height() * u32::from(ctx.pixmap.n()))
as usize;
let mut pixels = Vec::with_capacity(cap);
if let Err(e) = ctx.pixmap.write_to(&mut pixels, mupdf::ImageFormat::PNM) {
sender.send(Err(RenderError::Doc(e)))?;
continue;
};
sender.send(Ok(RenderInfo::Page(PageInfo {
img_data: ImageData {
pixels,
cell_area: Rect {
x: 0,
y: 0,
width: (ctx.surface_w / f32::from(col_w)) as u16,
height: (ctx.surface_h / f32::from(col_h)) as u16
}
},
page_num: num,
result_rects: ctx.result_rects
})))?;
}
// And if we got an error, then obviously we need to propagate that
Err(e) => sender.send(Err(RenderError::Render(e)))?
Err(e) => sender.send(Err(RenderError::Doc(e)))?
}
}
@@ -295,34 +309,37 @@ pub fn start_rendering(
}
struct RenderedContext {
surface: Surface,
num_results: usize,
surface_width: f64,
surface_height: f64
pixmap: Pixmap,
surface_w: f32,
surface_h: f32,
result_rects: Vec<HighlightRect>
}
/// SAFETY: I think this is safe because, although the backing struct for `Surface` does contain
/// pointers to like the cairo_backend_t struct that all the cairo stuff is using, that struct is
/// basically just a vtable, so accessing it from multiple threads *should* be safe since we're
/// just calling the same functions with different data. The only other thing it holds reference to
/// is a `cairo_device_t`, but that seems to be thread-safe because it's managed through ref counts
/// and a mutex. Also, as far as I can tell from reading the source code, write_to_png_stream (the
/// only function we call on this struct) doesn't access the device at all, so we should be fine
/// there.
/// We want this to be Send so that we can delegate the png writing to a separate thread (since
/// that's the thing that takes the most time, by far, in this app).
unsafe impl Send for RenderedContext {}
fn render_single_page_to_ctx(
page: &Page,
search_term: Option<&str>,
already_rendered_no_results: bool,
(area_w, area_h): (f64, f64)
) -> Result<Option<RenderedContext>, String> {
let mut result_rects = search_term
.as_ref()
.map(|term| page.find_text_with_options(term, FindFlags::DEFAULT | FindFlags::MULTILINE))
.unwrap_or_default();
(area_w, area_h): (f32, f32)
) -> Result<Option<RenderedContext>, mupdf::error::Error> {
let mut max_hits = 10;
let result_rects = loop {
let rects = search_term
.as_ref()
// mupdf allocates a buffer of the size we give it to try to fill it with results. If we
// pass in u32::MAX, it allocates too much memory to function. If we pass too small of a
// number in, we may miss out on some of the results. Ideally, we'd like to make a better
// interface than this, but we're stuck with this kinda ugly looping until we make sure
// that we've found every instance of it on this page.
.map(|term| page.search(term, max_hits))
.transpose()?
.unwrap_or_default();
if rects.len() < (max_hits as usize) {
break rects;
}
max_hits *= 2;
};
// If there are no search terms on this page, and we've already rendered it with no search
// terms, then just return none to avoid this computation
@@ -331,7 +348,8 @@ fn render_single_page_to_ctx(
}
// then, get the size of the page
let (p_width, p_height) = page.size();
let bounds = page.bounds()?;
let (p_width, p_height) = (bounds.x1 - bounds.x0, bounds.y1 - bounds.y0);
// and get its aspect ratio
let p_aspect_ratio = p_width / p_height;
@@ -353,93 +371,47 @@ fn render_single_page_to_ctx(
area_h / p_height
};
let surface_width = p_width * scale_factor;
let surface_height = p_height * scale_factor;
let surface_w = p_width * scale_factor;
let surface_h = p_height * scale_factor;
let surface = cairo::ImageSurface::create(
Format::Rgb16_565,
// No matter how big you make these arguments, the image will be drawn at the same
// size. So if you make them really big, the image will be drawn on a quarter of it. If
// you make them really small, the image will cover more than all of the surface.
//
// However, that only stands as long as you don't scale the context that you place this
// surface into. If you scale the dimensions of this image by n, then scale the context
// by that same amount, then it'll still fit perfectly into the context, but be
// rendered at higher quality.
surface_width as i32,
surface_height as i32
)
.map_err(|e| format!("Couldn't create ImageSurface: {e}"))?;
surface.set_device_scale(scale_factor, scale_factor);
let colorspace = Colorspace::device_rgb();
let matrix = Matrix::new_scale(scale_factor, scale_factor);
let ctx = Context::new(surface).map_err(|e| format!("Couldn't create Context: {e}"))?;
let mut pixmap = page.to_pixmap(&matrix, &colorspace, 0.0, false)?;
// The default background color of PDFs (at least, I think) is white, so we need to set
// that as the background color, then paint, then render.
ctx.set_source_rgba(1.0, 1.0, 1.0, 1.0);
let (x_res, y_res) = pixmap.resolution();
let new_x = (x_res as f32 * scale_factor) as i32;
let new_y = (y_res as f32 * scale_factor) as i32;
pixmap.set_resolution(new_x, new_y);
ctx.set_antialias(Antialias::None);
ctx.paint()
.map_err(|e| format!("Couldn't paint Context: {e}"))?;
page.render(&ctx);
let num_results = result_rects.len();
if !result_rects.is_empty() {
let mut highlight_color = Color::new();
highlight_color.set_red((u16::MAX / 5) * 4);
highlight_color.set_green((u16::MAX / 5) * 4);
let mut old_rect = Rectangle::new();
for rect in &mut result_rects {
// According to https://gitlab.freedesktop.org/poppler/poppler/-/issues/763, these rects
// need to be corrected since they use different references as the y-coordinate base
rect.set_y1(p_height - rect.y1());
rect.set_y2(p_height - rect.y2());
page.render_selection(
&ctx,
rect,
&mut old_rect,
SelectionStyle::Glyph,
&mut Color::new(),
&mut highlight_color
);
}
}
let result_rects = result_rects
.into_iter()
.map(|quad| {
let ul_x = (quad.ul.x * scale_factor) as u32;
let ul_y = (quad.ul.y * scale_factor) as u32;
let lr_x = (quad.lr.x * scale_factor) as u32;
let lr_y = (quad.lr.y * scale_factor) as u32;
HighlightRect {
ul_x,
ul_y,
lr_x,
lr_y
}
})
.collect::<Vec<_>>();
Ok(Some(RenderedContext {
surface: ctx.target(),
num_results,
surface_width,
surface_height
pixmap,
surface_w,
surface_h,
result_rects
}))
}
fn render_ctx_to_png(
ctx: &RenderedContext,
sender: &mut Sender<Result<RenderInfo, RenderError>>,
(col_w, col_h): (u16, u16),
page: usize
) -> Result<(), SendError<Result<RenderInfo, RenderError>>> {
let mut img_data = Vec::with_capacity((ctx.surface_height * ctx.surface_width) as usize);
match ctx.surface.write_to_png(&mut img_data) {
Err(e) => sender.send(Err(RenderError::Render(format!(
"Couldn't write surface to png: {e}"
)))),
Ok(()) => sender.send(Ok(RenderInfo::Page(PageInfo {
img_data: ImageData {
data: img_data,
area: Rect {
width: ctx.surface_width as u16 / col_w,
height: ctx.surface_height as u16 / col_h,
x: 0,
y: 0
}
},
page,
search_results: ctx.num_results
})))
}
/// Axis-aligned bounds of a single search hit on a rendered page.
///
/// The coordinates are in pixels of the rendered image: the corners of the quad returned by the
/// page search are multiplied by the render scale factor and truncated to `u32`. `ul_*` is the
/// upper-left corner and `lr_*` the lower-right corner.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct HighlightRect {
/// X coordinate of the upper-left corner, in image pixels.
pub ul_x: u32,
/// Y coordinate of the upper-left corner, in image pixels.
pub ul_y: u32,
/// X coordinate of the lower-right corner, in image pixels.
pub lr_x: u32,
/// Y coordinate of the lower-right corner, in image pixels.
pub lr_y: u32
}
+2 -2
View File
@@ -569,8 +569,8 @@ impl Tui {
pub fn show_error(&mut self, err: RenderError) {
self.set_msg(MessageSetting::Some(BottomMessage::Error(match err {
RenderError::Notify(e) => format!("Auto-reload failed: {e}"),
RenderError::Doc(e) => format!("Couldn't open document: {e}"),
RenderError::Render(e) => format!("Couldn't render page: {e}")
RenderError::Doc(e) => format!("Couldn't process document: {e}"),
RenderError::Converting(e) => format!("Couldn't convert page after rendering: {e}")
})));
}