mirror of
https://github.com/itsjunetime/tdf.git
synced 2026-06-02 08:01:47 -04:00
Rewrite with mupdf as a backend (#50)
* Initial implementation of attempted mupdf rewrite * Change back to no resizing and don't include alpha channel in conversion * Remove some more dead code * Make features more modular and call search more easily * Switch to git dependency for my fixes * Update deps * Fix searching hehe * Remove unnecessary CI steps? * fontconfig in CI * perftools in ci * Final adjustments to conform to mupdf changes
This commit is contained in:
+24
-11
@@ -1,8 +1,9 @@
|
||||
use flume::{Receiver, SendError, Sender, TryRecvError};
|
||||
use futures_util::stream::StreamExt;
|
||||
use image::ImageFormat;
|
||||
use image::DynamicImage;
|
||||
use itertools::Itertools;
|
||||
use ratatui_image::{picker::Picker, protocol::Protocol, Resize};
|
||||
use rayon::iter::ParallelIterator;
|
||||
|
||||
use crate::renderer::{fill_default, PageInfo, RenderError};
|
||||
|
||||
@@ -54,13 +55,25 @@ pub async fn run_conversion_loop(
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let img_area = page_info.img_data.area;
|
||||
let mut dyn_img = image::load_from_memory_with_format(
|
||||
&page_info.img_data.pixels,
|
||||
image::ImageFormat::Pnm
|
||||
)
|
||||
.map_err(|e| RenderError::Converting(format!("Can't load image: {e}")))?;
|
||||
|
||||
let dyn_img =
|
||||
image::load_from_memory_with_format(&page_info.img_data.data, ImageFormat::Png)
|
||||
.map_err(|e| {
|
||||
RenderError::Render(format!("Couldn't convert Vec<u8> to DynamicImage: {e}"))
|
||||
})?;
|
||||
match dyn_img {
|
||||
DynamicImage::ImageRgb8(ref mut img) =>
|
||||
for quad in &*page_info.result_rects {
|
||||
img.par_enumerate_pixels_mut()
|
||||
.filter(|(x, y, _)| {
|
||||
*x > quad.ul_x && *x < quad.lr_x && *y > quad.ul_y && *y < quad.lr_y
|
||||
})
|
||||
.for_each(|(_, _, px)| px.0[2] = px.0[2].saturating_sub(u8::MAX / 2));
|
||||
},
|
||||
_ => unreachable!()
|
||||
};
|
||||
|
||||
let img_area = page_info.img_data.cell_area;
|
||||
|
||||
// We don't actually want to Crop this image, but we've already
|
||||
// verified (with the ImageSurface stuff) that the image is the correct
|
||||
@@ -69,7 +82,7 @@ pub async fn run_conversion_loop(
|
||||
let txt_img = picker
|
||||
.new_protocol(dyn_img, img_area, Resize::None)
|
||||
.map_err(|e| {
|
||||
RenderError::Render(format!(
|
||||
RenderError::Converting(format!(
|
||||
"Couldn't convert DynamicImage to ratatui image: {e}"
|
||||
))
|
||||
})?;
|
||||
@@ -79,15 +92,15 @@ pub async fn run_conversion_loop(
|
||||
|
||||
Ok(Some(ConvertedPage {
|
||||
page: txt_img,
|
||||
num: page_info.page,
|
||||
num_results: page_info.search_results
|
||||
num: page_info.page_num,
|
||||
num_results: page_info.result_rects.len()
|
||||
}))
|
||||
}
|
||||
|
||||
fn handle_notif(msg: ConverterMsg, images: &mut Vec<Option<PageInfo>>, page: &mut usize) {
|
||||
match msg {
|
||||
ConverterMsg::AddImg(img) => {
|
||||
let page_num = img.page;
|
||||
let page_num = img.page_num;
|
||||
images[page_num] = Some(img);
|
||||
}
|
||||
ConverterMsg::NumPages(n_pages) => {
|
||||
|
||||
+4
-14
@@ -13,7 +13,6 @@ use crossterm::{
|
||||
}
|
||||
};
|
||||
use futures_util::{stream::StreamExt, FutureExt};
|
||||
use glib::{LogField, LogLevel, LogWriterOutput};
|
||||
use notify::{Event, EventKind, RecursiveMode, Watcher};
|
||||
use ratatui::{backend::CrosstermBackend, Terminal};
|
||||
use ratatui_image::picker::Picker;
|
||||
@@ -80,8 +79,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
)?;
|
||||
|
||||
// TODO: Handle non-utf8 file names? Maybe by constructing a CString and passing that in to the
|
||||
// poppler stuff instead of a rust string?
|
||||
let file_path = format!("file://{}", path.clone().into_os_string().to_string_lossy());
|
||||
// mupdf stuff instead of a rust string?
|
||||
let file_path = path.clone().into_os_string().to_string_lossy().to_string();
|
||||
|
||||
let mut window_size = window_size()?;
|
||||
|
||||
@@ -161,11 +160,6 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut term = Terminal::new(backend)?;
|
||||
term.skip_diff(true);
|
||||
|
||||
// poppler has some annoying logging (e.g. if you request a page index out-of-bounds of a
|
||||
// document's pages, then it will return `None`, but still log to stderr with CRITICAL level),
|
||||
// so we want to just ignore all logging since this is a tui app.
|
||||
glib::log_set_writer_func(noop);
|
||||
|
||||
execute!(
|
||||
term.backend_mut(),
|
||||
EnterAlternateScreen,
|
||||
@@ -208,7 +202,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
to_converter.send(ConverterMsg::NumPages(num))?;
|
||||
},
|
||||
RenderInfo::Page(info) => {
|
||||
tui.got_num_results_on_page(info.page, info.search_results);
|
||||
tui.got_num_results_on_page(info.page_num, info.result_rects.len());
|
||||
to_converter.send(ConverterMsg::AddImg(info))?;
|
||||
},
|
||||
RenderInfo::Reloaded => tui.set_msg(MessageSetting::Some(BottomMessage::Reloaded)),
|
||||
@@ -275,7 +269,7 @@ fn on_notify_ev(
|
||||
match ev.kind {
|
||||
EventKind::Access(_) => (),
|
||||
EventKind::Remove(_) => to_tui_tx
|
||||
.send(Err(RenderError::Render("File was deleted".into())))
|
||||
.send(Err(RenderError::Converting("File was deleted".into())))
|
||||
.unwrap(),
|
||||
// This shouldn't fail to send unless the receiver gets disconnected. If that's
|
||||
// happened, then like the main thread has panicked or something, so it doesn't matter
|
||||
@@ -286,7 +280,3 @@ fn on_notify_ev(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn noop(_: LogLevel, _: &[LogField<'_>]) -> LogWriterOutput {
|
||||
LogWriterOutput::Handled
|
||||
}
|
||||
|
||||
+114
-142
@@ -1,10 +1,9 @@
|
||||
use std::thread;
|
||||
use std::{thread::sleep, time::Duration};
|
||||
|
||||
use cairo::{Antialias, Context, Format, Surface};
|
||||
use crossterm::terminal::WindowSize;
|
||||
use flume::{Receiver, SendError, Sender, TryRecvError};
|
||||
use itertools::Itertools;
|
||||
use poppler::{Color, Document, FindFlags, Page, Rectangle, SelectionStyle};
|
||||
use mupdf::{Colorspace, Document, Matrix, Page, Pixmap};
|
||||
use ratatui::layout::Rect;
|
||||
|
||||
pub enum RenderNotif {
|
||||
@@ -17,10 +16,8 @@ pub enum RenderNotif {
|
||||
#[derive(Debug)]
|
||||
pub enum RenderError {
|
||||
Notify(notify::Error),
|
||||
Doc(glib::Error),
|
||||
// Don't like storing an error as a string but it needs to be Send to send to the main thread,
|
||||
// and it's just going to be shown to the user, so whatever
|
||||
Render(String)
|
||||
Doc(mupdf::error::Error),
|
||||
Converting(String)
|
||||
}
|
||||
|
||||
pub enum RenderInfo {
|
||||
@@ -32,14 +29,14 @@ pub enum RenderInfo {
|
||||
#[derive(Clone)]
|
||||
pub struct PageInfo {
|
||||
pub img_data: ImageData,
|
||||
pub page: usize,
|
||||
pub search_results: usize
|
||||
pub page_num: usize,
|
||||
pub result_rects: Vec<HighlightRect>
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ImageData {
|
||||
pub data: Vec<u8>,
|
||||
pub area: Rect
|
||||
pub pixels: Vec<u8>,
|
||||
pub cell_area: Rect
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
@@ -56,7 +53,7 @@ pub fn fill_default<T: Default>(vec: &mut Vec<T>, size: usize) {
|
||||
}
|
||||
}
|
||||
|
||||
// this function has to be sync (non-async) because the poppler::Document needs to be held during
|
||||
// this function has to be sync (non-async) because the mupdf::Document needs to be held during
|
||||
// most of it, but that's basically just a wrapper around `*c_void` cause it's just a binding to C
|
||||
// code, so it's !Send and thus can't be held across await points. So we can't call any of the
|
||||
// async `send` or `recv` methods in this function body, since those create await points. Which
|
||||
@@ -71,7 +68,7 @@ pub fn fill_default<T: Default>(vec: &mut Vec<T>, size: usize) {
|
||||
#[allow(clippy::needless_pass_by_value)]
|
||||
pub fn start_rendering(
|
||||
path: &str,
|
||||
mut sender: Sender<Result<RenderInfo, RenderError>>,
|
||||
sender: Sender<Result<RenderInfo, RenderError>>,
|
||||
receiver: Receiver<RenderNotif>,
|
||||
size: WindowSize
|
||||
) -> Result<(), SendError<Result<RenderInfo, RenderError>>> {
|
||||
@@ -95,7 +92,7 @@ pub fn start_rendering(
|
||||
let mut stored_doc = None;
|
||||
|
||||
'reload: loop {
|
||||
let doc = match Document::from_file(path, None) {
|
||||
let doc = match Document::open(path) {
|
||||
Err(e) => {
|
||||
// if there's an error, tell the main loop
|
||||
sender.send(Err(RenderError::Doc(e)))?;
|
||||
@@ -125,7 +122,16 @@ pub fn start_rendering(
|
||||
}
|
||||
};
|
||||
|
||||
let n_pages = doc.n_pages() as usize;
|
||||
let n_pages = match doc.page_count() {
|
||||
Ok(n) => n as usize,
|
||||
Err(e) => {
|
||||
sender.send(Err(RenderError::Doc(e)))?;
|
||||
// just basic backoff i think
|
||||
sleep(Duration::from_secs(1));
|
||||
continue 'reload;
|
||||
}
|
||||
};
|
||||
|
||||
sender.send(Ok(RenderInfo::NumPages(n_pages)))?;
|
||||
|
||||
// We're using this vec of bools to indicate which page numbers have already been rendered,
|
||||
@@ -205,8 +211,8 @@ pub fn start_rendering(
|
||||
.map(|(idx, p)| (start_point - (idx + 1), p))
|
||||
);
|
||||
|
||||
let area_w = f64::from(area.width) * f64::from(col_w);
|
||||
let area_h = f64::from(area.height) * f64::from(col_h);
|
||||
let area_w = f32::from(area.width) * f32::from(col_w);
|
||||
let area_h = f32::from(area.height) * f32::from(col_h);
|
||||
|
||||
// we go through each page
|
||||
for (num, rendered) in page_iter {
|
||||
@@ -230,12 +236,12 @@ pub fn start_rendering(
|
||||
|
||||
// We know this is in range 'cause we're iterating over it but we still just want
|
||||
// to be safe
|
||||
let Some(page) = doc.page(num as i32) else {
|
||||
sender.send(Err(RenderError::Render(format!(
|
||||
"Couldn't get page {num} ({}) of doc?",
|
||||
num as i32
|
||||
))))?;
|
||||
continue;
|
||||
let page = match doc.load_page(num as i32) {
|
||||
Err(e) => {
|
||||
sender.send(Err(RenderError::Doc(e)))?;
|
||||
continue;
|
||||
}
|
||||
Ok(p) => p
|
||||
};
|
||||
|
||||
let rendered_with_no_results =
|
||||
@@ -257,26 +263,34 @@ pub fn start_rendering(
|
||||
// we make a potentially incorrect assumption here that writing the context
|
||||
// to a png won't fail, and mark that it all rendered correctly here before
|
||||
// spawning off the thread to do so and send it.
|
||||
rendered.contained_term = Some(ctx.num_results > 0);
|
||||
rendered.contained_term = Some(ctx.result_rects.is_empty());
|
||||
rendered.successful = true;
|
||||
|
||||
// if this is the page that the user is currently trying to look at, don't
|
||||
// bother spawning off a thread to render it to a png - it'll only slow
|
||||
// down the time til the user can see it (due to the overhead of creating a
|
||||
// thread), but we still want to spawn threads to render the other pages
|
||||
// since the effects of parallelizing that will be noticeable if the user
|
||||
// tries to move through pages more quickly
|
||||
if num == start_point {
|
||||
render_ctx_to_png(&ctx, &mut sender, (col_w, col_h), num)?;
|
||||
} else {
|
||||
let mut sender = sender.clone();
|
||||
thread::spawn(move || {
|
||||
render_ctx_to_png(&ctx, &mut sender, (col_w, col_h), num)
|
||||
});
|
||||
}
|
||||
let cap = (ctx.pixmap.width()
|
||||
* ctx.pixmap.height() * u32::from(ctx.pixmap.n()))
|
||||
as usize;
|
||||
let mut pixels = Vec::with_capacity(cap);
|
||||
if let Err(e) = ctx.pixmap.write_to(&mut pixels, mupdf::ImageFormat::PNM) {
|
||||
sender.send(Err(RenderError::Doc(e)))?;
|
||||
continue;
|
||||
};
|
||||
|
||||
sender.send(Ok(RenderInfo::Page(PageInfo {
|
||||
img_data: ImageData {
|
||||
pixels,
|
||||
cell_area: Rect {
|
||||
x: 0,
|
||||
y: 0,
|
||||
width: (ctx.surface_w / f32::from(col_w)) as u16,
|
||||
height: (ctx.surface_h / f32::from(col_h)) as u16
|
||||
}
|
||||
},
|
||||
page_num: num,
|
||||
result_rects: ctx.result_rects
|
||||
})))?;
|
||||
}
|
||||
// And if we got an error, then obviously we need to propagate that
|
||||
Err(e) => sender.send(Err(RenderError::Render(e)))?
|
||||
Err(e) => sender.send(Err(RenderError::Doc(e)))?
|
||||
}
|
||||
}
|
||||
|
||||
@@ -295,34 +309,37 @@ pub fn start_rendering(
|
||||
}
|
||||
|
||||
struct RenderedContext {
|
||||
surface: Surface,
|
||||
num_results: usize,
|
||||
surface_width: f64,
|
||||
surface_height: f64
|
||||
pixmap: Pixmap,
|
||||
surface_w: f32,
|
||||
surface_h: f32,
|
||||
result_rects: Vec<HighlightRect>
|
||||
}
|
||||
|
||||
/// SAFETY: I think this is safe because, although the backing struct for `Surface` does contain
|
||||
/// pointers to like the cairo_backend_t struct that all the cairo stuff is using, that struct is
|
||||
/// basically just a vtable, so accessing it from multiple threads *should* be safe since we're
|
||||
/// just calling the same functions with different data. The only other thing it holds reference to
|
||||
/// is a `cairo_device_t`, but that seems to be thread-safe because it's managed through ref counts
|
||||
/// and a mutex. Also, as far as I can tell from reading the source code, write_to_png_stream (the
|
||||
/// only function we call on this struct) doesn't access the device at all, so we should be fine
|
||||
/// there.
|
||||
/// We want this to be Send so that we can delegate the png writing to a separate thread (since
|
||||
/// that's the thing that takes the most time, by far, in this app).
|
||||
unsafe impl Send for RenderedContext {}
|
||||
|
||||
fn render_single_page_to_ctx(
|
||||
page: &Page,
|
||||
search_term: Option<&str>,
|
||||
already_rendered_no_results: bool,
|
||||
(area_w, area_h): (f64, f64)
|
||||
) -> Result<Option<RenderedContext>, String> {
|
||||
let mut result_rects = search_term
|
||||
.as_ref()
|
||||
.map(|term| page.find_text_with_options(term, FindFlags::DEFAULT | FindFlags::MULTILINE))
|
||||
.unwrap_or_default();
|
||||
(area_w, area_h): (f32, f32)
|
||||
) -> Result<Option<RenderedContext>, mupdf::error::Error> {
|
||||
let mut max_hits = 10;
|
||||
let result_rects = loop {
|
||||
let rects = search_term
|
||||
.as_ref()
|
||||
// mupdf allocates a buffer of the size we give it to try to fill it with results. If we
|
||||
// pass in u32::MAX, it allocates too much memory to function. If we pass too small of a
|
||||
// number in, we may miss out on some of the results. Ideally, we'd like to make a better
|
||||
// interface than this, but we're stuck with this kinda ugly looping until we make sure
|
||||
// that we've found every instance of it on this page.
|
||||
.map(|term| page.search(term, max_hits))
|
||||
.transpose()?
|
||||
.unwrap_or_default();
|
||||
|
||||
if rects.len() < (max_hits as usize) {
|
||||
break rects;
|
||||
}
|
||||
|
||||
max_hits *= 2;
|
||||
};
|
||||
|
||||
// If there are no search terms on this page, and we've already rendered it with no search
|
||||
// terms, then just return none to avoid this computation
|
||||
@@ -331,7 +348,8 @@ fn render_single_page_to_ctx(
|
||||
}
|
||||
|
||||
// then, get the size of the page
|
||||
let (p_width, p_height) = page.size();
|
||||
let bounds = page.bounds()?;
|
||||
let (p_width, p_height) = (bounds.x1 - bounds.x0, bounds.y1 - bounds.y0);
|
||||
|
||||
// and get its aspect ratio
|
||||
let p_aspect_ratio = p_width / p_height;
|
||||
@@ -353,93 +371,47 @@ fn render_single_page_to_ctx(
|
||||
area_h / p_height
|
||||
};
|
||||
|
||||
let surface_width = p_width * scale_factor;
|
||||
let surface_height = p_height * scale_factor;
|
||||
let surface_w = p_width * scale_factor;
|
||||
let surface_h = p_height * scale_factor;
|
||||
|
||||
let surface = cairo::ImageSurface::create(
|
||||
Format::Rgb16_565,
|
||||
// No matter how big you make these arguments, the image will be drawn at the same
|
||||
// size. So if you make them really big, the image will be drawn on a quarter of it. If
|
||||
// you make them really small, the image will cover more than all of the surface.
|
||||
//
|
||||
// However, that only stands as long as you don't scale the context that you place this
|
||||
// surface into. If you scale the dimensions of this image by n, then scale the context
|
||||
// by that same amount, then it'll still fit perfectly into the context, but be
|
||||
// rendered at higher quality.
|
||||
surface_width as i32,
|
||||
surface_height as i32
|
||||
)
|
||||
.map_err(|e| format!("Couldn't create ImageSurface: {e}"))?;
|
||||
surface.set_device_scale(scale_factor, scale_factor);
|
||||
let colorspace = Colorspace::device_rgb();
|
||||
let matrix = Matrix::new_scale(scale_factor, scale_factor);
|
||||
|
||||
let ctx = Context::new(surface).map_err(|e| format!("Couldn't create Context: {e}"))?;
|
||||
let mut pixmap = page.to_pixmap(&matrix, &colorspace, 0.0, false)?;
|
||||
|
||||
// The default background color of PDFs (at least, I think) is white, so we need to set
|
||||
// that as the background color, then paint, then render.
|
||||
ctx.set_source_rgba(1.0, 1.0, 1.0, 1.0);
|
||||
let (x_res, y_res) = pixmap.resolution();
|
||||
let new_x = (x_res as f32 * scale_factor) as i32;
|
||||
let new_y = (y_res as f32 * scale_factor) as i32;
|
||||
pixmap.set_resolution(new_x, new_y);
|
||||
|
||||
ctx.set_antialias(Antialias::None);
|
||||
ctx.paint()
|
||||
.map_err(|e| format!("Couldn't paint Context: {e}"))?;
|
||||
page.render(&ctx);
|
||||
|
||||
let num_results = result_rects.len();
|
||||
|
||||
if !result_rects.is_empty() {
|
||||
let mut highlight_color = Color::new();
|
||||
highlight_color.set_red((u16::MAX / 5) * 4);
|
||||
highlight_color.set_green((u16::MAX / 5) * 4);
|
||||
|
||||
let mut old_rect = Rectangle::new();
|
||||
for rect in &mut result_rects {
|
||||
// According to https://gitlab.freedesktop.org/poppler/poppler/-/issues/763, these rects
|
||||
// need to be corrected since they use different references as the y-coordinate base
|
||||
rect.set_y1(p_height - rect.y1());
|
||||
rect.set_y2(p_height - rect.y2());
|
||||
|
||||
page.render_selection(
|
||||
&ctx,
|
||||
rect,
|
||||
&mut old_rect,
|
||||
SelectionStyle::Glyph,
|
||||
&mut Color::new(),
|
||||
&mut highlight_color
|
||||
);
|
||||
}
|
||||
}
|
||||
let result_rects = result_rects
|
||||
.into_iter()
|
||||
.map(|quad| {
|
||||
let ul_x = (quad.ul.x * scale_factor) as u32;
|
||||
let ul_y = (quad.ul.y * scale_factor) as u32;
|
||||
let lr_x = (quad.lr.x * scale_factor) as u32;
|
||||
let lr_y = (quad.lr.y * scale_factor) as u32;
|
||||
HighlightRect {
|
||||
ul_x,
|
||||
ul_y,
|
||||
lr_x,
|
||||
lr_y
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(Some(RenderedContext {
|
||||
surface: ctx.target(),
|
||||
num_results,
|
||||
surface_width,
|
||||
surface_height
|
||||
pixmap,
|
||||
surface_w,
|
||||
surface_h,
|
||||
result_rects
|
||||
}))
|
||||
}
|
||||
|
||||
fn render_ctx_to_png(
|
||||
ctx: &RenderedContext,
|
||||
sender: &mut Sender<Result<RenderInfo, RenderError>>,
|
||||
(col_w, col_h): (u16, u16),
|
||||
page: usize
|
||||
) -> Result<(), SendError<Result<RenderInfo, RenderError>>> {
|
||||
let mut img_data = Vec::with_capacity((ctx.surface_height * ctx.surface_width) as usize);
|
||||
|
||||
match ctx.surface.write_to_png(&mut img_data) {
|
||||
Err(e) => sender.send(Err(RenderError::Render(format!(
|
||||
"Couldn't write surface to png: {e}"
|
||||
)))),
|
||||
Ok(()) => sender.send(Ok(RenderInfo::Page(PageInfo {
|
||||
img_data: ImageData {
|
||||
data: img_data,
|
||||
area: Rect {
|
||||
width: ctx.surface_width as u16 / col_w,
|
||||
height: ctx.surface_height as u16 / col_h,
|
||||
x: 0,
|
||||
y: 0
|
||||
}
|
||||
},
|
||||
page,
|
||||
search_results: ctx.num_results
|
||||
})))
|
||||
}
|
||||
#[derive(Clone)]
|
||||
pub struct HighlightRect {
|
||||
pub ul_x: u32,
|
||||
pub ul_y: u32,
|
||||
pub lr_x: u32,
|
||||
pub lr_y: u32
|
||||
}
|
||||
|
||||
+2
-2
@@ -569,8 +569,8 @@ impl Tui {
|
||||
pub fn show_error(&mut self, err: RenderError) {
|
||||
self.set_msg(MessageSetting::Some(BottomMessage::Error(match err {
|
||||
RenderError::Notify(e) => format!("Auto-reload failed: {e}"),
|
||||
RenderError::Doc(e) => format!("Couldn't open document: {e}"),
|
||||
RenderError::Render(e) => format!("Couldn't render page: {e}")
|
||||
RenderError::Doc(e) => format!("Couldn't process document: {e}"),
|
||||
RenderError::Converting(e) => format!("Couldn't convert page after rendering: {e}")
|
||||
})));
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user