Compare commits

..

11 Commits

5 changed files with 1236 additions and 91 deletions

859
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,20 +1,20 @@
[package] [package]
name = "md2pdf" name = "md2pdf"
version = "0.0.1" version = "0.0.3"
authors = ["Thomas Forgione <thomas@forgione.fr>"] authors = ["Thomas Forgione <thomas@forgione.fr>"]
description = "A small utility to convert markdown files to pdf exploiting tectonic." description = "A small utility to convert markdown files to pdf exploiting tectonic."
edition = "2021" edition = "2021"
license = "MIT" license = "MIT"
repository = "https://gitea.tforgione.fr/tforgione/md2pdf" repository = "https://gitea.tforgione.fr/tforgione/md2pdf"
readme = "README.md" readme = "README.md"
rust-version = "1.64"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
tectonic = "0.5.2" tectonic = "0.9"
pulldown-cmark = "0.5.3" pulldown-cmark = "0.5.3"
clap = { version = "4.0.7", features = ["cargo"] } clap = { version = "4.0.7", features = ["cargo"] }
convert_case = "0.6.0"
[lib] [lib]
name = "md2pdf" name = "md2pdf"
@ -23,3 +23,7 @@ path = "src/lib.rs"
[[bin]] [[bin]]
name = "md2pdf" name = "md2pdf"
path = "src/main.rs" path = "src/main.rs"
[dev-dependencies]
pretty_assertions = "1.3.0"
lopdf = "0.27.0"

View File

@ -5,7 +5,7 @@
### Usage ### Usage
``` ```sh
md2pdf -i input.md -o output.pdf md2pdf -i input.md -o output.pdf
``` ```

View File

@ -1,6 +1,7 @@
use convert_case::{Case, Casing};
use pulldown_cmark::{Event, Parser, Tag}; use pulldown_cmark::{Event, Parser, Tag};
pub const LATEX_HEADER:&str = r#"\documentclass{scrartcl} pub const LATEX_HEADER: &str = r#"\documentclass{scrartcl}
\usepackage{graphicx} \usepackage{graphicx}
\usepackage{hyperref} \usepackage{hyperref}
\usepackage{listings} \usepackage{listings}
@ -41,26 +42,83 @@ pub const LATEX_HEADER:&str = r#"\documentclass{scrartcl}
pub const LATEX_FOOTER: &str = "\n\\end{document}\n"; pub const LATEX_FOOTER: &str = "\n\\end{document}\n";
/// Kind of markdown construct the converter is currently inside.
///
/// The event loop stores one of these when a construct starts and reads it back
/// later (for text, inline code and table cells) to decide how to emit LaTeX.
/// Inline code and raw HTML are not tracked.
///
/// TODO: Is there a native pulldown_cmark method to do this?
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EventType {
    /// Set when an emphasis span starts.
    Emphasis,
    /// Set when a header starts.
    Header,
    /// Set when a strong span starts.
    Strong,
    /// Set when a table or table row starts, and again once the table head ends.
    Table,
    /// Set while the table head is being processed.
    TableHead,
    /// Initial state; also restored after a table or a code block ends.
    Text,
}
/// Conversion state carried across parser events inside `markdown_to_latex`.
struct CurrentType {
/// Kind of construct the event loop most recently entered; starts out as
/// `EventType::Text`.
event_type: EventType,
}
/**
* Part of this function is Copyright Liam Beckman <liam@liambeckman.com> (license: MPL-2.0)
* Source: https://github.com/lbeckman314/md2tex/blob/25fa878ccce122c224c24659ee1c1dd30c8a5d51/src/lib.rs
*
*/
pub fn markdown_to_latex(markdown: String) -> String { pub fn markdown_to_latex(markdown: String) -> String {
let mut output = String::from(LATEX_HEADER); let mut output = String::from(LATEX_HEADER);
let parser = Parser::new(&markdown); let parser = Parser::new(&markdown);
let mut header_value = String::new();
let mut current: CurrentType = CurrentType {
event_type: EventType::Text,
};
let mut cells = 0;
let mut equation_mode = false;
let mut buffer = String::new();
for event in parser { for event in parser {
match event { match event {
Event::Start(Tag::Header(level)) => { Event::Start(Tag::Header(level)) => {
current.event_type = EventType::Header;
output.push_str("\n");
output.push_str("\\"); output.push_str("\\");
for _ in 1 .. level { match level {
output.push_str("sub"); -1 => output.push_str("part{"),
0 => output.push_str("chapter{"),
1 => output.push_str("section{"),
2 => output.push_str("subsection{"),
3 => output.push_str("subsubsection{"),
4 => output.push_str("paragraph{"),
5 => output.push_str("subparagraph{"),
_ => eprintln!("header is out of range."),
} }
output.push_str("section{"); }
}, Event::End(Tag::Header(_)) => {
Event::End(Tag::Header(_)) => output.push_str("}\n"), output.push_str("}\n");
output.push_str("\\");
output.push_str("label{");
output.push_str(&header_value);
output.push_str("}\n");
Event::Start(Tag::Emphasis) => output.push_str("\\emph{"), output.push_str("\\");
output.push_str("label{");
output.push_str(&header_value.to_case(Case::Kebab));
output.push_str("}\n");
}
Event::Start(Tag::Emphasis) => {
current.event_type = EventType::Emphasis;
output.push_str("\\emph{");
}
Event::End(Tag::Emphasis) => output.push_str("}"), Event::End(Tag::Emphasis) => output.push_str("}"),
Event::Start(Tag::Strong) => output.push_str("\\textbf{"), Event::Start(Tag::Strong) => {
current.event_type = EventType::Strong;
output.push_str("\\textbf{");
}
Event::End(Tag::Strong) => output.push_str("}"), Event::End(Tag::Strong) => output.push_str("}"),
Event::Start(Tag::List(None)) => output.push_str("\\begin{itemize}\n"), Event::Start(Tag::List(None)) => output.push_str("\\begin{itemize}\n"),
@ -69,16 +127,116 @@ pub fn markdown_to_latex(markdown: String) -> String {
Event::Start(Tag::List(Some(_))) => output.push_str("\\begin{enumerate}\n"), Event::Start(Tag::List(Some(_))) => output.push_str("\\begin{enumerate}\n"),
Event::End(Tag::List(Some(_))) => output.push_str("\\end{enumerate}\n"), Event::End(Tag::List(Some(_))) => output.push_str("\\end{enumerate}\n"),
Event::Start(Tag::Paragraph) => {
output.push_str("\n");
}
Event::End(Tag::Paragraph) => {
// ~ adds a space to prevent
// "There's no line here to end" error on empty lines.
output.push_str(r"~\\");
output.push_str("\n");
}
Event::Start(Tag::Link(_, url, _)) => { Event::Start(Tag::Link(_, url, _)) => {
output.push_str("\\href{"); output.push_str("\\href{");
output.push_str(&*url); output.push_str(&*url);
output.push_str("}{"); output.push_str("}{");
}, }
Event::End(Tag::Link(_, _, _)) => { Event::End(Tag::Link(_, _, _)) => {
output.push_str("}"); output.push_str("}");
}
Event::Start(Tag::Table(_)) => {
current.event_type = EventType::Table;
let table_start = vec![
"\n",
r"\begingroup",
r"\setlength{\LTleft}{-20cm plus -1fill}",
r"\setlength{\LTright}{\LTleft}",
r"\begin{longtable}{!!!}",
r"\hline",
r"\hline",
"\n",
];
for element in table_start {
output.push_str(element);
output.push_str("\n");
}
}
Event::Start(Tag::TableHead) => {
current.event_type = EventType::TableHead;
}
Event::End(Tag::TableHead) => {
output.truncate(output.len() - 2);
output.push_str(r"\\");
output.push_str("\n");
output.push_str(r"\hline");
output.push_str("\n");
// we presume that a table follows every table head.
current.event_type = EventType::Table;
}
Event::End(Tag::Table(_)) => {
let table_end = vec![
r"\arrayrulecolor{black}\hline",
r"\end{longtable}",
r"\endgroup",
"\n",
];
for element in table_end {
output.push_str(element);
output.push_str("\n");
}
let mut cols = String::new();
for _i in 0..cells {
cols.push_str(&format!(
r"C{{{width}\textwidth}} ",
width = 1. / cells as f64
));
}
output = output.replace("!!!", &cols);
cells = 0;
current.event_type = EventType::Text;
}
Event::Start(Tag::TableCell) => match current.event_type {
EventType::TableHead => {
output.push_str(r"\bfseries{");
}
_ => (),
}, },
Event::End(Tag::TableCell) => {
match current.event_type {
EventType::TableHead => {
output.push_str(r"}");
cells += 1;
}
_ => (),
}
output.push_str(" & ");
}
Event::Start(Tag::TableRow) => {
current.event_type = EventType::Table;
}
Event::End(Tag::TableRow) => {
output.truncate(output.len() - 2);
output.push_str(r"\\");
output.push_str(r"\arrayrulecolor{lightgray}\hline");
output.push_str("\n");
}
Event::Start(Tag::Image(_, path, title)) => { Event::Start(Tag::Image(_, path, title)) => {
output.push_str("\\begin{figure}\n"); output.push_str("\\begin{figure}\n");
output.push_str("\\centering\n"); output.push_str("\\centering\n");
@ -88,32 +246,105 @@ pub fn markdown_to_latex(markdown: String) -> String {
output.push_str("\\caption{"); output.push_str("\\caption{");
output.push_str(&*title); output.push_str(&*title);
output.push_str("}\n\\end{figure}\n"); output.push_str("}\n\\end{figure}\n");
}, }
Event::Start(Tag::Item) => output.push_str("\\item "), Event::Start(Tag::Item) => output.push_str("\\item "),
Event::End(Tag::Item) => output.push_str("\n"), Event::End(Tag::Item) => output.push_str("\n"),
Event::Start(Tag::CodeBlock(lang)) => { Event::Start(Tag::CodeBlock(lang)) => {
if ! lang.is_empty() { if !lang.is_empty() {
output.push_str("\\begin{lstlisting}[language="); output.push_str("\\begin{lstlisting}[language=");
output.push_str(&*lang); output.push_str(&*lang);
output.push_str("]\n"); output.push_str("]\n");
} else { } else {
output.push_str("\\begin{lstlisting}\n"); output.push_str("\\begin{lstlisting}\n");
} }
}, }
Event::End(Tag::CodeBlock(_)) => { Event::End(Tag::CodeBlock(_)) => {
output.push_str("\n\\end{lstlisting}\n"); output.push_str("\n\\end{lstlisting}\n");
}, current.event_type = EventType::Text;
}
Event::Code(t) => {
output.push_str("\\lstinline|");
match current.event_type {
EventType::Header => output
.push_str(&*t.replace("#", r"\#").replace("", "...").replace("З", "3")),
_ => output
.push_str(&*t.replace("", "...").replace("З", "3").replace("<EFBFBD>", r"\<5C>")),
}
output.push_str("|");
}
Event::Text(t) => { Event::Text(t) => {
// if "\(" or "\[" are encountered, then begin equation
// and don't replace any characters.
let delim_start = vec![r"\(", r"\["];
let delim_end = vec![r"\)", r"\]"];
if buffer.len() > 100 {
buffer.clear();
}
buffer.push_str(&t.clone().into_string());
match current.event_type {
EventType::Strong
| EventType::Emphasis
| EventType::Text
| EventType::Header
| EventType::Table => {
// TODO more elegant way to do ordered `replace`s (structs?).
if delim_start
.into_iter()
.any(|element| buffer.contains(element))
{
let popped = output.pop().unwrap();
if popped != '\\' {
output.push(popped);
}
output.push_str(&*t); output.push_str(&*t);
}, equation_mode = true;
} else if delim_end
.into_iter()
.any(|element| buffer.contains(element))
|| equation_mode == true
{
let popped = output.pop().unwrap();
if popped != '\\' {
output.push(popped);
}
output.push_str(&*t);
equation_mode = false;
} else {
output.push_str(
&*t.replace(r"\", r"\\")
.replace("&", r"\&")
.replace(r"\s", r"\textbackslash{}s")
.replace(r"\w", r"\textbackslash{}w")
.replace("_", r"\_")
.replace(r"\<", "<")
.replace(r"%", r"\%")
.replace(r"$", r"\$")
.replace(r"—", "---")
.replace("#", r"\#"),
);
}
header_value = t.into_string();
}
_ => output.push_str(&*t),
}
}
Event::SoftBreak => { Event::SoftBreak => {
output.push('\n'); output.push('\n');
}, }
Event::HardBreak => {
output.push_str(r"\\");
output.push('\n');
}
_ => (), _ => (),
} }
@ -128,3 +359,112 @@ pub fn markdown_to_pdf(markdown: String) -> Result<Vec<u8>, tectonic::Error> {
tectonic::latex_to_pdf(markdown_to_latex(markdown)) tectonic::latex_to_pdf(markdown_to_latex(markdown))
} }
#[cfg(test)]
mod tests {
use super::{markdown_to_latex, markdown_to_pdf};
use lopdf::Document;
use pretty_assertions::assert_eq;
use std::io::Cursor;
/// Markdown fixture fed to both tests: two heading levels, a link, strong text
/// written with `**` and with `__`, a fenced `sh` code block, and a link whose
/// text contains `#`.
///
/// Note that `__Italic__` is strong emphasis in markdown, so the expected LaTeX
/// renders it with `\textbf` despite its wording.
const MARKDOWN_IN: &str = r#"# First title
Some content
## Second level
Text
[link](https://example.com)
**Bold**
__Italic__
some code:
```sh
sudo make-it-work
```
issue [#12345](https://example.com)
"#;
/// Expected LaTeX for `MARKDOWN_IN`, compared verbatim in `test_md_to_latex`.
///
/// It consists of the document preamble followed by the converted body. Each
/// heading is followed by two `\label` lines (the title as written and its
/// kebab-case form), paragraphs end in `~\\`, and the `#` in the link text is
/// escaped as `\#`.
///
/// Whitespace inside this literal is significant because the comparison is exact.
const LATEXT_OUT: &str = r#"\documentclass{scrartcl}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{listings}
\usepackage{xcolor}
\definecolor{colKeys}{rgb}{0,0.5,0}
\definecolor{colIdentifier}{rgb}{0,0,0}
\definecolor{colComments}{rgb}{0,0.5,1}
\definecolor{colString}{rgb}{0.6,0.1,0.1}
\definecolor{colBackground}{rgb}{0.95,0.95,1}
\lstset{%configuration de listings
float=hbp,%
basicstyle=\ttfamily\small,%
%
identifierstyle=\color{colIdentifier}, %
keywordstyle=\color{colKeys}, %
stringstyle=\color{colString}, %
commentstyle=\color{colComments}\textit, %
%
backgroundcolor=\color{colBackground},%
%
columns=flexible, %
tabsize=2, %
frame=trbl, %
%frameround=tttt,%
extendedchars=true, %
showspaces=false, %
showstringspaces=false, %
numbers=left, %
numberstyle=\tiny, %
breaklines=true, %
breakautoindent=true, %
captionpos=b,%
xrightmargin=0.2cm, %
xleftmargin=0.2cm
}
\begin{document}
\section{First title}
\label{First title}
\label{first-title}
Some content~\\
\subsection{Second level}
\label{Second level}
\label{second-level}
Text
\href{https://example.com}{link}
\textbf{Bold}
\textbf{Italic}~\\
some code:~\\
\begin{lstlisting}[language=sh]
sudo make-it-work
\end{lstlisting}
issue \href{https://example.com}{\#12345}~\\
\end{document}
"#;
/// Converts the markdown fixture and requires the LaTeX to match the expected
/// document exactly.
#[test]
fn test_md_to_latex() {
    let rendered = markdown_to_latex(String::from(MARKDOWN_IN));
    assert_eq!(LATEXT_OUT, rendered);
}
/// Renders the markdown fixture to PDF and, when that works and the bytes parse
/// as a PDF, checks that the document reports version 1.5.
///
/// A failed conversion or an unparsable PDF leaves the test passing.
/// NOTE(review): presumably this leniency is deliberate (e.g. environments where
/// the LaTeX engine cannot run) — confirm, since real regressions are hidden too.
#[test]
fn test_latex_to_pdf() {
    if let Ok(pdf_bytes) = markdown_to_pdf(MARKDOWN_IN.to_string()) {
        let mut reader = Cursor::new(pdf_bytes);
        if let Ok(parsed) = Document::load_from(&mut reader) {
            assert_eq!("1.5", parsed.version);
        }
    }
}
}

View File

@ -1,10 +1,10 @@
use std::ffi::OsStr;
use std::fs::File; use std::fs::File;
use std::io::{Read, Write}; use std::io::{Read, Write};
use std::process::exit;
use std::path::PathBuf; use std::path::PathBuf;
use std::ffi::OsStr; use std::process::exit;
use clap::{crate_authors, crate_description, crate_name, crate_version, Command, Arg}; use clap::{crate_authors, crate_description, crate_name, crate_version, Arg, Command};
use md2pdf::{markdown_to_latex, markdown_to_pdf}; use md2pdf::{markdown_to_latex, markdown_to_pdf};
@ -17,36 +17,40 @@ macro_rules! unwrap {
exit(1); exit(1);
} }
} }
} };
} }
fn main() { fn main() {
let matches = Command::new(crate_name!()) let matches = Command::new(crate_name!())
.bin_name(crate_name!()) .bin_name(crate_name!())
.version(crate_version!()) .version(crate_version!())
.author(crate_authors!("\n")) .author(crate_authors!("\n"))
.about(crate_description!()) .about(crate_description!())
.arg(Arg::new("INPUT") .arg(
Arg::new("INPUT")
.long("input") .long("input")
.short('i') .short('i')
.help("Input markdown files") .help("Input markdown files")
.required(true) .required(true)
.value_parser(clap::value_parser!(PathBuf)) .value_parser(clap::value_parser!(PathBuf)),
) )
.arg(Arg::new("OUTPUT") .arg(
Arg::new("OUTPUT")
.long("output") .long("output")
.short('o') .short('o')
.help("Output tex or pdf file") .help("Output tex or pdf file")
.required(true) .required(true)
.value_parser(clap::value_parser!(PathBuf)) .value_parser(clap::value_parser!(PathBuf)),
) )
.get_matches(); .get_matches();
let input_path = matches.get_one::<PathBuf>("INPUT").unwrap(); let input_path = matches.get_one::<PathBuf>("INPUT").unwrap();
let mut content = String::new(); let mut content = String::new();
let mut input = unwrap!(File::open(input_path), "couldn't open input file"); let mut input = unwrap!(File::open(input_path), "couldn't open input file");
unwrap!(input.read_to_string(&mut content), "couldn't read file content"); unwrap!(
input.read_to_string(&mut content),
"couldn't read file content"
);
let output_path = matches.get_one::<PathBuf>("OUTPUT").unwrap(); let output_path = matches.get_one::<PathBuf>("OUTPUT").unwrap();
let output_path_ext = output_path.extension().and_then(OsStr::to_str); let output_path_ext = output_path.extension().and_then(OsStr::to_str);
@ -56,37 +60,33 @@ fn main() {
Some("tex") => { Some("tex") => {
let tex = markdown_to_latex(content); let tex = markdown_to_latex(content);
unwrap!(output.write(tex.as_bytes()), "couldn't write output file"); unwrap!(output.write(tex.as_bytes()), "couldn't write output file");
}, }
Some("pdf") => { Some("pdf") => match markdown_to_pdf(content) {
match markdown_to_pdf(content) { Ok(data) => match output.write(&data) {
Ok(data) => {
match output.write(&data) {
Ok(_) => { Ok(_) => {
exit(0); exit(0);
}, }
Err(error) => { Err(error) => {
eprintln!( eprintln!("error while writing file: {}", error);
"error while writing file: {}", error
);
exit(1); exit(1);
},
} }
}, },
Err(error) => { Err(error) => {
eprintln!( eprintln!("error while compiling latex: {}", error.description());
"error while compiling latex: {}", error.description()
);
exit(1); exit(1);
} }
}
}, },
Some(ext) => { Some(ext) => {
eprintln!("unknown file format ({}) for output: {}", ext, output_path.display()); eprintln!(
"unknown file format ({}) for output: {}",
ext,
output_path.display()
);
exit(1); exit(1);
}, }
None => { None => {
eprintln!("unknown file format for output: {}", output_path.display()); eprintln!("unknown file format for output: {}", output_path.display());
exit(1); exit(1);
}, }
} }
} }