Files
historian/src/lib.rs
Gregory Marco 3ec4c11219
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Add tag search.
2025-08-31 01:52:25 -05:00

811 lines
28 KiB
Rust

/// File name of the page that represents a directory.
static DEFAULT_INDEX_FILENAME: &str = "README.md";
/// Directory that `PageRenderer::new` loads its templates from.
static DEFAULT_TEMPLATES_PATH: &str = "templates";
/// File name of the optional site configuration file in the wiki root.
static DEFAULT_TOML_FILENAME: &str = "Historian.toml";
/// File name suffix that marks a file as a wiki page rather than an attachment.
static MD_EXTENSION: &str = ".md";
extern crate pulldown_cmark;
extern crate tera;
extern crate serde;
extern crate git2;
extern crate regex;
extern crate pathdiff;
extern crate chrono;
extern crate grep;
#[macro_use] extern crate toml;
use std::fs;
use std::path::{Path, PathBuf};
use std::io::{Read, Write};
use std::collections::{HashMap, VecDeque};
use std::convert::From;
use std::sync::Mutex;
use std::ffi::OsStr;
use git2::{Repository, Signature, Commit};
use rocket::form::validate::Contains;
use tera::Tera;
use toml::value::Table;
use serde::Serialize;
use regex::Regex;
use pathdiff::diff_paths;
use chrono::{DateTime, MappedLocalTime, TimeZone, Local};
use pulldown_cmark::{Event, Tag, TagEnd};
/// A wiki stored in a directory tree, optionally backed by a git repository.
pub struct Historian {
/// Directory that page and attachment names are resolved against.
source_root: PathBuf,
/// File name of the page used for a directory (set from `DEFAULT_INDEX_FILENAME`).
index_filename: String,
/// Git repository opened at the wiki root; `None` when opening it failed.
pub repository: Option<Mutex<Repository>>,
/// Site configuration parsed from the wiki's TOML file; empty when the file is absent.
pub site_config: Table
}
impl Historian {
pub fn new(root: String) -> Historian {
let source_root: PathBuf = root.into();
let toml_path = source_root.join(DEFAULT_TOML_FILENAME);
let site_config = match fs::metadata(&toml_path) {
Err(_) => Table::new(),
Ok(_) => {
let toml_data = fs::read_to_string(&toml_path).unwrap();
toml::de::from_str(&toml_data).unwrap()
}
};
Historian {
source_root: source_root.clone(),
index_filename: DEFAULT_INDEX_FILENAME.to_owned(),
repository: Repository::open(source_root).ok().map(Mutex::new),
site_config
}
}
/// Finds the wiki that contains the file at `file_path_str` and resolves the
/// file to a page of that wiki.
///
/// Starting at the file's directory and walking upwards, the first directory
/// that contains `Historian.toml` becomes the wiki root. The file system root
/// itself is not inspected.
///
/// Returns `None` when the path is not an existing regular file, when no
/// enclosing directory has a configuration file, or when the page cannot be
/// resolved inside the wiki that was found.
///
/// # Panics
///
/// Panics if the path cannot be made absolute or the wiki root is not valid
/// UTF-8.
pub fn resolve_from_file(file_path_str: &str) -> Option<(Historian, Page)> {
    let file_path: PathBuf = std::path::absolute(file_path_str).unwrap();
    let is_regular_file = fs::metadata(&file_path)
        .map(|metadata| metadata.is_file())
        .unwrap_or(false);
    if !is_regular_file {
        return None;
    }

    let wiki_root = file_path
        .parent()
        .unwrap()
        .ancestors()
        // Stop before the directory that has no parent (the file system root).
        .take_while(|directory| directory.parent().is_some())
        .find(|directory| fs::metadata(directory.join(DEFAULT_TOML_FILENAME)).is_ok())?;

    let historian = Historian::new(wiki_root.to_str().unwrap().to_owned());
    let relative_page_path = diff_paths(&file_path, wiki_root).unwrap();
    let page = historian.resolve_to_page(relative_page_path.to_str()?)?;
    Some((historian, page))
}
pub fn resolve_to_page(&self, name: &str) -> Option<Page> {
let mut file_path = self.source_root.clone().join(Path::new(name));
match fs::metadata(&file_path) {
Err(_) => None,
Ok(metadata) => {
let mut children = vec![];
let mut attachments = vec![];
if metadata.is_dir() {
for entry in fs::read_dir(&file_path).unwrap() {
let entry_file = entry.unwrap();
let child = entry_file.file_name().into_string().unwrap();
let entry_metadata = entry_file.metadata().unwrap();
let entry_path = entry_file.path();
if entry_metadata.is_file() && !child.ends_with(MD_EXTENSION) {
attachments.push(child);
} else if !(child.starts_with(".") || child == self.index_filename || child == DEFAULT_TOML_FILENAME) {
let mut full_name = format!("{}/{}", name, child);
if full_name.starts_with('/') {
full_name.remove(0);
}
children.push(Child {
name: child.to_owned(),
full_name: full_name,
title: if entry_metadata.is_file() {
entry_path.file_stem().unwrap().to_str().unwrap().to_owned()
} else {
child.to_owned()
},
path: entry_path
});
}
}
file_path.push(&self.index_filename);
}
let mut split_path = name.rsplitn(2, "/");
let base_name = split_path.next().unwrap().to_owned();
let parent_page = if name != "" {
self.resolve_to_page(split_path.next().unwrap_or(""))
.map(Box::new)
} else {
None
};
let mut url = name.to_owned();
if url != "" && metadata.is_dir() {
url.push('/');
}
Some(Page {
title: if metadata.is_file() {
file_path.file_stem().unwrap().to_str().unwrap().to_owned()
} else {
base_name.to_owned()
},
full_name: name.to_owned(),
name: base_name,
url,
parent: parent_page,
path: file_path,
is_directory: metadata.is_dir(),
children,
attachments
})
}
}
}
pub fn resolve_to_attachment(&self, name: &str) -> Option<PathBuf> {
if name.ends_with(MD_EXTENSION) {
return None;
}
let file_path = self.source_root.clone().join(Path::new(name));
match fs::metadata(&file_path) {
Err(_) => None,
Ok(metadata) => {
if metadata.is_dir() {
None
} else {
Some(file_path)
}
}
}
}
/// Creates a struct for a new page.
///
/// The page is not saved until an edit is submitted. A `name` without a file
/// extension is treated as a directory: its content goes into the index file
/// inside that directory and its URL gets a trailing `/`. The returned page
/// has no parent, children or attachments.
pub fn start_page(&self, name: &str) -> Page {
    let requested_path = self.source_root.join(name);
    let is_directory = requested_path.extension().is_none();
    let (path, url) = if is_directory {
        (requested_path.join(&self.index_filename), format!("{}/", name))
    } else {
        (requested_path, name.to_owned())
    };
    // Last `/`-separated segment of the name.
    let base_name = name.rsplit('/').next().unwrap().to_owned();
    Page {
        title: name.to_owned(),
        full_name: name.to_owned(),
        name: base_name,
        url,
        parent: None,
        path,
        is_directory,
        children: Vec::new(),
        attachments: Vec::new(),
    }
}
pub fn submit_edit(&self, page: &Page, change: &Edit) {
// Create parent directories if necessary
if let Some(directory) = &page.path.parent() {
fs::create_dir_all(directory);
}
// write contents of file
let mut page_html_file = fs::File::create(&page.path).unwrap();
page_html_file.write_all(change.content.as_bytes()).unwrap();
// commit file to git repository
if let Some(repository_mutex) = &self.repository {
if let Ok(repository) = repository_mutex.lock() {
// add file to index
let mut index = repository.index().unwrap();
index.add_path(&page.path.strip_prefix(&self.source_root).unwrap()).unwrap();
index.write().unwrap();
let tree_oid = index.write_tree().unwrap();
let tree = repository.find_tree(tree_oid).unwrap();
// find parent commit
let parent_commit = if let Ok(commit) = repository.revparse_single("HEAD") {
commit.into_commit().ok()
} else {
None
};
let mut parents = Vec::new();
if parent_commit.is_some() {
parents.push(parent_commit.as_ref().unwrap());
}
// create commit
let signature = Signature::now(change.author.as_deref().unwrap_or("Historian"), "historian@local").unwrap();
repository.commit(
Some("HEAD"),
&signature,
&signature,
&change.summary,
&tree,
&parents[..],
).unwrap();
}
}
}
/// Lists the revisions reachable from `HEAD` that changed `page`, in the
/// order the revision walk yields them.
///
/// Returns an empty list when the wiki has no repository, the repository
/// lock is poisoned, the repository has no commits yet, or the page lies
/// outside the wiki root. Commits that cannot be loaded, or whose timestamp
/// cannot be represented, are skipped. An author name that is not valid
/// UTF-8 is converted lossily and a missing summary becomes an empty string.
pub fn get_history_of_page(&self, page: &Page) -> Vec<Revision> {
    let mut changes = Vec::new();
    let Ok(page_path_in_repo) = page.path.strip_prefix(&self.source_root) else {
        return changes;
    };
    let Some(repository_mutex) = &self.repository else {
        return changes;
    };
    let Ok(repository) = repository_mutex.lock() else {
        return changes;
    };
    let Ok(mut revwalk) = repository.revwalk() else {
        return changes;
    };
    // Fails when `HEAD` does not point at a commit yet (fresh repository).
    if revwalk.push_head().is_err() {
        return changes;
    }

    for result in revwalk {
        let Ok(oid) = result else { continue };
        let Ok(commit) = repository.find_commit(oid) else { continue };
        if !commit_includes_file(&repository, &commit, page_path_in_repo) {
            continue;
        }
        let Some(datetime) = Local.timestamp_opt(commit.time().seconds(), 0).single() else {
            continue;
        };
        changes.push(Revision {
            id: commit.id().to_string(),
            author: String::from_utf8_lossy(commit.author().name_bytes()).into_owned(),
            summary: commit.summary().unwrap_or("").to_owned(),
            datetime,
        });
    }
    changes
}
/// Looks up the commit that the revision spec `id` refers to.
///
/// Returns `None` when the wiki has no repository, the repository lock is
/// poisoned, `id` does not resolve, the resolved object is not a commit, or
/// the commit's timestamp cannot be represented. An author name that is not
/// valid UTF-8 is converted lossily and a missing summary becomes an empty
/// string.
pub fn get_revision_by_id(&self, id: &str) -> Option<Revision> {
    let repository = self.repository.as_ref()?.lock().ok()?;
    let object = repository.revparse_single(id).ok()?;
    let commit = object.as_commit()?;
    let datetime = Local.timestamp_opt(commit.time().seconds(), 0).single()?;
    let author = String::from_utf8_lossy(commit.author().name_bytes()).into_owned();
    let summary = commit.summary().unwrap_or("").to_owned();
    Some(Revision {
        id: commit.id().to_string(),
        author,
        summary,
        datetime,
    })
}
/// Returns the text `page` had in `revision`.
///
/// Returns `None` when the page lies outside the wiki root, the wiki has no
/// repository (or its lock is poisoned), the revision or the file within it
/// cannot be found, the entry is not a blob, or the blob is not valid UTF-8.
// ref: https://github.com/rust-lang/git2-rs/issues/996
pub fn get_page_text_of_revision(&self, page: &Page, revision: &Revision) -> Option<String> {
    let relative_path = page.path.strip_prefix(&self.source_root).ok()?;
    let repository = self.repository.as_ref()?.lock().ok()?;
    let blob = repository
        .revparse_single(&revision.id)
        .ok()?
        .peel_to_tree()
        .ok()?
        .get_path(relative_path)
        .ok()?
        .to_object(&repository)
        .ok()?
        .into_blob()
        .ok()?;
    let text = std::str::from_utf8(blob.content()).ok()?.to_owned();
    Some(text)
}
}
/// Reports whether `commit` changed the file at `path` (relative to the
/// repository root).
///
/// The commit's tree is compared with the tree of its single parent. A root
/// commit has no parent and is compared with the empty tree, so the commit
/// that first added a file counts as a change to it. Merge commits (more
/// than one parent) are never reported. Returns `false` when a tree or the
/// diff cannot be loaded.
fn commit_includes_file(repository: &Repository, commit: &Commit, path: &Path) -> bool {
    let parent_tree = match commit.parent_count() {
        0 => None,
        1 => match commit.parent(0).and_then(|parent| parent.tree()) {
            Ok(tree) => Some(tree),
            Err(_) => return false,
        },
        _ => return false,
    };
    let Ok(tree) = commit.tree() else {
        return false;
    };
    let Ok(diff) = repository.diff_tree_to_tree(parent_tree.as_ref(), Some(&tree), None) else {
        return false;
    };
    diff.deltas()
        .any(|delta| delta.new_file().path() == Some(path))
}
/// A wiki page, either resolved from the file system or prepared for creation.
#[derive(Serialize)]
pub struct Page {
/// Display title: the file stem or directory name for a resolved page, the
/// full name for a page prepared by `Historian::start_page`.
pub title: String,
/// Name of the page relative to the wiki root, segments separated by `/`.
pub full_name: String,
/// Last `/`-separated segment of `full_name`.
pub name: String,
/// File that holds the page content; for a directory page, the index file inside it.
pub path: PathBuf,
/// `full_name`, with a trailing `/` appended for directory pages.
pub url: String,
/// Whether the page is backed by a directory rather than a single file.
pub is_directory: bool,
/// Parent page; `None` for the root page and for pages that were not resolved from disk.
pub parent: Option<Box<Page>>,
/// Sub-pages found inside a directory page.
pub children: Vec<Child>,
/// File names of the non-page files found inside a directory page.
pub attachments: Vec<String>
}
/// A directory entry listed as a sub-page of a directory page.
#[derive(Serialize)]
pub struct Child {
/// Display title: the file stem for a file, the entry name for a directory.
pub title: String,
/// Name of the child relative to the wiki root, segments separated by `/`.
pub full_name: String,
/// File name of the directory entry.
pub name: String,
/// Path of the directory entry itself (the directory, not its index file).
pub path: PathBuf
}
/// A change to a page's content that is waiting to be saved.
#[derive(Serialize)]
pub struct Edit {
/// Name recorded as commit author; a default name is used when `None`.
pub author: Option<String>,
/// Complete new text of the page.
pub content: String,
/// Message used for the git commit that records the edit.
pub summary: String
}
/// A git commit, reduced to the fields shown in a page history.
#[derive(Serialize)]
pub struct Revision {
/// Commit id, formatted as a string.
pub id: String,
/// Name of the commit author.
pub author: String,
/// Summary of the commit message.
pub summary: String,
/// Commit time, converted to the local time zone.
pub datetime: DateTime<Local>
}
/// Renders wiki pages through Tera templates loaded from a template directory.
pub struct PageRenderer {
/// Directory the templates were loaded from; also searched for static resources.
template_root: PathBuf,
/// Template engine holding the parsed `.html` templates.
tera: Tera
}
/// Renders Markdown `content` to HTML and collects the page metadata it
/// contains into `metadata`.
///
/// Metadata is TOML text taken either from a YAML-style metadata block or
/// from the HTML events between one starting with `<!---` and one starting
/// with `--->`. The collected text is parsed at the first end-of-element
/// event seen while it is non-empty; every key of a successful parse is
/// inserted into `metadata`, replacing an existing key of the same name.
/// Text that is not valid TOML is discarded silently.
///
/// All events belonging to the metadata, and the end-of-element event that
/// triggers the parse, are removed from the HTML output.
fn parse_markdown(content: &str, metadata: &mut Table) -> String {
    let mut metadata_str = String::new();
    let mut in_metadata = false;
    let options = pulldown_cmark::Options::ENABLE_YAML_STYLE_METADATA_BLOCKS;
    let parser = pulldown_cmark::Parser::new_ext(content, options).filter(|event| {
        match &event {
            Event::Start(Tag::MetadataBlock(_)) => {
                in_metadata = true;
            }
            Event::Text(text) => {
                if in_metadata {
                    metadata_str.push_str(text);
                }
            }
            Event::Html(html) => {
                if html.starts_with("<!---") {
                    in_metadata = true;
                } else if html.starts_with("--->") {
                    // The closing marker is dropped; the collected text is
                    // parsed at the next end-of-element event.
                    in_metadata = false;
                    return false;
                } else if in_metadata {
                    metadata_str.push_str(html);
                }
            }
            Event::End(_) => {
                if !metadata_str.is_empty() {
                    if let Ok(parsed_metadata) = toml::de::from_str::<Table>(&metadata_str) {
                        for (key, value) in parsed_metadata {
                            metadata.insert(key, value);
                        }
                    }
                    metadata_str.clear();
                    in_metadata = false;
                    return false;
                }
            }
            _ => {}
        }
        // Everything seen while inside the metadata is kept out of the HTML.
        !in_metadata
    });
    let mut html_output = String::new();
    pulldown_cmark::html::push_html(&mut html_output, parser);
    html_output
}
fn render_markdown (content: &tera::Value, args: &HashMap<String, tera::Value>) -> tera::Result<tera::Value> {
Ok(tera::Value::String(parse_markdown(content.as_str().unwrap(), &mut Table::new())))
}
impl PageRenderer {
/// Creates a renderer that loads its templates from the default template
/// directory (`DEFAULT_TEMPLATES_PATH`).
///
/// Panics under the same conditions as `with_template_path`.
pub fn new() -> PageRenderer {
Self::with_template_path(DEFAULT_TEMPLATES_PATH)
}
pub fn with_template_path(template_path: &str) -> PageRenderer {
let mut tera = tera::Tera::new(&format!("{template_path}/**/*.html")).unwrap();
tera.register_filter("markdown", render_markdown);
PageRenderer {
template_root: template_path.into(),
tera
}
}
/// Renders `page` with the `page.html` template, exposing `options` to the
/// template as the `options` variable.
pub fn render_page(&self, historian: &Historian, page: &Page, options: &Table) -> String {
    self.render_page_template("page.html", historian, page, options)
}
/// Renders `page` with the named `template`.
///
/// The template context holds the page variables added by
/// `Render::with_page`, the site variables added by `Render::with_historian`,
/// and `options` under the key `options`.
///
/// Panics if rendering the template fails.
pub fn render_page_template(&self, template: &str, historian: &Historian, page: &Page, options: &Table) -> String {
self.template(template)
.with_page(page)
.with_historian(historian)
.insert("options", options)
.render()
}
/// Renders `page` with the named `template`, adding every entry of
/// `variables` to the template context as a top-level variable.
///
/// The context also holds the page variables added by `Render::with_page`
/// and the site variables added by `Render::with_historian`. Variables are
/// inserted last, so an entry with the same key as one of those replaces it.
///
/// Panics if rendering the template fails.
pub fn render_page_template_with_variables(&self, template: &str, historian: &Historian, page: &Page, variables: &Table) -> String {
self.template(template)
.with_page(page)
.with_historian(historian)
.insert_all(variables)
.render()
}
/// Renders the named `template` without a page.
///
/// The relative root is computed for the URL `/`. The context also holds
/// the site variables added by `Render::with_historian`, every entry of
/// `variables` as a top-level variable, and `options` under the key
/// `options`.
///
/// Panics if rendering the template fails.
pub fn render_template(&self, template: &str, historian: &Historian, variables: &Table, options: &Table) -> String {
self.template(template)
.with_url("/")
.with_historian(historian)
.insert_all(variables)
.insert("options", options)
.render()
}
/// Lists the names of all entries directly inside the template directory
/// that do not end in `.html`.
///
/// # Panics
///
/// Panics if the directory or one of its entries cannot be read, or if an
/// entry name is not valid UTF-8.
pub fn resource_names(&self) -> Vec<String> {
    fs::read_dir(&self.template_root)
        .unwrap()
        .map(|entry| entry.unwrap().file_name().into_string().unwrap())
        .filter(|file_name| !file_name.ends_with(".html"))
        .collect()
}
pub fn resolve_to_resource(&self, name: &str) -> Option<PathBuf> {
let file_path = self.template_root.clone().join(Path::new(name));
match fs::metadata(&file_path) {
Err(_) => None,
Ok(metadata) => {
if metadata.is_dir() {
None
} else {
Some(file_path)
}
}
}
}
/// Starts a `Render` for the named `template` with an empty context.
pub fn template<'a>(&'a self, template: &'a str) -> Render<'a> {
Render::new(&self.tera, template)
}
}
/// A template render in preparation: a template name plus the context that
/// is filled in step by step before `render` is called.
pub struct Render<'a> {
/// Template engine used to render.
tera: &'a Tera,
/// Variables made available to the template.
context: tera::Context,
/// Name of the template to render.
template: &'a str
}
/// Builds the relative prefix that leads from `url` back to the wiki root:
/// one `../` for every `/` contained in `url`.
fn make_relative_root(url: &str) -> String {
    let depth = url.matches('/').count();
    "../".repeat(depth)
}
impl<'a> Render<'a> {
/// Creates a render for `template` with an empty context.
fn new(tera: &'a Tera, template: &'a str) -> Render<'a> {
Render {
tera,
context: tera::Context::new(),
template
}
}
/// Adds `value` to the template context under `key` and returns the render
/// for further chaining.
pub fn insert<T: Serialize + ?Sized, S: Into<String>>(mut self, key: S, value: &'a T) -> Render {
self.context.insert(key, value);
self
}
/// Adds every entry of `variables` to the template context as a top-level
/// variable and returns the render for further chaining.
pub fn insert_all(mut self, variables: &'a Table) -> Render {
for (key, value) in variables {
self.context.insert(key, value);
}
self
}
/// Sets the `relative_root` variable to the prefix that leads from `url`
/// back to the wiki root (see `make_relative_root`).
pub fn with_url(mut self, url: &'a str) -> Render {
self.context.insert("relative_root", &make_relative_root(url));
self
}
/// Adds the site-wide variables: `site` (the site configuration) and
/// `has_git` (whether the wiki has a git repository).
pub fn with_historian(mut self, historian: &'a Historian) -> Render {
self.context.insert("site", &historian.site_config);
self.context.insert("has_git", &historian.repository.is_some());
self
}
/// Adds everything a page template needs to the context.
///
/// Inserts `page`, the raw file text as `content`, its rendered form as
/// `html_content`, the `metadata` parsed from it, the `ancestors` of the
/// page ordered from the outermost ancestor down to the direct parent, and
/// the `relative_root` prefix that leads from the page's URL back to the
/// wiki root. A page whose file cannot be opened gets empty content.
///
/// # Panics
///
/// Panics if the page file can be opened but not read as text.
pub fn with_page(mut self, page: &'a Page) -> Render {
    self.context.insert("page", page);

    let mut content = String::new();
    if let Ok(mut file) = fs::File::open(&page.path) {
        file.read_to_string(&mut content).unwrap();
    }
    self.context.insert("content", &content);

    let mut metadata = Table::new();
    let html_content = parse_markdown(&content, &mut metadata);
    self.context.insert("html_content", &html_content);
    self.context.insert("metadata", &metadata);

    // Walk up the parent chain; pushing to the front leaves the outermost
    // ancestor first.
    let mut ancestors: VecDeque<&Page> = VecDeque::new();
    let mut ancestor = page.parent.as_deref();
    while let Some(current) = ancestor {
        ancestors.push_front(current);
        ancestor = current.parent.as_deref();
    }
    self.context.insert("ancestors", &ancestors);

    self.context.insert("relative_root", &make_relative_root(&page.url));
    self
}
/// Renders the template with the context collected so far.
///
/// Panics if the template cannot be rendered.
pub fn render(self) -> String {
self.tera.render(&self.template, &self.context).unwrap()
}
}
/// Converting a `Render` into a `String` renders it (see `Render::render`).
impl From<Render<'_>> for String {
fn from(render: Render) -> Self {
render.render()
}
}
/// Exports the whole wiki as static files below `output_path`.
///
/// The output directory is created if necessary. Every file resource of the
/// template directory is copied into it, then the root page and all pages
/// below it are exported recursively.
///
/// # Panics
///
/// Panics if the output directory cannot be created or a file cannot be
/// written or copied.
pub fn export_wiki(historian: &Historian, renderer: &PageRenderer, output_path: &str) {
    fs::create_dir_all(output_path).unwrap();
    for resource_name in renderer.resource_names() {
        // Sub-directories of the template directory are listed by name but do
        // not resolve to a file; they are skipped.
        if let Some(resource_path) = renderer.resolve_to_resource(&resource_name) {
            export_resource(resource_path, output_path);
        }
    }
    export_wiki_page(historian, renderer, "", output_path);
}
/// Copies the file at `resource_path` into the directory `output_path`,
/// keeping its file name, and prints a progress line.
///
/// # Panics
///
/// Panics if the path has no UTF-8 file name or the copy fails.
fn export_resource(resource_path: PathBuf, output_path: &str) {
    let resource_name = resource_path.file_name().unwrap().to_str().unwrap();
    let resource_output_path = Path::new(output_path).join(resource_name);
    println!("export resource {} from {:?} to {:?}", resource_name, resource_path, resource_output_path);
    fs::copy(resource_path, resource_output_path).unwrap();
}
/// Exports the page `name` and everything below it as static HTML.
///
/// A file page `x.md` is written to `x.html`; a directory page is written to
/// `index.html` inside a directory of the same name, together with its
/// attachments. Every `.md` in the rendered HTML is replaced by `.html` so
/// that links between pages point at the exported files. Child pages are
/// exported recursively. Nothing happens when the page cannot be resolved.
///
/// # Panics
///
/// Panics if a directory or file cannot be written, or if the output path of
/// a page with attachments is not valid UTF-8.
fn export_wiki_page(historian: &Historian, renderer: &PageRenderer, name: &str, output_path: &str) {
    let resolved_page = historian.resolve_to_page(name);
    println!("resolve page {} {:?}", name, resolved_page.is_some());
    let Some(page) = resolved_page else { return };

    let page_output_path = Path::new(output_path).join(page.full_name.replace(".md", ".html"));
    let page_html_output_path = if page.is_directory {
        println!("create directory {:?}", page_output_path);
        fs::create_dir_all(&page_output_path).unwrap();
        page_output_path.join("index.html")
    } else {
        page_output_path.clone()
    };

    println!("export: {} to {:?}", page.full_name, page_html_output_path);
    let options = toml! {
        dynamic = false
    };
    let page_html = renderer
        .render_page_template("page.html", historian, &page, &options)
        .replace(".md", ".html");
    fs::write(&page_html_output_path, page_html.as_bytes()).unwrap();

    for attachment in &page.attachments {
        let attachment_path = page.path.parent().unwrap().join(attachment);
        export_resource(attachment_path, page_output_path.to_str().unwrap());
    }
    for child in &page.children {
        println!("child {} {} {:?} {:?}", child.full_name, child.name, child.path, output_path);
        export_wiki_page(historian, renderer, &child.full_name, output_path);
    }
}
/// Rewrites wiki links (`[[name]]`, `[[name|label]]`) into Markdown links.
pub struct Linker<'a> {
/// Wiki whose pages the links are resolved against.
historian: &'a Historian,
/// Pattern that finds wiki links in page text.
link_regex: Regex
}
impl<'a> Linker<'a> {
/// Creates a linker that resolves wiki links against the pages of `historian`.
///
/// The pattern matches `[[link]]` and `[[link|label]]`, where link and label
/// each consist of word characters, whitespace and hyphens.
pub fn new(historian: &Historian) -> Linker {
Linker {
historian,
link_regex: Regex::new(r"\[\[(?<link>[\w\s\-]+)(?:\|(?<label>[\w\s\-]+))?\]\]").unwrap()
}
}
pub fn resolve_link(&self, link: &str) -> Option<String> {
let root = self.historian.resolve_to_page("")?;
let mut page_names: Vec<String> = root.children.iter().map(|child| child.name.to_owned()).collect();
loop {
let mut next_page_names: Vec<String> = vec![];
// loop through current list of page names to find match
for page_name in page_names {
let page = self.historian.resolve_to_page(&page_name)?;
if self.link_matches_path(link, &page_name) {
return Some(page.full_name.to_owned());
}
// Collect list of (fully qualified) subpages
for child in page.children {
let mut child_path = page.full_name.to_owned();
child_path.push('/');
child_path.push_str(&child.name);
next_page_names.push(child_path);
}
}
if next_page_names.is_empty() {
break;
}
//
page_names = next_page_names;
}
None
}
/// Returns the stored text of `page` with its wiki links rewritten into
/// Markdown links.
///
/// A page whose file cannot be opened is treated as empty.
///
/// # Panics
///
/// Panics if the file can be opened but not read as text.
pub fn resolve_links(&self, page: &Page) -> String {
    let mut stored_text = String::new();
    if let Ok(mut file) = fs::File::open(&page.path) {
        file.read_to_string(&mut stored_text).unwrap();
    }
    let stored_version = Edit {
        author: None,
        content: stored_text,
        summary: String::new(),
    };
    self.resolve_links_for_edit(page, &stored_version)
}
/// Returns the content of `edit` with its wiki links rewritten into Markdown
/// links relative to the directory of `page`.
///
/// `[[name]]` becomes `[name](<path>)` and `[[name|label]]` becomes
/// `[label](<path>)`. Links that do not resolve to a page are left
/// untouched.
///
/// # Panics
///
/// Panics if a relative path from the page's directory to a resolved target
/// cannot be computed.
pub fn resolve_links_for_edit(&self, page: &Page, edit: &Edit) -> String {
    let mut resolved = edit.content.to_owned();
    for capture in self.link_regex.captures_iter(&edit.content) {
        let link = capture.name("link").unwrap();
        let Some(target) = self.resolve_link(link.as_str()) else {
            continue;
        };
        let wiki_link = capture.get(0).unwrap().as_str();
        let label = capture
            .name("label")
            .map_or(link.as_str(), |label_match| label_match.as_str());
        let absolute_target = self.historian.source_root.join(&target);
        let relative_target = diff_paths(&absolute_target, page.path.parent().unwrap()).unwrap();
        let markdown_link = format!("[{}](<{}>)", label, relative_target.display());
        resolved = resolved.replace(wiki_link, &markdown_link);
    }
    resolved
}
/// Reports whether `link` names the page at `path`: the last `/`-separated
/// segment of `path` must equal `link` either as a whole or without its file
/// extension.
fn link_matches_path(&self, link: &str, path: &str) -> bool {
    path.rsplit('/').next().is_some_and(|page_name| {
        page_name == link || Path::new(page_name).file_stem() == Some(OsStr::new(link))
    })
}
}
/// Searches the pages of a wiki by text or by tag.
pub struct Searcher<'a> {
/// Wiki whose pages are searched.
historian: &'a Historian
}
/// A page found by a search.
#[derive(Serialize)]
pub struct SearchResult {
/// The page that matched.
pub page: Page,
/// Matching lines for a text search; empty for a tag search.
pub matches: Vec<String>
}
impl<'a> Searcher<'a> {
/// Creates a searcher over the pages of `historian`.
pub fn new(historian: &Historian) -> Searcher {
Searcher {
historian
}
}
/// Searches all pages below `root` for lines containing `query`.
///
/// The query is matched literally (not as a regular expression) and without
/// regard to case. Each result carries the matching lines of one page.
///
/// # Panics
///
/// Panics if a matcher cannot be built for the query.
pub fn search(&self, root: &Page, query: &str) -> Vec<SearchResult> {
    let matcher = grep::regex::RegexMatcherBuilder::new()
        .fixed_strings(true)
        .case_insensitive(true)
        .build(query)
        .unwrap();
    let mut searcher = grep::searcher::SearcherBuilder::new().build();
    let mut results = Vec::new();
    self.do_search(root, &mut results, &mut searcher, &matcher);
    results
}
/// Finds all pages below `root` whose metadata lists `tag`.
///
/// A page matches when its metadata has a `tags` array that contains `tag`
/// as a string. The search descends recursively; the results found below a
/// page precede the page itself. `root` itself is not tested. Pages whose
/// file cannot be read as text are treated as having no metadata.
pub fn tag_search(&self, root: &Page, tag: &str) -> Vec<SearchResult> {
    let mut results: Vec<SearchResult> = vec![];
    for child in &root.children {
        let Some(child_page) = self.historian.resolve_to_page(&child.full_name) else {
            continue;
        };
        results.extend(self.tag_search(&child_page, tag));

        let content = fs::read_to_string(&child_page.path).unwrap_or_default();
        let mut metadata = Table::new();
        parse_markdown(&content, &mut metadata);

        let is_tagged = metadata
            .get("tags")
            .and_then(|tags| tags.as_array())
            .is_some_and(|tags| tags.iter().any(|value| value.as_str() == Some(tag)));
        if is_tagged {
            results.push(SearchResult {
                page: child_page,
                matches: vec![],
            });
        }
    }
    results
}
/// Searches every page below `root` with `matcher` and appends one result
/// per page that has at least one matching line.
///
/// The content file of each page is searched, which for a directory page is
/// the index file inside the directory. The search descends recursively; the
/// results found below a page precede the page itself. `root` itself is not
/// searched.
fn do_search(&self, root: &Page, results: &mut Vec<SearchResult>, searcher: &mut grep::searcher::Searcher, matcher: &grep::regex::RegexMatcher) {
    for child in &root.children {
        let Some(child_page) = self.historian.resolve_to_page(&child.full_name) else {
            continue;
        };
        let mut matches: Vec<String> = vec![];
        // Best effort: a page whose file is missing or unreadable simply
        // contributes the matches found before the error, if any.
        let _ = searcher.search_path(
            matcher,
            &child_page.path,
            grep::searcher::sinks::UTF8(|_line_number, line| {
                matches.push(line.to_owned());
                Ok(true)
            }),
        );
        self.do_search(&child_page, results, searcher, matcher);
        if !matches.is_empty() {
            results.push(SearchResult {
                page: child_page,
                matches,
            });
        }
    }
}
}