diff --git a/Cargo.lock b/Cargo.lock
index 58f77e41..cb12b9bc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2639,6 +2639,7 @@ dependencies = [
  "serde",
  "serde_json",
  "serde_yaml_ng",
+ "sha2",
  "strum",
  "svg_metadata",
  "thiserror",
diff --git a/Cargo.toml b/Cargo.toml
index 0b1b3585..3542c306 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -88,6 +88,7 @@ reqwest = { version = "0.12", default-features = false, features = [
 indoc = "2"
 base64 = "0.22"
 console = "0.15"
+sha2 = "0.10"
 
 [dependencies]
diff --git a/crates/rari-doc/Cargo.toml b/crates/rari-doc/Cargo.toml
index b087faf0..62dfcd6c 100644
--- a/crates/rari-doc/Cargo.toml
+++ b/crates/rari-doc/Cargo.toml
@@ -16,6 +16,7 @@ url.workspace = true
 itertools.workspace = true
 constcat.workspace = true
 indexmap.workspace = true
+sha2.workspace = true
 
 serde_yaml_ng = "0.10"
 yaml-rust = "0.4"
diff --git a/crates/rari-doc/src/build.rs b/crates/rari-doc/src/build.rs
index 0e5a98e2..1ad4d4bf 100644
--- a/crates/rari-doc/src/build.rs
+++ b/crates/rari-doc/src/build.rs
@@ -1,24 +1,26 @@
 use std::borrow::Cow;
 use std::fs::{self, File};
-use std::io::BufWriter;
+use std::io::{BufWriter, Write};
 use std::iter::once;
 use std::path::PathBuf;
 
 use rari_types::globals::build_out_root;
 use rari_types::locale::Locale;
 use rayon::iter::{IntoParallelIterator, ParallelIterator};
-use tracing::{error, span, Level};
+use sha2::{Digest, Sha256};
+use tracing::{span, Level};
 
 use crate::cached_readers::{
     blog_files, contributor_spotlight_files, curriculum_files, generic_pages_files,
 };
 use crate::error::DocError;
 use crate::pages::build::copy_additional_files;
+use crate::pages::json::BuiltDocy;
 use crate::pages::page::{Page, PageBuilder, PageLike};
 use crate::pages::types::spa::SPA;
 use crate::resolve::url_to_folder_path;
 
-pub fn build_single_page(page: &Page) {
+pub fn build_single_page(page: &Page) -> Result<(), DocError> {
     let file = page.full_path().to_string_lossy();
     let span = span!(
         Level::ERROR,
@@ -28,48 +30,45 @@ pub fn build_single_page(page: &Page) {
         file = file.as_ref()
     );
     let _enter = span.enter();
-    let built_page = page.build();
-    match built_page {
-        Ok(built_page) => {
-            let out_path = build_out_root()
-                .expect("No BUILD_OUT_ROOT")
-                .join(url_to_folder_path(page.url().trim_start_matches('/')));
-            fs::create_dir_all(&out_path).unwrap();
-            let out_file = out_path.join("index.json");
-            let file = File::create(out_file).unwrap();
-            let buffed = BufWriter::new(file);
+    let built_page = page.build()?;
+    let out_path = build_out_root()
+        .expect("No BUILD_OUT_ROOT")
+        .join(url_to_folder_path(page.url().trim_start_matches('/')));
+    fs::create_dir_all(&out_path)?;
+    let out_file = out_path.join("index.json");
+    let file = File::create(out_file).unwrap();
+    let mut buffed = BufWriter::new(file);
 
-            serde_json::to_writer(buffed, &built_page).unwrap();
+    if let BuiltDocy::Doc(json) = built_page {
+        let json_str = serde_json::to_string(&json)?;
+        buffed.write_all(json_str.as_bytes())?;
+        let hash = Sha256::digest(json_str.as_bytes());
+        let meta_out_file = out_path.join("metadata.json");
+        let meta_file = File::create(meta_out_file).unwrap();
+        let meta_buffed = BufWriter::new(meta_file);
+        serde_json::to_writer(meta_buffed, &json.doc.as_meta(format!("{hash:x}")))?;
+    } else {
+        serde_json::to_writer(buffed, &built_page)?;
+    }
 
-            if let Some(in_path) = page.full_path().parent() {
-                copy_additional_files(in_path, &out_path, page.full_path()).unwrap();
-            }
-        }
-        Err(e) => {
-            error!("Error: {e}");
-        }
+    if let Some(in_path) = page.full_path().parent() {
+        copy_additional_files(in_path, &out_path, page.full_path())?;
     }
+    Ok(())
 }
 
 pub fn build_docs(docs: &[Page]) -> Result<Vec<Cow<'_, str>>, DocError> {
-    Ok(docs
-        .into_par_iter()
-        .map(|page| {
-            build_single_page(page);
-            Cow::Borrowed(page.url())
-        })
-        .collect())
+    docs.into_par_iter()
+        .map(|page| build_single_page(page).map(|_| Cow::Borrowed(page.url())))
+        .collect()
 }
 
 pub fn build_curriculum_pages() -> Result<Vec<Cow<'static, str>>, DocError> {
-    Ok(curriculum_files()
+    curriculum_files()
         .by_path
         .values()
-        .map(|page| {
-            build_single_page(page);
-            Cow::Owned(page.url().to_string())
-        })
-        .collect())
+        .map(|page| build_single_page(page).map(|_| Cow::Owned(page.url().to_string())))
+        .collect()
 }
 
 fn copy_blog_author_avatars() -> Result<(), DocError> {
@@ -97,44 +96,32 @@ fn copy_blog_author_avatars() -> Result<(), DocError> {
 
 pub fn build_blog_pages() -> Result<Vec<Cow<'static, str>>, DocError> {
     copy_blog_author_avatars()?;
-    Ok(blog_files()
+    blog_files()
         .posts
         .values()
         .chain(once(&SPA::from_url("/en-US/blog/").unwrap()))
-        .map(|page| {
-            build_single_page(page);
-            Cow::Owned(page.url().to_string())
-        })
-        .collect())
+        .map(|page| build_single_page(page).map(|_| Cow::Owned(page.url().to_string())))
+        .collect()
 }
 
 pub fn build_generic_pages() -> Result<Vec<Cow<'static, str>>, DocError> {
-    Ok(generic_pages_files()
+    generic_pages_files()
         .values()
-        .map(|page| {
-            build_single_page(page);
-            Cow::Owned(page.url().to_string())
-        })
-        .collect())
+        .map(|page| build_single_page(page).map(|_| Cow::Owned(page.url().to_string())))
+        .collect()
 }
 
 pub fn build_contributor_spotlight_pages() -> Result<Vec<Cow<'static, str>>, DocError> {
-    Ok(contributor_spotlight_files()
+    contributor_spotlight_files()
        .values()
-        .map(|page| {
-            build_single_page(page);
-            Cow::Owned(page.url().to_string())
-        })
-        .collect())
+        .map(|page| build_single_page(page).map(|_| Cow::Owned(page.url().to_string())))
+        .collect()
 }
 
 pub fn build_spas() -> Result<Vec<Cow<'static, str>>, DocError> {
-    Ok(SPA::all()
+    SPA::all()
         .iter()
         .filter_map(|(slug, locale)| SPA::from_slug(slug, *locale))
-        .map(|page| {
-            build_single_page(&page);
-            Cow::Owned(page.url().to_string())
-        })
-        .collect())
+        .map(|page| build_single_page(&page).map(|_| Cow::Owned(page.url().to_string())))
+        .collect()
 }
diff --git a/crates/rari-doc/src/pages/json.rs b/crates/rari-doc/src/pages/json.rs
index 532c3d5f..7c40dc79 100644
--- a/crates/rari-doc/src/pages/json.rs
+++ b/crates/rari-doc/src/pages/json.rs
@@ -119,6 +119,68 @@ pub struct JsonDoc {
     pub page_type: PageType,
 }
 
+impl JsonDoc {
+    pub fn as_meta(&self, hash: String) -> JsonDocMetadata {
+        JsonDocMetadata {
+            is_active: &self.is_active,
+            is_markdown: &self.is_markdown,
+            is_translated: &self.is_translated,
+            locale: &self.locale,
+            mdn_url: &self.mdn_url,
+            modified: &self.modified,
+            native: &self.native,
+            no_indexing: &self.no_indexing,
+            other_translations: &self.other_translations,
+            page_title: &self.page_title,
+            parents: &self.parents,
+            popularity: &self.popularity,
+            short_title: &self.short_title,
+            source: &self.source,
+            summary: &self.summary,
+            title: &self.title,
+            baseline: &self.baseline,
+            browser_compat: &self.browser_compat,
+            page_type: &self.page_type,
+            hash,
+        }
+    }
+}
+
+#[derive(Debug, Clone, Serialize)]
+pub struct JsonDocMetadata<'a> {
+    #[serde(rename = "isActive")]
+    pub is_active: &'a bool,
+    #[serde(rename = "isMarkdown")]
+    pub is_markdown: &'a bool,
+    #[serde(rename = "isTranslated")]
+    pub is_translated: &'a bool,
+    pub locale: &'a Locale,
+    pub mdn_url: &'a String,
+    #[serde(serialize_with = "modified_dt")]
+    pub modified: &'a NaiveDateTime,
+    pub native: &'a Native,
+    #[serde(rename = "noIndexing")]
+    pub no_indexing: &'a bool,
+    pub other_translations: &'a Vec<Translation>,
+    #[serde(rename = "pageTitle")]
+    pub page_title: &'a String,
+    #[serde(skip_serializing_if = "Vec::is_empty")]
+    pub parents: &'a Vec<Parent>,
+    pub popularity: &'a Option<f64>,
+    pub short_title: &'a String,
+    pub source: &'a Source,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub summary: &'a Option<String>,
+    pub title: &'a String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub baseline: &'a Option<&'static SupportStatusWithByKey>,
+    #[serde(rename = "browserCompat", skip_serializing_if = "Vec::is_empty")]
+    pub browser_compat: &'a Vec<String>,
+    #[serde(rename = "pageType")]
+    pub page_type: &'a PageType,
+    pub hash: String,
+}
+
 #[derive(Debug, Clone, Serialize)]
 pub struct BlogIndex {
     pub posts: Vec<BlogMeta>,
 }
diff --git a/crates/rari-tools/Cargo.toml b/crates/rari-tools/Cargo.toml
index 1a4d92f1..00056ab3 100644
--- a/crates/rari-tools/Cargo.toml
+++ b/crates/rari-tools/Cargo.toml
@@ -23,10 +23,10 @@ url.workspace = true
 indoc.workspace = true
 rayon.workspace = true
 console.workspace = true
+sha2.workspace = true
 
 dialoguer = "0.11"
 csv = "1"
-sha2 = "0.10"
 
 [dev-dependencies]
 serial_test = { version = "3", features = ["file_locks"] }
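
Note on the hashing step this patch introduces: build_single_page now serializes the built doc to a string, writes those exact bytes to index.json, hashes the same bytes with SHA-256, and stores the lowercase hex digest in metadata.json via JsonDoc::as_meta. The sketch below is illustrative only and not part of the patch; the write_json_with_hash helper and its file names are hypothetical, while sha2::Sha256, Digest::digest, and the {hash:x} hex formatting mirror the actual change.

    // Illustrative sketch only: serialize once, reuse the same bytes for the
    // content file and for the SHA-256 digest so the recorded hash always
    // matches what was written.
    use std::fs::File;
    use std::io::{BufWriter, Write};
    use std::path::Path;

    use sha2::{Digest, Sha256};

    // Hypothetical helper, not part of the patch.
    fn write_json_with_hash<T: serde::Serialize>(
        out_dir: &Path,
        value: &T,
    ) -> std::io::Result<String> {
        let json = serde_json::to_string(value).expect("value must serialize");
        let mut out = BufWriter::new(File::create(out_dir.join("index.json"))?);
        out.write_all(json.as_bytes())?;
        // Lowercase hex digest, equivalent to format!("{hash:x}") in the patch.
        Ok(format!("{:x}", Sha256::digest(json.as_bytes())))
    }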