From 5db16a08a9726b67b5bb96fd17f813fa1b74f568 Mon Sep 17 00:00:00 2001 From: Volpeon Date: Tue, 5 Jan 2021 10:58:04 +0100 Subject: Moved all metadata processing from the build script into a Pandoc Lua filter --- scripts/build_content.sh | 200 ++++++++------------------------------- scripts/metadata_filter.lua | 225 ++++++++++++++++++++++++++++++++++++++++++++ scripts/metadata_tpl.json | 7 +- 3 files changed, 267 insertions(+), 165 deletions(-) create mode 100644 scripts/metadata_filter.lua (limited to 'scripts') diff --git a/scripts/build_content.sh b/scripts/build_content.sh index c640b12..c88ea86 100755 --- a/scripts/build_content.sh +++ b/scripts/build_content.sh @@ -23,8 +23,6 @@ get_filters() { FILTERS=$(get_filters) -CATEGORIES=$(cat "$METADATA_DIR/categories.json") - get_target_filename() { filename=$1 filename=${filename#"$CONTENT_DIR"} @@ -34,43 +32,29 @@ get_target_filename() { echo "$OUTPUT_DIR$filename" } -get_url_rel() { - basedir=$1 - filename=$2 - ext=$3 - filename=$(realpath --relative-to="$basedir" "$filename") - filename="${filename%.md}.$ext" - if [ "${filename##*/}" = "index.html" ]; then - filename=${filename%index.html} - fi - echo "$filename" -} - -get_url_abs() { - echo "/$(get_url_rel "$CONTENT_DIR" "$1" "$2")" -} - get_metadata() { - pandoc "$1" \ + pandoc "$2" \ -f markdown \ -t plain \ + --metadata content_dir="$CONTENT_DIR" \ + --metadata base_file="$1" \ + --metadata file="$2" \ + --metadata-file metadata/metadata.yaml \ --no-highlight \ - --template scripts/metadata_tpl.json + --template scripts/metadata_tpl.json \ + --lua-filter scripts/metadata_filter.lua } get_content() { - pandoc "$1" \ + pandoc "$2" \ -f markdown \ -t plain \ - --no-highlight -} - -get_section() { - filename=$1 - filename=${filename#"$CONTENT_DIR"} - filename=${filename%.md} - filename=$(echo "$filename" | cut -f1 -d /) - echo "$filename" + --metadata content_dir="$CONTENT_DIR" \ + --metadata base_file="$1" \ + --metadata file="$2" \ + --metadata-file metadata/metadata.yaml \ + --no-highlight \ + --lua-filter scripts/metadata_filter.lua } get_subpages_basedir() { @@ -82,7 +66,7 @@ get_subpages_basedir() { echo "$filename" } -get_subpages_meta() { +get_subpages_metadata() { basedir=$(get_subpages_basedir "$1") child_pages=() @@ -97,98 +81,25 @@ get_subpages_meta() { -print0) fi - pages=$(echo "{}" | jq ". + { \ - all: [], \ - by_year: {}, \ - }") - - categories="{}" + pages="[]" if [ ${#child_pages[@]} -ne 0 ]; then - #echo -e "\033[0;90m[////////]\033[0m Child pages:" - for file in "${child_pages[@]}"; do - #echo -e "\033[0;90m[////////]\033[0m - $file" - - metadata=$(get_metadata "$file") - - section=$(get_section "$file") - - url_rel=$(get_url_rel "$basedir" "$file" "html") - url_abs=$(get_url_abs "$file" "html") - url_full="${SITE%/}$url_abs" - - date=$(echo "$metadata" | jq -r .date) - date_rfc3339="" - - if [ "$date" != "" ]; then - date_rfc3339=$(date -d "$date" --rfc-3339=seconds | sed 's/ /T/') - fi - - last_update=$(echo "$metadata" | jq -r .last_update) - last_update_rfc3339="" - - if [ "$last_update" != "" ]; then - last_update=$date - fi - - if [ "$last_update" != "" ]; then - last_update_rfc3339=$(date -d "$last_update" --rfc-3339=seconds | sed 's/ /T/') - fi - - category=$(echo "$metadata" | jq -r .category) - category_name="" - - if [ "$section" != "" ] && [ "$category" != "" ]; then - category_name=$(echo "$CATEGORIES" | jq -r ".$section.$category") - fi - - content=$(get_content "$file") - - metadata=$(echo "$metadata" | jq ". + { \ - content: \"$(echo "$content" | sed -z 's/\\/\\\\/g;s/\n/\\n/g;s/"/\"/g')\", \ - date: { \ - yyyy_mm_dd: \"$date\", \ - rfc3339: \"$date_rfc3339\" \ - }, \ - last_update: { \ - yyyy_mm_dd: \"$last_update\", \ - rfc3339: \"$last_update_rfc3339\" \ - }, \ - url: { \ - rel: \"$url_rel\", \ - abs: \"$url_abs\", \ - full: \"$url_full\" \ - } \ - }") - - pages=$(echo "$pages" | jq ".all += [ $metadata ]") - - if [ "$date" != "" ]; then - date_year=$(date -d "$date" +%Y) - #date_month=$(date -d "$date" +%m) - #date_day=$(date -d "$date" +%d) - - pages=$(echo "$pages" | jq ".by_year.\"$date_year\" += [ $metadata ]") - fi - - if [ "$category" != "" ]; then - categories=$(echo "$categories" | jq ".$category = { name: \"$category_name\", count: (.$category.count + 1) } ") - fi + metadata=$(get_metadata "$1" "$file" | jq "{ \ + url: .url, \ + author: .author, \ + title: .title, \ + date: .date, \ + last_update: .last_update, \ + category: .category, \ + content: \"$(get_content "$1" "$file" | sed -z 's/\\/\\\\/g;s/\n/\\n/g;s/"/\"/g')\" \ + } | del(.[] | nulls)") + + pages=$(echo "$pages" | jq ". += [ $metadata ]") done fi - pages=$(echo "$pages" \ - | jq ".all |= (sort_by(.date) | reverse) \ - | .by_year |= (to_entries | sort_by(.key) | reverse \ - | .[].value |= (sort_by(.date) | reverse))") - - categories=$(echo "$categories" | jq ". | to_entries | { by_id: (. | sort_by(.key)), by_count: (. | sort_by(.value.count) | reverse) }") - - echo "{}" | jq ". + { \ - pages: $pages, \ - categories: $categories - }" + echo "{ \"pages\": $pages }" } handle () { @@ -201,47 +112,10 @@ handle () { echo -e "\033[0;90m[COMPILE ]\033[0m Getting metadata" - included_metadata=$(get_metadata "$1") + included_metadata=$(get_metadata "$1" "$1") + added_metadata=$(get_subpages_metadata "$1") create_feed=$(echo "$included_metadata" | jq -r .create_feed) - - url_abs=$(get_url_abs "$1" "html") - url_full="${SITE%/}$url_abs" - - section=$(get_section "$1") - - is_home=false - if [ "$target_file" = "${OUTPUT_DIR}index.html" ]; then - is_home=true - fi - - added_metadata=$(echo "$(get_subpages_meta "$1")" | jq ". + { \ - url: { \ - abs: \"$url_abs\", \ - full: \"$url_full\" \ - }, \ - section: { \ - id: \"$section\", \ - is_$section: true \ - }, \ - is_home: $is_home \ - }") - - if [ "$create_feed" = "true" ]; then - feed_url_abs="$(get_url_abs "$1" "xml")" - feed_url_full="${SITE%/}$feed_url_abs" - last_update=$(echo "$added_metadata" | jq -r ".pages.all[0].last_update") - - added_metadata=$(echo "$added_metadata" | jq ". + { \ - feed: { \ - last_update: $last_update, \ - url: { \ - abs: \"$feed_url_abs\", \ - full: \"$feed_url_full\" \ - } \ - } \ - }") - fi meta_file=$(mktemp) @@ -256,8 +130,13 @@ handle () { --no-highlight \ --template "${TEMPLATES_DIR}feed.xml" \ -o "${target_file%.html}.xml" \ + --metadata content_dir="$CONTENT_DIR" \ + --metadata base_file="$1" \ + --metadata file="$1" \ + --metadata type=feed \ --metadata-file metadata/metadata.yaml \ --metadata-file "$meta_file" \ + --lua-filter scripts/metadata_filter.lua \ $FILTERS fi @@ -269,15 +148,18 @@ handle () { --no-highlight \ --template "${TEMPLATES_DIR}base.html" \ -o "$target_file" \ + --metadata content_dir="$CONTENT_DIR" \ + --metadata base_file="$1" \ + --metadata file="$1" \ --metadata-file metadata/metadata.yaml \ --metadata-file "$meta_file" \ + --lua-filter scripts/metadata_filter.lua \ $FILTERS rm "$meta_file" - # echo $(pandoc "$1" \ - # -f markdown \ - # -t json) + # echo "$included_metadata" + # echo "$added_metadata" echo -e "\033[0;90m[COMPILE ]\033[0m Done" else diff --git a/scripts/metadata_filter.lua b/scripts/metadata_filter.lua new file mode 100644 index 0000000..f31bc1f --- /dev/null +++ b/scripts/metadata_filter.lua @@ -0,0 +1,225 @@ +function format_date(date) + if date == nil then + return date + end + + date = pandoc.utils.normalize_date(pandoc.utils.stringify(date)) + local year, month, day = date:match("(%d%d%d%d)-(%d%d)-(%d%d)") + if year == nil then + return nil + end + + local time = os.time({ + year = tonumber(year), + month = tonumber(month), + day = tonumber(day) + }) + return pandoc.MetaMap({ + yyyy_mm_dd = pandoc.MetaString(os.date("%F", time)), + yyyy = pandoc.MetaString(os.date("%Y", time)), + mm_dd = pandoc.MetaString(os.date("%m-%d", time)), + rfc3339 = pandoc.MetaString(os.date("%FT%T+00:00", time)) + }) +end + +function table_to_list(t, cmp) + local l = pandoc.MetaList({}) + + for key, value in pairs(t) do + l:insert(pandoc.MetaMap({ + key = key, + value = value + })) + end + + l:sort(cmp or function(i1, i2) + return i1.key < i2.key + end) + + return l +end + +function splitstr(input, sep) + sep = sep or "%s" + local t = {} + for str in input:gmatch("([^" .. sep .. "]+)") do + table.insert(t, str) + end + return t +end + +function relative_to(dir, target) + dir = splitstr(dir, "/") + target = splitstr(target, "/") + + local prefix = true + local path = "" + + for i = 1, math.min(#dir, #target) do + local t = target[i] + if prefix then + if dir[i] ~= t then + prefix = false + path = "../" .. t + end + else + path = "../" .. path .. "/" .. t + end + end + + if #dir < #target then + for i = #dir + 1, #target do + path = path .. (path == "" and "" or "/") .. target[i] + end + elseif #dir > #target then + for i = #target + 1, #dir do + path = "../" .. path + end + end + + return path +end + +function resolve_url(page_type, site_url, content_dir, base_dir, cur_file) + if page_type == "page" then + cur_file = cur_file:gsub("%.md$", ".html") + elseif page_type == "feed" then + cur_file = cur_file:gsub("%.md$", ".xml") + end + + local abs = cur_file:gsub("^" .. content_dir, ""):gsub("/index.html$", "/") + local rel = relative_to(base_dir, cur_file):gsub("/index.html$", "/") + + return pandoc.MetaMap({ + abs = pandoc.MetaString(abs), + rel = pandoc.MetaString(rel), + full = pandoc.MetaString(site_url .. abs) + }) +end + +function resolve_section(abs_url) + local section = abs_url:match("^/(.-)[/.]") or "index" + return pandoc.MetaMap({ + id = pandoc.MetaString(section), + ["is_" .. section] = pandoc.MetaBool(true) + }) +end + +function organize_subpages(site_url, content_dir, base_dir, pages, categories) + local categories_data = pandoc.MetaList({}) + + pages:sort(function(p1, p2) + if p1.date and p2.date then + return pandoc.utils.stringify(p1.date.yyyy_mm_dd) > pandoc.utils.stringify(p2.date.yyyy_mm_dd) + elseif p2.date then + return true + elseif p1.date then + return false + else + return pandoc.utils.stringify(p1.title) < pandoc.utils.stringify(p2.title) + end + end) + + local pages_data = pandoc.MetaMap({ + all = pages, + by_year = pandoc.MetaList({}), + last_update = nil + }) + + if pages then + local pages_by_year_map = {} + local categories_map = {} + + for i = 1, #pages do + local page = pages[i] + + if page.date then + local yyyy = pandoc.utils.stringify(page.date.yyyy) + local pages_by_yyyy = pages_by_year_map[yyyy] + + if not pages_by_yyyy then + pages_by_yyyy = pandoc.MetaList(pandoc.List()) + pages_by_year_map[yyyy] = pages_by_yyyy + end + + pages_by_yyyy:insert(page) + end + + if page.category and categories then + local category = pandoc.utils.stringify(page.category) + + if categories[category] then + local current_category = categories_map[category] + + if not current_category then + current_category = pandoc.MetaMap({ + name = pandoc.MetaString(categories[category]), + count = 0 + }) + categories_map[category] = current_category + end + + current_category.count = current_category.count + 1 + end + end + end + + pages_data.by_year = table_to_list(pages_by_year_map, function(i1, i2) + return i1.key > i2.key + end) + + categories_data = table_to_list(categories_map) + + for _, item in ipairs(categories_data) do + item.value.count = pandoc.MetaString(("%d"):format(item.value.count)) + end + + if #pages_data.by_year ~= 0 then + pages_data.last_update = pages_data.by_year[1].value[1].last_update + end + end + + return pages_data, categories_data +end + +function Meta(meta) + meta.content_dir = meta.content_dir:gsub("/$", "") + meta.site.url = pandoc.utils.stringify(meta.site.url):gsub("/$", "") + meta.base_dir = meta.base_file:gsub("^(.*)/.-$", "%1") + meta.type = meta.type or "page" + + meta.date = format_date(meta.date) + if meta.last_update ~= nil then + meta.last_update = format_date(meta.last_update) + else + meta.last_update = meta.date + end + + if meta.type == "feed" then + meta.page = pandoc.MetaMap({ + url = resolve_url("page", meta.site.url, meta.content_dir, meta.base_dir, meta.file) + }) + end + + meta.url = resolve_url(meta.type, meta.site.url, meta.content_dir, meta.base_dir, meta.file) + meta.section = resolve_section(meta.url.abs) + meta.categories = meta.categories[meta.section.id] + + if meta.menus and meta.menus.main then + for i = 1, #meta.menus.main do + local item = meta.menus.main[i] + item.active = pandoc.MetaBool(pandoc.utils.stringify(item.id) == meta.section.id) + end + end + + if meta.pages then + local pages, categories = organize_subpages(meta.site.url, meta.content_dir, meta.base_dir, meta.pages, + meta.categories) + meta.pages = pages + meta.categories = categories + else + meta.categories = nil + end + + return meta +end diff --git a/scripts/metadata_tpl.json b/scripts/metadata_tpl.json index 79eabe1..fb36f0f 100755 --- a/scripts/metadata_tpl.json +++ b/scripts/metadata_tpl.json @@ -1,6 +1 @@ -{ - "title": "$title$", - "date": "$date$", - "category": "$category$", - "create_feed": "$create_feed$" -} +$meta-json$ -- cgit v1.2.3-70-g09d2