#!/usr/bin/env python3
"""picopaper: a tiny static site generator.

Reads markdown posts from the items/ tree (filenames encode date, type,
name, and optional feed), renders them through Jinja2 templates found in
theme/<name>/templates, and writes a static site — index pages, per-feed
pages, per-post pages, an optional RSS 2.0 feed, and copied assets — into
the output directory.
"""
import os
import random
import re
from datetime import datetime, timezone
from pathlib import Path

from jinja2 import Environment, FileSystemLoader
import markdown

from config import (
    BLOG_TITLE,
    BLOG_DESCRIPTION,
    THEME,
    EXCLUDE_FEEDS_FROM_MAIN,
    NAVBAR_ITEMS,
    HIDE_LOGO,
    HIDE_TITLE,
    LOGO_PATH,
    ENABLE_RSS_FEED,
    RSS_FEED_PATH,
    BASE_URL,
    AUTHOR_NAME,
    AUTHOR_EMAIL,
    FEED_MAX_ITEMS,
)


class SSGGGenerator:
    """Collects markdown posts, renders theme templates, and emits the site."""

    def __init__(self, items_dir='items', output_dir='output', theme=None,
                 blog_title=None, blog_description=None):
        """Configure paths, theme, and the Jinja2/markdown machinery.

        Args:
            items_dir: Directory containing the markdown source posts.
            output_dir: Directory the generated site is written into.
            theme: Theme name under theme/; defaults to config THEME.
            blog_title: Overrides config BLOG_TITLE when given.
            blog_description: Overrides config BLOG_DESCRIPTION when given.
        """
        self.items_dir = Path(items_dir)
        self.output_dir = Path(output_dir)
        self.theme = theme or THEME
        self.theme_dir = Path('theme') / self.theme
        self.templates_dir = self.theme_dir / 'templates'
        self.assets_dir = self.theme_dir / 'assets'
        self.blog_title = blog_title or BLOG_TITLE
        self.blog_description = blog_description or BLOG_DESCRIPTION
        self.exclude_feeds = EXCLUDE_FEEDS_FROM_MAIN
        self.navbar_items = NAVBAR_ITEMS
        self.hide_logo = HIDE_LOGO
        self.hide_title = HIDE_TITLE
        self.logo_path = LOGO_PATH

        # Setup Jinja2
        self.env = Environment(loader=FileSystemLoader(self.templates_dir))

        # Custom filter: pick up to `count` random items (templates use it
        # for "related posts"-style listings).
        def random_sample(items, count):
            items_list = list(items)
            return random.sample(items_list, min(count, len(items_list)))

        self.env.filters['random_sample'] = random_sample

        # Markdown converter; 'toc' assigns id= attributes to headers, which
        # add_header_anchors() later turns into permalinks.
        self.md = markdown.Markdown(extensions=['extra', 'toc'])

    def parse_filename(self, filename, subpath=''):
        """Parse filename format: YYYY-MM-DD_type_name[_feed].md

        Args:
            filename: The markdown filename.
            subpath: Optional subdirectory path (e.g., 'notes' for items/notes/).

        Returns:
            Dict with date, type, name, feed, filename, subpath keys, or
            None when the filename does not match the naming convention.
        """
        pattern = r'(\d{4}-\d{2}-\d{2})_(short|long|page)_(.+?)(?:_([a-z0-9-]+))?\.md'
        # fullmatch anchors both ends, so an embedded ".md" inside the name
        # can never truncate the match early.
        match = re.fullmatch(pattern, filename)
        if not match:
            return None

        date_str, post_type, name, feed = match.groups()
        date = datetime.strptime(date_str, '%Y-%m-%d')
        return {
            'date': date,
            'date_str': date.strftime('%Y-%m-%d'),
            'type': post_type,
            'name': name,
            'feed': feed,
            'filename': filename,
            'subpath': subpath,
        }

    def add_header_anchors(self, html_content):
        """Add anchor links to headers with IDs.

        Rewrites ``<h2 id="x">Text</h2>`` to the same header with a trailing
        ``<a ... href="#x">#</a>`` permalink.
        """
        def replace_header(match):
            tag = match.group(1)
            header_id = match.group(2)
            text = match.group(3)
            # Add anchor link with # symbol pointing at the header's own id.
            # NOTE(review): the anchor markup was reconstructed from a garbled
            # source — confirm the class name against the theme's CSS.
            return (f'<{tag} id="{header_id}">{text} '
                    f'<a class="header-anchor" href="#{header_id}">#</a></{tag}>')

        # Match h2-h6 tags with id attributes (h1 is the post title).
        pattern = r'<(h[2-6]) id="([^"]+)">([^<]+)<\/\1>'
        return re.sub(pattern, replace_header, html_content)

    def read_post(self, filepath):
        """Read markdown file and extract title and content.

        Returns:
            (title, html_content) — title from the first '# ' heading
            ('Untitled' if absent), content converted to HTML with header
            anchors added.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        # Extract title (first # heading)
        title_match = re.match(r'^#\s+(.+)$', content, re.MULTILINE)
        title = title_match.group(1) if title_match else 'Untitled'

        # Remove title from content so it is not rendered twice
        if title_match:
            content = content[title_match.end():].strip()

        # Reset clears per-document state (toc entries, footnotes) that
        # would otherwise leak between posts on a reused Markdown instance.
        self.md.reset()
        html_content = self.md.convert(content)

        # Add anchor links to headers
        html_content = self.add_header_anchors(html_content)

        return title, html_content

    def collect_posts(self):
        """Collect and parse all posts from items directory, including subdirectories.

        Returns:
            List of post dicts sorted by date, newest first.
        """
        posts = []

        if not self.items_dir.exists():
            print(f"Warning: {self.items_dir} does not exist")
            return posts

        # Use rglob to recursively find all .md files
        for filepath in self.items_dir.rglob('*.md'):
            # Calculate subpath relative to items_dir
            relative_path = filepath.relative_to(self.items_dir)
            subpath = str(relative_path.parent) if relative_path.parent != Path('.') else ''

            parsed = self.parse_filename(filepath.name, subpath)
            if not parsed:
                print(f"Skipping {filepath}: doesn't match naming convention")
                continue

            title, content = self.read_post(filepath)

            # Build slug and URL with subpath
            if parsed['subpath']:
                slug = f"{parsed['subpath']}/{parsed['name']}"
                url = f"/{parsed['subpath']}/{parsed['name']}/"
            else:
                slug = parsed['name']
                url = f"/{parsed['name']}/"

            posts.append({
                'date': parsed['date_str'],
                'type': parsed['type'],
                'name': parsed['name'],
                'title': title,
                'content': content,
                'slug': slug,
                'url': url,
                'feed': parsed['feed'],
                'source': str(relative_path),
                'subpath': parsed['subpath'],
            })

        # Sort by date (ISO strings sort chronologically), newest first
        posts.sort(key=lambda x: x['date'], reverse=True)
        return posts

    def _common_context(self, **extra):
        """Template context shared by every rendered page, merged with *extra*."""
        ctx = {
            'blog_title': self.blog_title,
            'blog_description': self.blog_description,
            'navbar_items': self.navbar_items,
            'hide_logo': self.hide_logo,
            'hide_title': self.hide_title,
            'logo_path': self.logo_path,
            'rss_feed_enabled': ENABLE_RSS_FEED,
            'rss_feed_path': RSS_FEED_PATH,
        }
        ctx.update(extra)
        return ctx

    def _write_page(self, output_path, html):
        """Write rendered HTML to *output_path*, creating parent directories."""
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html)
        print(f"✓ Generated {output_path}")

    def generate_index(self, posts, feed_name=None, all_posts=None):
        """Generate index.html with all posts (or feed-specific index)."""
        template = self.env.get_template('index.tmpl')

        if feed_name:
            title = f"{feed_name} - {self.blog_title}"
            output_path = self.output_dir / 'feed' / feed_name / 'index.html'
        else:
            title = self.blog_title
            output_path = self.output_dir / 'index.html'

        html = template.render(**self._common_context(
            title=title,
            posts=posts,
            all_posts=all_posts or posts,
        ))
        self._write_page(output_path, html)

    def generate_feeds_overview(self, feeds, all_posts=None):
        """Generate /feed/index.html with list of all non-excluded feeds."""
        template = self.env.get_template('feeds.tmpl')

        # Prepare feed data with counts, excluding feeds in EXCLUDE_FEEDS_FROM_MAIN
        feed_list = [
            {'name': feed_name, 'count': len(posts)}
            for feed_name, posts in sorted(feeds.items())
            if feed_name not in self.exclude_feeds
        ]

        html = template.render(**self._common_context(
            title=f"Feeds - {self.blog_title}",
            feeds=feed_list,
            all_posts=all_posts or [],
        ))
        self._write_page(self.output_dir / 'feed' / 'index.html', html)

    def generate_subdir_index(self, subpath, posts, all_posts=None):
        """Generate index page for a subdirectory (e.g., /projects/)."""
        template = self.env.get_template('index.tmpl')

        # Use the subpath as the title (capitalize each component)
        subpath_title = subpath.replace('/', ' / ').title()

        html = template.render(**self._common_context(
            title=f"{subpath_title} - {self.blog_title}",
            posts=posts,
            all_posts=all_posts or posts,
        ))
        self._write_page(self.output_dir / subpath / 'index.html', html)

    def generate_post_page(self, post, all_posts=None):
        """Generate individual post page for 'long' posts."""
        template = self.env.get_template('post.tmpl')

        html = template.render(**self._common_context(
            title=f"{post['title']} - {self.blog_title}",
            post=post,
            all_posts=all_posts or [],
        ))
        # slug may be nested (subdir posts); index.html goes inside it so the
        # post is served at a clean trailing-slash URL.
        self._write_page(self.output_dir / post['slug'] / 'index.html', html)

    def generate_rss_feed(self, posts):
        """Generate RSS 2.0 feed for main feed posts."""
        from xml.etree.ElementTree import Element, SubElement, tostring, register_namespace
        from xml.dom import minidom

        # Register atom namespace to avoid ns0 prefix
        register_namespace('atom', 'http://www.w3.org/2005/Atom')

        # Limit posts
        posts = posts[:FEED_MAX_ITEMS]

        # Build feed URL correctly - ensure no double slashes
        feed_path = RSS_FEED_PATH.lstrip('/')
        base_url_clean = BASE_URL.rstrip('/')
        feed_url = f"{base_url_clean}/{feed_path}"

        rss = Element('rss', version='2.0')
        channel = SubElement(rss, 'channel')

        # Channel metadata
        SubElement(channel, 'title').text = self.blog_title
        SubElement(channel, 'description').text = self.blog_description
        SubElement(channel, 'link').text = base_url_clean

        # Add atom:link with rel="self" (required by RSS best practices)
        atom_link = SubElement(channel, '{http://www.w3.org/2005/Atom}link')
        atom_link.set('href', feed_url)
        atom_link.set('rel', 'self')
        atom_link.set('type', 'application/rss+xml')

        # Timezone-aware UTC; utcnow() is deprecated. Rendered string is
        # identical since the offset is hard-coded as +0000.
        SubElement(channel, 'lastBuildDate').text = datetime.now(timezone.utc).strftime(
            '%a, %d %b %Y %H:%M:%S +0000')

        # Add author information (managingEditor format: email (name))
        if AUTHOR_EMAIL and AUTHOR_NAME:
            SubElement(channel, 'managingEditor').text = f"{AUTHOR_EMAIL} ({AUTHOR_NAME})"
        elif AUTHOR_EMAIL:
            SubElement(channel, 'managingEditor').text = AUTHOR_EMAIL

        # Helper function to convert site-relative URLs to absolute ones so
        # links resolve inside feed readers.
        def make_absolute_urls(html_content):
            html_content = re.sub(r'href="/', f'href="{base_url_clean}/', html_content)
            html_content = re.sub(r'src="/', f'src="{base_url_clean}/', html_content)
            return html_content

        # Add items
        for post in posts:
            item = SubElement(channel, 'item')
            SubElement(item, 'title').text = post['title']
            SubElement(item, 'link').text = f"{base_url_clean}{post['url']}"
            SubElement(item, 'guid', isPermaLink='true').text = f"{base_url_clean}{post['url']}"
            SubElement(item, 'pubDate').text = datetime.strptime(
                post['date'], '%Y-%m-%d').strftime('%a, %d %b %Y 00:00:00 +0000')

            if post['type'] == 'long':
                # For long posts, just show title/summary
                SubElement(item, 'description').text = f"Read more at {base_url_clean}{post['url']}"
            else:
                # For short posts, include full content with absolute URLs
                content_absolute = make_absolute_urls(post['content'])
                SubElement(item, 'description').text = content_absolute

        # Pretty print XML
        xml_str = minidom.parseString(
            tostring(rss, encoding='utf-8')).toprettyxml(indent=' ', encoding='utf-8')

        # Join the *relative* feed path: pathlib discards output_dir entirely
        # when the right-hand operand is rooted (e.g. '/feed.xml').
        output_path = self.output_dir / feed_path
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'wb') as f:
            f.write(xml_str)
        print(f"✓ Generated {output_path}")

    def copy_assets(self):
        """Copy theme assets and images to output directory."""
        import shutil

        # Copy theme assets
        if self.assets_dir.exists():
            dest_dir = self.output_dir / 'assets'
            if dest_dir.exists():
                shutil.rmtree(dest_dir)
            shutil.copytree(self.assets_dir, dest_dir)
            print(f"✓ Copied theme assets to output")

        # Copy images
        images_dir = Path('images')
        if images_dir.exists():
            dest_dir = self.output_dir / 'images'
            if dest_dir.exists():
                shutil.rmtree(dest_dir)
            shutil.copytree(images_dir, dest_dir)
            print(f"✓ Copied images/ to output")

        # Copy static files (GPG keys, .well-known, etc.)
        static_dir = Path('static')
        if static_dir.exists():
            for item in static_dir.rglob('*'):
                if item.is_file():
                    # Preserve directory structure
                    rel_path = item.relative_to(static_dir)
                    dest_path = self.output_dir / rel_path
                    dest_path.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(item, dest_path)
            print(f"✓ Copied static/ to output")

    def generate(self):
        """Main generation process: collect, render all pages, copy assets."""
        print(f"Starting picopaper generation with theme '{self.theme}'...")

        # Create output directory (parents too, for nested output paths)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Collect posts
        all_posts = self.collect_posts()
        print(f"Found {len(all_posts)} posts")

        # Filter out pages and excluded feeds from main feed
        feed_posts = [p for p in all_posts
                      if p['type'] != 'page' and p['feed'] not in self.exclude_feeds]

        # Generate main index with filtered feed posts
        self.generate_index(feed_posts, all_posts=feed_posts)

        # Group posts by feed (include all posts, not just those in main feed)
        feeds = {}
        for post in all_posts:
            if post['feed'] and post['type'] != 'page':
                feeds.setdefault(post['feed'], []).append(post)

        # Generate feed-specific pages
        for feed_name, posts in feeds.items():
            self.generate_index(posts, feed_name, all_posts=feed_posts)

        # Generate feeds overview page
        if feeds:
            self.generate_feeds_overview(feeds, all_posts=feed_posts)

        # Group posts by subdirectory
        subdirs = {}
        for post in all_posts:
            if post['subpath']:  # Only posts in subdirectories
                subdirs.setdefault(post['subpath'], []).append(post)

        # Generate subdirectory index pages (e.g., /projects/)
        for subpath, subdir_posts in subdirs.items():
            self.generate_subdir_index(subpath, subdir_posts, all_posts=feed_posts)

        # Generate individual pages for long posts, short posts, and pages
        for post in all_posts:
            if post['type'] in ['long', 'short', 'page']:
                self.generate_post_page(post, all_posts=feed_posts)

        # Generate RSS feed
        if ENABLE_RSS_FEED:
            self.generate_rss_feed(feed_posts)

        # Copy assets
        self.copy_assets()

        print(f"\n✓ Site generated successfully in {self.output_dir}/")


def main():
    generator = SSGGGenerator()
    generator.generate()


if __name__ == '__main__':
    main()