291 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			291 lines
		
	
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python3
 | |
| 
 | |
import os
import random
import re
import shutil
from datetime import datetime
from pathlib import Path

import markdown
from jinja2 import Environment, FileSystemLoader

from config import BLOG_TITLE, BLOG_DESCRIPTION, THEME, EXCLUDE_FEEDS_FROM_MAIN, NAVBAR_ITEMS
 | |
| 
 | |
class SSGGGenerator:
    """Static site generator that renders Markdown items into HTML pages.

    Items are ``*.md`` files in ``items_dir`` named
    ``YYYY-MM-DD_type_name[_feed].md`` where ``type`` is ``short``, ``long``
    or ``page``. Pages are rendered with the Jinja2 templates ``index.tmpl``,
    ``feeds.tmpl`` and ``post.tmpl`` found in ``theme/<theme>/templates``.
    """

    # Filename convention: YYYY-MM-DD_type_name[_feed].md
    _FILENAME_RE = re.compile(
        r'(\d{4}-\d{2}-\d{2})_(short|long|page)_(.+?)(?:_([a-z0-9-]+))?\.md'
    )
    # <h2 id="some-id">Text</h2> ... <h6 ...>; only headers whose body is
    # plain text (no nested tags) are matched.
    _HEADER_RE = re.compile(r'<(h[2-6]) id="([^"]+)">([^<]+)<\/\1>')
    # A level-1 heading; used with .match(), so it must be the first line.
    _TITLE_RE = re.compile(r'^#\s+(.+)$', re.MULTILINE)

    def __init__(self, items_dir='items', output_dir='output', theme=None, blog_title=None, blog_description=None):
        """Configure paths, blog metadata, the template engine and Markdown.

        Args:
            items_dir: Directory containing the Markdown source files.
            output_dir: Directory the generated site is written to.
            theme: Theme name; falls back to ``config.THEME`` when falsy.
            blog_title: Blog title; falls back to ``config.BLOG_TITLE``.
            blog_description: Blog description; falls back to
                ``config.BLOG_DESCRIPTION``.
        """
        self.items_dir = Path(items_dir)
        self.output_dir = Path(output_dir)
        self.theme = theme or THEME
        self.theme_dir = Path('theme') / self.theme
        self.templates_dir = self.theme_dir / 'templates'
        self.assets_dir = self.theme_dir / 'assets'
        self.blog_title = blog_title or BLOG_TITLE
        self.blog_description = blog_description or BLOG_DESCRIPTION
        self.exclude_feeds = EXCLUDE_FEEDS_FROM_MAIN
        self.navbar_items = NAVBAR_ITEMS

        # Setup Jinja2.
        # NOTE(review): autoescape is left at Jinja2's default, so templates
        # receive post HTML and titles verbatim - confirm the templates
        # escape anything that is not meant to be markup.
        self.env = Environment(loader=FileSystemLoader(self.templates_dir))

        # Custom filter for random sampling inside templates
        self.env.filters['random_sample'] = self._random_sample

        # Setup markdown with toc extension for header anchors
        self.md = markdown.Markdown(extensions=['extra', 'toc'])

    @staticmethod
    def _random_sample(items, count):
        """Return up to ``count`` distinct random elements of ``items``.

        The sample size is clamped to ``[0, len(items)]`` so that an
        oversized or negative ``count`` never raises.
        """
        pool = list(items)
        return random.sample(pool, max(0, min(count, len(pool))))

    def parse_filename(self, filename):
        """Parse filename format: YYYY-MM-DD_type_name[_feed].md

        Args:
            filename: Bare file name (no directory part).

        Returns:
            A dict with keys ``date`` (datetime), ``date_str``, ``type``,
            ``name``, ``feed`` (``None`` when absent) and ``filename``, or
            ``None`` when the name does not follow the convention or the
            date is not a real calendar date.
        """
        # fullmatch: the whole name must follow the convention, so trailing
        # junk such as "post.md.bak" is rejected.
        match = self._FILENAME_RE.fullmatch(filename)
        if match is None:
            return None

        date_str, post_type, name, feed = match.groups()
        try:
            date = datetime.strptime(date_str, '%Y-%m-%d')
        except ValueError:
            # Right shape but impossible date (e.g. 2024-13-45): treat it as
            # a naming error so the caller skips the file instead of crashing.
            return None

        return {
            'date': date,
            'date_str': date.strftime('%Y-%m-%d'),
            'type': post_type,
            'name': name,
            'feed': feed,
            'filename': filename,
        }

    def add_header_anchors(self, html_content):
        """Add anchor links to headers with IDs.

        Each ``<h2>``-``<h6>`` element that carries an ``id`` attribute and
        contains only text gets a trailing ``#`` link pointing at itself.

        Args:
            html_content: HTML string to post-process.

        Returns:
            The HTML with anchor links inserted; non-matching headers are
            left untouched.
        """
        def replace_header(match):
            tag, header_id, text = match.groups()
            return f'<{tag} id="{header_id}">{text} <a href="#{header_id}" class="header-anchor">#</a></{tag}>'

        return self._HEADER_RE.sub(replace_header, html_content)

    def read_post(self, filepath):
        """Read markdown file and extract title and content.

        The title is taken from a ``# heading`` on the very first line; when
        there is none the title is ``'Untitled'`` and the whole file is
        treated as content.

        Args:
            filepath: Path of the Markdown file to read (UTF-8).

        Returns:
            Tuple ``(title, html_content)``.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        # Extract title (a "# heading" on the first line) and drop it from
        # the body so it is not rendered twice.
        title_match = self._TITLE_RE.match(content)
        if title_match:
            title = title_match.group(1).strip()
            content = content[title_match.end():].strip()
        else:
            title = 'Untitled'

        # The Markdown instance is shared between posts; reset it so state
        # kept by extensions (footnotes, toc ids) does not leak from the
        # previously converted document.
        self.md.reset()
        html_content = self.md.convert(content)

        # Add anchor links to headers
        html_content = self.add_header_anchors(html_content)

        return title, html_content

    def collect_posts(self):
        """Collect and parse all posts from items directory.

        Files whose names do not match the naming convention are reported
        and skipped.

        Returns:
            List of post dicts sorted by date, newest first. Empty when the
            items directory does not exist.
        """
        posts = []

        if not self.items_dir.exists():
            print(f"Warning: {self.items_dir} does not exist")
            return posts

        # Sorted so that the result does not depend on filesystem listing
        # order (the date sort below is stable, so same-day posts keep it).
        for filepath in sorted(self.items_dir.glob('*.md')):
            parsed = self.parse_filename(filepath.name)

            if not parsed:
                print(f"Skipping {filepath.name}: doesn't match naming convention")
                continue

            title, content = self.read_post(filepath)

            posts.append({
                'date': parsed['date_str'],
                'type': parsed['type'],
                'name': parsed['name'],
                'title': title,
                'content': content,
                'slug': parsed['name'],
                'url': f"/{parsed['name']}/",
                'feed': parsed['feed'],
                'source': filepath.name,
            })

        # Sort by date, newest first (ISO date strings sort chronologically)
        posts.sort(key=lambda post: post['date'], reverse=True)

        return posts

    def _render_to_file(self, template_name, output_path, **context):
        """Render ``template_name`` with the shared blog context and write it.

        ``blog_title``, ``blog_description`` and ``navbar_items`` are always
        passed to the template; ``context`` supplies the page-specific
        variables. Missing parent directories of ``output_path`` are created.
        """
        template = self.env.get_template(template_name)
        html = template.render(
            blog_title=self.blog_title,
            blog_description=self.blog_description,
            navbar_items=self.navbar_items,
            **context,
        )

        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html)

        print(f"✓ Generated {output_path}")

    def generate_index(self, posts, feed_name=None, all_posts=None):
        """Generate index.html with all posts (or feed-specific index).

        Args:
            posts: Posts listed on this index page.
            feed_name: When given, the page is written to
                ``feed/<feed_name>/index.html`` instead of ``index.html``.
            all_posts: Posts exposed to the template as ``all_posts``;
                defaults to ``posts`` only when not supplied (an explicitly
                empty list is passed through as empty).
        """
        if feed_name:
            title = f"{feed_name} - {self.blog_title}"
            output_path = self.output_dir / 'feed' / feed_name / 'index.html'
        else:
            title = self.blog_title
            output_path = self.output_dir / 'index.html'

        self._render_to_file(
            'index.tmpl',
            output_path,
            title=title,
            posts=posts,
            all_posts=posts if all_posts is None else all_posts,
        )

    def generate_feeds_overview(self, feeds, all_posts=None):
        """Generate /feed/index.html with list of all non-excluded feeds.

        Args:
            feeds: Mapping of feed name to the list of posts in that feed.
            all_posts: Posts exposed to the template as ``all_posts``
                (empty list when omitted).
        """
        # Feed names with post counts, sorted by name, skipping the feeds
        # listed in EXCLUDE_FEEDS_FROM_MAIN.
        feed_list = [
            {'name': feed_name, 'count': len(posts)}
            for feed_name, posts in sorted(feeds.items())
            if feed_name not in self.exclude_feeds
        ]

        self._render_to_file(
            'feeds.tmpl',
            self.output_dir / 'feed' / 'index.html',
            title=f"Feeds - {self.blog_title}",
            feeds=feed_list,
            all_posts=all_posts or [],
        )

    def generate_post_page(self, post, all_posts=None):
        """Generate the individual page for one post.

        The page is written to ``<output_dir>/<slug>/index.html``.

        Args:
            post: Post dict as produced by ``collect_posts``.
            all_posts: Posts exposed to the template as ``all_posts``
                (empty list when omitted).
        """
        self._render_to_file(
            'post.tmpl',
            self.output_dir / post['slug'] / 'index.html',
            title=f"{post['title']} - {self.blog_title}",
            post=post,
            all_posts=all_posts or [],
        )

    @staticmethod
    def _replace_tree(source_dir, dest_dir):
        """Copy ``source_dir`` to ``dest_dir``, removing any previous copy."""
        if dest_dir.exists():
            shutil.rmtree(dest_dir)
        shutil.copytree(source_dir, dest_dir)

    def copy_assets(self):
        """Copy theme assets, images and static files to output directory.

        Theme assets go to ``<output_dir>/assets`` and ``images/`` to
        ``<output_dir>/images`` (both replaced wholesale). Files under
        ``static/`` are copied into the output root, keeping their relative
        paths. Source directories that do not exist are skipped.
        """
        # Copy theme assets
        if self.assets_dir.exists():
            self._replace_tree(self.assets_dir, self.output_dir / 'assets')
            print("✓ Copied theme assets to output")

        # Copy images
        images_dir = Path('images')
        if images_dir.exists():
            self._replace_tree(images_dir, self.output_dir / 'images')
            print("✓ Copied images/ to output")

        # Copy static files (GPG keys, .well-known, etc.)
        static_dir = Path('static')
        if static_dir.exists():
            for item in static_dir.rglob('*'):
                if not item.is_file():
                    continue
                # Preserve directory structure
                dest_path = self.output_dir / item.relative_to(static_dir)
                dest_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy2(item, dest_path)
            print("✓ Copied static/ to output")

    def generate(self):
        """Main generation process.

        Collects the posts, writes the main index, one index per feed, the
        feeds overview (when at least one feed exists), one page per post,
        and finally copies assets into the output directory.
        """
        print(f"Starting picopaper generation with theme '{self.theme}'...")

        # Create output directory (including missing parents)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Collect posts
        all_posts = self.collect_posts()
        print(f"Found {len(all_posts)} posts")

        # Filter out pages and excluded feeds from main feed
        feed_posts = [
            post for post in all_posts
            if post['type'] != 'page' and post['feed'] not in self.exclude_feeds
        ]

        # Generate main index with filtered feed posts
        self.generate_index(feed_posts, all_posts=feed_posts)

        # Group posts by feed (include all posts, not just those in main feed)
        feeds = {}
        for post in all_posts:
            if post['feed'] and post['type'] != 'page':
                feeds.setdefault(post['feed'], []).append(post)

        # Generate feed-specific pages
        for feed_name, posts in feeds.items():
            self.generate_index(posts, feed_name, all_posts=feed_posts)

        # Generate feeds overview page
        if feeds:
            self.generate_feeds_overview(feeds, all_posts=feed_posts)

        # Generate individual pages for long posts, short posts, and pages
        for post in all_posts:
            if post['type'] in ('long', 'short', 'page'):
                self.generate_post_page(post, all_posts=feed_posts)

        # Copy assets
        self.copy_assets()

        print(f"\n✓ Site generated successfully in {self.output_dir}/")
 | |
| 
 | |
def main():
    """Build the site with a generator constructed from default arguments."""
    generator = SSGGGenerator()
    generator.generate()


if __name__ == '__main__':
    main()
 |