# picopaper/picopaper.py

#!/usr/bin/env python3

import os
import re
from datetime import datetime
from pathlib import Path
from jinja2 import Environment, FileSystemLoader
import markdown
from config import BLOG_TITLE, BLOG_DESCRIPTION, THEME, EXCLUDE_FEEDS_FROM_MAIN, NAVBAR_ITEMS

class SSGGGenerator:
    """Static site generator that renders Markdown items into an HTML site.

    Source files are read from ``items_dir`` and must be named
    ``YYYY-MM-DD_type_name[_feed].md`` where ``type`` is ``short``, ``long``
    or ``page``. Pages are rendered with the Jinja2 templates of the selected
    theme (``theme/<theme>/templates``) and written to ``output_dir``.
    """

    # Filename convention: YYYY-MM-DD_type_name[_feed].md
    _FILENAME_RE = re.compile(
        r'(\d{4}-\d{2}-\d{2})_(short|long|page)_(.+?)(?:_([a-z0-9-]+))?\.md'
    )
    # h2-h6 elements carrying an id attribute; the content may contain inline
    # markup such as <code> or <em>, hence the lazy ".+?" instead of "[^<]+".
    _HEADER_RE = re.compile(r'<(h[2-6]) id="([^"]+)">(.+?)</\1>', re.DOTALL)
    # A level-1 heading at the very start of the document, optionally preceded
    # by a BOM and blank lines. "[ \t]+" keeps the title on the "#" line.
    _TITLE_RE = re.compile(r'\ufeff?(?:[ \t]*\n)*#[ \t]+(.+)$', re.MULTILINE)

    def __init__(self, items_dir='items', output_dir='output', theme=None, blog_title=None, blog_description=None):
        """Configure paths, blog metadata, the template environment and Markdown.

        Args:
            items_dir: Directory containing the Markdown source files.
            output_dir: Directory the generated site is written to.
            theme: Theme name; falls back to ``THEME`` from config when falsy.
            blog_title: Blog title; falls back to ``BLOG_TITLE`` when falsy.
            blog_description: Blog description; falls back to
                ``BLOG_DESCRIPTION`` when falsy.
        """
        self.items_dir = Path(items_dir)
        self.output_dir = Path(output_dir)
        self.theme = theme or THEME
        self.theme_dir = Path('theme') / self.theme
        self.templates_dir = self.theme_dir / 'templates'
        self.assets_dir = self.theme_dir / 'assets'
        self.blog_title = blog_title or BLOG_TITLE
        self.blog_description = blog_description or BLOG_DESCRIPTION
        self.exclude_feeds = EXCLUDE_FEEDS_FROM_MAIN
        self.navbar_items = NAVBAR_ITEMS

        # Setup Jinja2
        self.env = Environment(loader=FileSystemLoader(self.templates_dir))

        # Setup markdown with toc extension for header anchors
        self.md = markdown.Markdown(extensions=['extra', 'toc'])

    def parse_filename(self, filename):
        """Parse a filename of the form ``YYYY-MM-DD_type_name[_feed].md``.

        Args:
            filename: Bare file name (no directory part).

        Returns:
            A dict with keys ``date`` (datetime), ``date_str``, ``type``,
            ``name``, ``feed`` (``None`` when absent) and ``filename``, or
            ``None`` when the name does not follow the convention or the date
            is not a real calendar date.
        """
        match = self._FILENAME_RE.fullmatch(filename)
        if not match:
            return None

        date_str, post_type, name, feed = match.groups()
        try:
            date = datetime.strptime(date_str, '%Y-%m-%d')
        except ValueError:
            # Digits matched the pattern but are not a valid date
            # (e.g. 2024-13-45); treat like any other malformed filename
            # so one bad file does not abort the whole build.
            return None

        return {
            'date': date,
            'date_str': date.strftime('%Y-%m-%d'),
            'type': post_type,
            'name': name,
            'feed': feed,
            'filename': filename
        }

    def add_header_anchors(self, html_content):
        """Append a ``#`` permalink to every h2-h6 element that has an id.

        Args:
            html_content: HTML string to process.

        Returns:
            The HTML with ``<a href="#id" class="header-anchor">#</a>``
            inserted before the closing tag of each matching header. Headers
            without an id, and h1 elements, are left untouched.
        """
        def replace_header(match):
            tag, header_id, text = match.groups()
            # Add anchor link with # symbol
            return f'<{tag} id="{header_id}">{text} <a href="#{header_id}" class="header-anchor">#</a></{tag}>'

        return self._HEADER_RE.sub(replace_header, html_content)

    def read_post(self, filepath):
        """Read a Markdown file and return its title and rendered HTML.

        The title is the level-1 heading (``# Title``) that opens the file;
        a BOM and leading blank lines before it are tolerated. When there is
        no such heading the title is ``'Untitled'`` and the whole file is
        rendered as content.

        Args:
            filepath: Path of the Markdown file.

        Returns:
            A ``(title, html_content)`` tuple.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()

        # Extract title (leading # heading) and remove it from the content
        title_match = self._TITLE_RE.match(content)
        if title_match:
            title = title_match.group(1).strip()
            content = content[title_match.end():].strip()
        else:
            title = 'Untitled'

        # The Markdown instance is shared between posts; reset it so that
        # extension state from a previous document does not leak into this one.
        self.md.reset()
        html_content = self.md.convert(content)

        # Add anchor links to headers
        html_content = self.add_header_anchors(html_content)

        return title, html_content

    def collect_posts(self):
        """Collect and parse all posts from the items directory.

        Files whose names do not follow the naming convention are skipped with
        a message. A warning is printed when two files share a slug, because
        their generated pages would be written to the same location.

        Returns:
            A list of post dicts sorted by date, newest first. Empty when the
            items directory does not exist.
        """
        posts = []

        if not self.items_dir.exists():
            print(f"Warning: {self.items_dir} does not exist")
            return posts

        seen_slugs = {}

        # glob() order is filesystem dependent; sorting first makes the
        # relative order of same-date posts reproducible (the date sort
        # below is stable).
        for filepath in sorted(self.items_dir.glob('*.md')):
            parsed = self.parse_filename(filepath.name)

            if not parsed:
                print(f"Skipping {filepath.name}: doesn't match naming convention")
                continue

            previous = seen_slugs.setdefault(parsed['name'], filepath.name)
            if previous != filepath.name:
                print(f"Warning: {filepath.name} and {previous} share the slug "
                      f"'{parsed['name']}'; their pages will overwrite each other")

            title, content = self.read_post(filepath)

            posts.append({
                'date': parsed['date_str'],
                'type': parsed['type'],
                'name': parsed['name'],
                'title': title,
                'content': content,
                'slug': parsed['name'],
                'url': f"{parsed['name']}/",
                'feed': parsed['feed'],
                'source': filepath.name
            })

        # Sort by date, newest first (ISO date strings sort chronologically)
        posts.sort(key=lambda x: x['date'], reverse=True)

        return posts

    def generate_index(self, posts, feed_name=None):
        """Render ``index.tmpl`` for the main index or for a single feed.

        Args:
            posts: Post dicts to list on the page.
            feed_name: When given, the page is written to
                ``<output>/feed/<feed_name>/index.html``; otherwise to
                ``<output>/index.html``.
        """
        template = self.env.get_template('index.tmpl')

        if feed_name:
            title = f"{feed_name} - {self.blog_title}"
            output_path = self.output_dir / 'feed' / feed_name / 'index.html'
        else:
            title = self.blog_title
            output_path = self.output_dir / 'index.html'

        html = template.render(
            title=title,
            blog_title=self.blog_title,
            blog_description=self.blog_description,
            navbar_items=self.navbar_items,
            posts=posts
        )

        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html)

        print(f"✓ Generated {output_path}")

    def generate_post_page(self, post):
        """Render ``post.tmpl`` for one post to ``<output>/<slug>/index.html``.

        Args:
            post: Post dict as produced by ``collect_posts``.
        """
        template = self.env.get_template('post.tmpl')

        html = template.render(
            title=f"{post['title']} - {self.blog_title}",
            blog_title=self.blog_title,
            blog_description=self.blog_description,
            navbar_items=self.navbar_items,
            post=post
        )

        # Create directory for the post slug; parents=True so this also works
        # when the output directory has not been created yet.
        post_dir = self.output_dir / post['slug']
        post_dir.mkdir(parents=True, exist_ok=True)

        # Generate index.html inside the slug directory
        output_path = post_dir / 'index.html'
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html)

        print(f"✓ Generated {output_path}")

    @staticmethod
    def _replace_tree(src_dir, dest_dir):
        """Replace ``dest_dir`` with a fresh copy of ``src_dir``."""
        import shutil

        # Remove the old copy first so files deleted from the source do not
        # linger in the output.
        if dest_dir.exists():
            shutil.rmtree(dest_dir)
        shutil.copytree(src_dir, dest_dir)

    def copy_assets(self):
        """Copy theme assets, images and static files to the output directory.

        Theme assets go to ``<output>/assets`` and ``images/`` to
        ``<output>/images``; both destinations are replaced wholesale. Files
        under ``static/`` are copied into the output root with their relative
        directory structure preserved. Missing source directories are skipped.
        """
        import shutil

        # Copy theme assets
        if self.assets_dir.exists():
            self._replace_tree(self.assets_dir, self.output_dir / 'assets')
            print("✓ Copied theme assets to output")

        # Copy images
        images_dir = Path('images')
        if images_dir.exists():
            self._replace_tree(images_dir, self.output_dir / 'images')
            print("✓ Copied images/ to output")

        # Copy static files (GPG keys, .well-known, etc.)
        static_dir = Path('static')
        if static_dir.exists():
            for item in static_dir.rglob('*'):
                if item.is_file():
                    # Preserve directory structure
                    dest_path = self.output_dir / item.relative_to(static_dir)
                    dest_path.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(item, dest_path)
            print("✓ Copied static/ to output")

    def generate(self):
        """Run the full build: indexes, feed pages, post pages and assets."""
        print(f"Starting picopaper generation with theme '{self.theme}'...")

        # Create output directory (including missing parents)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Collect posts
        all_posts = self.collect_posts()
        print(f"Found {len(all_posts)} posts")

        # Filter out pages and excluded feeds from main feed
        feed_posts = [p for p in all_posts
                      if p['type'] != 'page'
                      and p['feed'] not in self.exclude_feeds]

        # Generate main index with filtered feed posts
        self.generate_index(feed_posts)

        # Group posts by feed (include all posts, not just those in main feed)
        feeds = {}
        for post in all_posts:
            if post['feed'] and post['type'] != 'page':
                feeds.setdefault(post['feed'], []).append(post)

        # Generate feed-specific pages
        for feed_name, posts in feeds.items():
            self.generate_index(posts, feed_name)

        # Generate individual pages for long posts, short posts, and pages
        for post in all_posts:
            if post['type'] in ('long', 'short', 'page'):
                self.generate_post_page(post)

        # Copy assets
        self.copy_assets()

        print(f"\n✓ Site generated successfully in {self.output_dir}/")

def main():
    """Build the site with a generator created from its default arguments."""
    SSGGGenerator().generate()

# Run the build only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()