#!/bin/bash
#
# Outline Export Script with Tree Visualization
# Exports all Outline documents with full hierarchy and shows a side-by-side
# tree comparison (online vs. exported files), plus a freshness check of the
# most recently changed documents.
#
# Usage: ./export_with_trees.sh [OPTIONS]
# Options are passed through to the Python export script (--dry-run, -v, etc.)
#
# Requires: docker, jq, a settings.json with .source.url and .source.token.
#

set -euo pipefail  # Exit on error, unset vars, and pipeline failures

# Capture CLI arguments to pass to Python. They are interpolated into a
# `bash -c` string inside the container, so each argument must be shell-quoted
# with %q — a plain "$@" would re-split arguments containing spaces.
CLI_ARGS=""
if [ "$#" -gt 0 ]; then
  CLI_ARGS=$(printf '%q ' "$@")
fi

# Colors for output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Configuration
WORK_DIR="$(pwd)"
SETTINGS_FILE="$WORK_DIR/settings.json"
EXPORT_DIR="$WORK_DIR/outline_export"

echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
echo -e "${BLUE}                    OUTLINE EXPORT${NC}"
echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
echo ""

# Check if settings.json exists
if [ ! -f "$SETTINGS_FILE" ]; then
  echo -e "${RED}Error: settings.json not found${NC}"
  exit 1
fi

# Extract API details from settings.json.
# jq -r prints the literal string "null" for missing keys, so validate both.
API_URL=$(jq -r '.source.url' "$SETTINGS_FILE")
API_TOKEN=$(jq -r '.source.token' "$SETTINGS_FILE")
if [ -z "$API_URL" ] || [ "$API_URL" = "null" ] \
   || [ -z "$API_TOKEN" ] || [ "$API_TOKEN" = "null" ]; then
  echo -e "${RED}Error: .source.url or .source.token missing in settings.json${NC}"
  exit 1
fi

# Backup old export if it exists
if [ -d "$EXPORT_DIR" ]; then
  TIMESTAMP=$(date +%Y%m%d_%H%M%S)
  BACKUP_FILE="$WORK_DIR/outline_backup_${TIMESTAMP}.tar.gz"
  echo -e "${YELLOW}Backing up previous export...${NC}"
  # stderr suppressed intentionally: tar may warn about files changing underneath
  tar -czf "$BACKUP_FILE" -C "$WORK_DIR" "outline_export" 2>/dev/null
  echo -e "${GREEN}✓ Backup: $BACKUP_FILE ($(du -sh "$BACKUP_FILE" | cut -f1))${NC}"
  rm -rf "$EXPORT_DIR"
fi

echo -e "${GREEN}Exporting documents...${NC}"
echo ""

# Run the export with CLI arguments (as current user to avoid root-owned files)
docker run --rm --network domnet \
  --user "$(id -u):$(id -g)" \
  -e HOME=/tmp \
  -v "$WORK_DIR:/work" \
  -w /work \
  python:3.11-slim \
  bash -c "pip install -qqq requests tqdm 2>/dev/null && python3 outline_export_fixed.py $CLI_ARGS"

echo ""

# Create Python script for side-by-side tree comparison.
# Quoted delimiter: the heredoc body is written verbatim, no shell expansion.
cat > "$WORK_DIR/.tree_compare.py" << 'PYTHON_SCRIPT'
#!/usr/bin/env python3
"""
Side-by-side comparison of Outline online vs exported files.
Matches documents row by row and highlights differences.
"""
import sys
import re
import shutil
import requests
from pathlib import Path

# ANSI colors
GREEN = '\033[0;32m'
RED = '\033[0;31m'
YELLOW = '\033[1;33m'
BLUE = '\033[0;34m'
CYAN = '\033[0;36m'
BOLD = '\033[1m'
DIM = '\033[2m'
RESET = '\033[0m'


def get_terminal_width():
    """Return the terminal width in columns, defaulting to 120."""
    try:
        return shutil.get_terminal_size().columns
    except Exception:
        return 120


def normalize_filename(name):
    """Normalize a name for comparison (handles / -> _ conversion etc)."""
    # Replace characters that filesystems don't allow
    normalized = name.replace('/', '_').replace('\\', '_')
    normalized = normalized.replace(':', '_').replace('*', '_')
    normalized = normalized.replace('?', '_').replace('"', '_')
    normalized = normalized.replace('<', '_').replace('>', '_')
    normalized = normalized.replace('|', '_')
    return normalized.strip()


def get_online_docs(api_url, api_token):
    """Fetch all documents from Outline API, organized by collection.

    Returns a dict: collection name -> list of doc dicts with
    'title', 'normalized', 'has_children', 'updatedAt'.
    """
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json"
    }
    response = requests.post(f"{api_url}/api/collections.list",
                             headers=headers, json={})
    collections = response.json().get("data", [])
    collections = sorted(collections, key=lambda c: c.get('name', ''))

    # Build collection ID to name mapping
    coll_id_to_name = {c['id']: c['name'] for c in collections}

    # Fetch all documents with timestamps using documents.list.
    # NOTE(review): a single page of 1000 — instances with more documents
    # would need pagination here; verify against the deployment size.
    all_docs_response = requests.post(
        f"{api_url}/api/documents.list",
        headers=headers,
        json={"limit": 1000}  # Get all docs
    )
    all_docs = all_docs_response.json().get("data", [])

    # Create timestamp lookup by (collection_name, normalized_title)
    timestamp_lookup = {}
    for doc in all_docs:
        coll_id = doc.get("collectionId")
        coll_name = coll_id_to_name.get(coll_id, "Unknown")
        title = doc.get("title", "Untitled")
        norm_title = normalize_filename(title)
        timestamp_lookup[(coll_name, norm_title)] = doc.get("updatedAt")

    result = {}
    for coll in collections:
        coll_name = coll['name']
        result[coll_name] = []

        # Get navigation tree for this collection
        nav_response = requests.post(
            f"{api_url}/api/collections.documents",
            headers=headers,
            json={"id": coll["id"]}
        )
        nav_tree = nav_response.json().get("data", [])

        def collect_docs(nodes):
            """Flatten the navigation tree depth-first into a doc list."""
            docs = []
            for node in nodes:
                title = node.get("title", "Untitled")
                norm_title = normalize_filename(title)
                has_children = len(node.get("children", [])) > 0
                updated_at = timestamp_lookup.get((coll_name, norm_title))
                docs.append({
                    'title': title,
                    'normalized': norm_title,
                    'has_children': has_children,
                    'updatedAt': updated_at
                })
                if has_children:
                    docs.extend(collect_docs(node.get("children", [])))
            return docs

        result[coll_name] = collect_docs(nav_tree)
    return result


def get_export_docs(export_dir):
    """Get all exported documents, organized by collection.

    Returns a dict: collection dir name -> list of doc dicts with
    'title', 'normalized', 'path', 'mtime'.
    """
    import os
    export_path = Path(export_dir)
    result = {}
    if not export_path.exists():
        return result
    for coll_dir in sorted(export_path.iterdir()):
        if coll_dir.is_dir():
            coll_name = coll_dir.name
            docs = []
            for md_file in sorted(coll_dir.glob("*.md")):
                title = md_file.stem
                if title:  # Skip empty filenames
                    mtime = os.path.getmtime(md_file)
                    docs.append({
                        'title': title,
                        'normalized': normalize_filename(title),
                        'path': md_file,
                        'mtime': mtime
                    })
            result[coll_name] = docs
    return result


def match_and_compare(online_docs, export_docs):
    """Match online and export docs, return comparison data per collection."""
    from datetime import datetime
    all_collections = sorted(set(online_docs.keys()) | set(export_docs.keys()))
    comparison = []
    for coll_name in all_collections:
        online_list = online_docs.get(coll_name, [])
        export_list = export_docs.get(coll_name, [])

        # Lookup by normalized name for O(1) matching
        export_lookup = {d['normalized']: d for d in export_list}

        rows = []
        matched_export = set()

        # First pass: match online docs to export
        for doc in sorted(online_list, key=lambda d: d['title'].lower()):
            norm = doc['normalized']
            if norm in export_lookup:
                export_doc = export_lookup[norm]
                # Check freshness: export mtime vs online updatedAt
                freshness = 'current'  # default
                if doc.get('updatedAt') and export_doc.get('mtime'):
                    online_dt = datetime.fromisoformat(
                        doc['updatedAt'].replace('Z', '+00:00'))
                    online_ts = online_dt.timestamp()
                    export_ts = export_doc['mtime']
                    # Allow 60s tolerance
                    if export_ts < online_ts - 60:
                        freshness = 'stale'
                rows.append({
                    'online': doc['title'],
                    'export': export_doc['title'],
                    'status': 'match',
                    'is_folder': doc['has_children'],
                    'freshness': freshness
                })
                matched_export.add(norm)
            else:
                rows.append({
                    'online': doc['title'],
                    'export': None,
                    'status': 'missing',
                    'is_folder': doc['has_children'],
                    'freshness': None
                })

        # Second pass: find extra export docs
        for doc in sorted(export_list, key=lambda d: d['title'].lower()):
            if doc['normalized'] not in matched_export:
                rows.append({
                    'online': None,
                    'export': doc['title'],
                    'status': 'extra',
                    'is_folder': False,
                    'freshness': None
                })

        # Sort rows: matched first, then missing, then extra
        rows.sort(key=lambda r: (
            0 if r['status'] == 'match' else (1 if r['status'] == 'missing' else 2),
            (r['online'] or r['export'] or '').lower()
        ))
        comparison.append({
            'collection': coll_name,
            'rows': rows,
            'online_count': len(online_list),
            'export_count': len(export_list)
        })
    return comparison


def print_comparison(comparison):
    """Print the side-by-side comparison with status indicators."""
    term_width = get_terminal_width()
    col_width = (term_width - 10) // 2  # -10 for separators and status icons
    total_online = 0
    total_export = 0
    total_matched = 0
    total_missing = 0
    total_extra = 0
    total_stale = 0

    print(f"\n{BLUE}{'═' * term_width}{RESET}")
    print(f"{BOLD}{CYAN}{'ONLINE':<{col_width}} {'':5} {'EXPORTED':<{col_width}}{RESET}")
    print(f"{BLUE}{'═' * term_width}{RESET}")

    for coll in comparison:
        total_online += coll['online_count']
        total_export += coll['export_count']

        # Collection header with per-collection counts
        coll_matched = sum(1 for r in coll['rows'] if r['status'] == 'match')
        coll_missing = sum(1 for r in coll['rows'] if r['status'] == 'missing')
        coll_extra = sum(1 for r in coll['rows'] if r['status'] == 'extra')
        coll_stale = sum(1 for r in coll['rows'] if r.get('freshness') == 'stale')
        total_matched += coll_matched
        total_missing += coll_missing
        total_extra += coll_extra
        total_stale += coll_stale

        if coll_missing == 0 and coll_extra == 0:
            coll_status = f"{GREEN}✓{RESET}"
        else:
            coll_status = f"{RED}✗{RESET}"
        header = f"{coll['collection']}/ ({coll['online_count']} → {coll['export_count']})"
        print(f"\n{BOLD}{YELLOW}{header}{RESET} {coll_status}")
        print(f"{BLUE}{'─' * term_width}{RESET}")

        for row in coll['rows']:
            online_name = row['online'] or ''
            export_name = row['export'] or ''

            # Add folder indicator
            if row['is_folder'] and online_name:
                online_name = f"📁 {online_name}"

            # Truncate if needed
            if len(online_name) > col_width - 1:
                online_name = online_name[:col_width-4] + '...'
            if len(export_name) > col_width - 1:
                export_name = export_name[:col_width-4] + '...'

            # Status and colors
            if row['status'] == 'match':
                # Freshness indicator
                if row.get('freshness') == 'stale':
                    freshness = f"{YELLOW}●{RESET}"
                else:
                    freshness = f"{GREEN}●{RESET}"
                status = f"{GREEN}✓{RESET}{freshness}"
                left = f"{online_name}"
                right = f"{export_name}"
            elif row['status'] == 'missing':
                status = f"{RED}✗{RESET} "
                left = f"{RED}{online_name}{RESET}"
                right = f"{DIM}---{RESET}"
            else:  # extra
                status = f"{YELLOW}+{RESET} "
                left = f"{DIM}---{RESET}"
                right = f"{YELLOW}{export_name}{RESET}"

            # Calculate visible width (without ANSI codes)
            def visible_len(s):
                return len(re.sub(r'\033\[[0-9;]*m', '', s))

            left_pad = col_width - visible_len(left)
            print(f" {left}{' ' * max(0, left_pad)} {status} {right}")

    # Summary
    print(f"\n{BLUE}{'═' * term_width}{RESET}")
    print(f"{BOLD}SUMMARY:{RESET}")
    print(f" Online: {total_online} documents")
    print(f" Exported: {total_export} documents")
    print(f" {GREEN}✓● Matched & current: {total_matched - total_stale}{RESET}")
    if total_stale > 0:
        print(f" {YELLOW}✓● Matched but stale: {total_stale} (export older than online){RESET}")
    if total_missing > 0:
        print(f" {RED}✗ Missing: {total_missing} (online but not exported){RESET}")
    if total_extra > 0:
        print(f" {YELLOW}+ Extra: {total_extra} (exported but not online){RESET}")
    if total_missing == 0 and total_extra == 0 and total_stale == 0:
        print(f"\n{GREEN}✓ All documents exported and current!{RESET}")
    elif total_missing == 0 and total_extra == 0:
        print(f"\n{YELLOW}⚠ All documents exported but {total_stale} are stale{RESET}")
    print()


def get_latest_changes(api_url, api_token, limit=3):
    """Fetch the most recently updated documents."""
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json"
    }
    response = requests.post(
        f"{api_url}/api/documents.list",
        headers=headers,
        json={
            "sort": "updatedAt",
            "direction": "DESC",
            "limit": limit
        }
    )
    docs = response.json().get("data", [])
    result = []
    for doc in docs:
        # Get collection name (one extra API call per doc; fine for small limit)
        coll_id = doc.get("collectionId")
        coll_name = "Unknown"
        if coll_id:
            coll_response = requests.post(
                f"{api_url}/api/collections.info",
                headers=headers,
                json={"id": coll_id}
            )
            coll_data = coll_response.json().get("data", {})
            coll_name = coll_data.get("name", "Unknown")
        result.append({
            'title': doc.get("title", "Untitled"),
            'collection': coll_name,
            'updatedAt': doc.get("updatedAt"),
            'normalized': normalize_filename(doc.get("title", "Untitled"))
        })
    return result


def find_export_file(export_dir, collection, normalized_title):
    """Find the exported file matching the document, or None."""
    export_path = Path(export_dir)
    # Try exact collection match first
    coll_dir = export_path / collection
    if coll_dir.exists():
        for md_file in coll_dir.glob("*.md"):
            if normalize_filename(md_file.stem) == normalized_title:
                return md_file
    # Try all collections (in case of name mismatch)
    for coll_dir in export_path.iterdir():
        if coll_dir.is_dir():
            for md_file in coll_dir.glob("*.md"):
                if normalize_filename(md_file.stem) == normalized_title:
                    return md_file
    return None


def print_latest_changes(latest_docs, export_dir):
    """Print the latest changes section (online vs export timestamps)."""
    term_width = get_terminal_width()
    from datetime import datetime
    import os

    print(f"\n{BLUE}{'═' * term_width}{RESET}")
    print(f"{BOLD}{CYAN}LATEST CHANGES (verify actuality){RESET}")
    print(f"{BLUE}{'─' * term_width}{RESET}")

    for i, doc in enumerate(latest_docs, 1):
        title = doc['title']
        collection = doc['collection']
        updated_at = doc['updatedAt']

        # Parse online timestamp (ISO format with 'Z' timezone suffix)
        if updated_at:
            online_dt = datetime.fromisoformat(updated_at.replace('Z', '+00:00'))
            online_str = online_dt.strftime("%Y-%m-%d %H:%M:%S")
        else:
            online_str = "Unknown"

        # Find export file
        export_file = find_export_file(export_dir, collection, doc['normalized'])
        if export_file and export_file.exists():
            export_mtime = os.path.getmtime(export_file)
            export_dt = datetime.fromtimestamp(export_mtime)
            export_str = export_dt.strftime("%Y-%m-%d %H:%M:%S")
            # Compare (export should be same time or newer)
            if updated_at:
                # Convert online to local timestamp for comparison
                online_ts = online_dt.timestamp()
                if export_mtime >= online_ts - 60:  # Allow 60s tolerance
                    status = f"{GREEN}✓{RESET}"
                else:
                    status = f"{YELLOW}⚠ older{RESET}"
            else:
                status = f"{GREEN}✓{RESET}"
        else:
            export_str = "NOT FOUND"
            status = f"{RED}✗{RESET}"

        # Print entry
        print(f"\n {BOLD}{i}. {title}{RESET}")
        print(f" {DIM}Collection:{RESET} {collection}")
        print(f" {DIM}Online:{RESET} {online_str}")
        print(f" {DIM}Exported:{RESET} {export_str} {status}")

    print(f"\n{BLUE}{'═' * term_width}{RESET}")


def main():
    if len(sys.argv) != 4:
        print("Usage: script.py <api_url> <api_token> <export_dir>")
        sys.exit(1)
    api_url = sys.argv[1]
    api_token = sys.argv[2]
    export_dir = sys.argv[3]

    # Get documents from both sources
    online_docs = get_online_docs(api_url, api_token)
    export_docs = get_export_docs(export_dir)

    # Match and compare
    comparison = match_and_compare(online_docs, export_docs)

    # Print results
    print_comparison(comparison)

    # Get and print latest changes
    latest_docs = get_latest_changes(api_url, api_token, limit=3)
    print_latest_changes(latest_docs, export_dir)


if __name__ == "__main__":
    main()
PYTHON_SCRIPT

# Remove the helper script on any exit path, not just success
trap 'rm -f "$WORK_DIR/.tree_compare.py"' EXIT

# Run the side-by-side tree comparison (use /work/outline_export as container path).
# NOTE(review): the token appears on the container command line and is visible
# via `ps` on the host — consider passing it via -e env var instead; confirm
# threat model before changing.
docker run --rm --network domnet \
  --user "$(id -u):$(id -g)" \
  -e HOME=/tmp \
  -v "$WORK_DIR:/work" \
  -w /work \
  python:3.11-slim \
  bash -c "pip install -qqq requests 2>/dev/null && python3 /work/.tree_compare.py '$API_URL' '$API_TOKEN' '/work/outline_export'"

echo ""