Initial commit: Export tools and import script requirements

- export_with_trees.sh: Bash wrapper for Outline export
- outline_export_fixed.py: Python export implementation
- IMPORT_SCRIPT.MD: PRD for import script (to be built)
- RALPH_PROMPT.md: Ralph Loop prompt for building import script
- CLAUDE.md: Project documentation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Claude
2026-01-19 22:33:55 +01:00
commit d9161f64f5
7 changed files with 2608 additions and 0 deletions

529
export_with_trees.sh Executable file
View File

@@ -0,0 +1,529 @@
#!/bin/bash
#
# Outline Export Script with Tree Visualization
# Exports all Outline documents with full hierarchy and shows side-by-side tree comparison
#
# Usage: ./export_with_trees.sh [OPTIONS]
# Options are passed through to the Python script (--dry-run, -v, etc.)
#
set -euo pipefail  # exit on error and unset vars; fail a pipeline if any stage fails

# Colors for output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Configuration
WORK_DIR="$(pwd)"
SETTINGS_FILE="$WORK_DIR/settings.json"
EXPORT_DIR="$WORK_DIR/outline_export"

echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
echo -e "${BLUE} OUTLINE EXPORT${NC}"
echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
echo ""

# jq is required to read settings.json — fail fast with a clear message
command -v jq >/dev/null 2>&1 || { echo -e "${RED}Error: jq is required but not found in PATH${NC}"; exit 1; }

# Check if settings.json exists
if [ ! -f "$SETTINGS_FILE" ]; then
  echo -e "${RED}Error: settings.json not found${NC}"
  exit 1
fi

# Extract API details from settings.json.
# '// empty' maps missing/null keys to an empty string instead of the
# literal text "null", so the emptiness check below actually works.
API_URL=$(jq -r '.source.url // empty' "$SETTINGS_FILE")
API_TOKEN=$(jq -r '.source.token // empty' "$SETTINGS_FILE")
if [ -z "$API_URL" ] || [ -z "$API_TOKEN" ]; then
  echo -e "${RED}Error: .source.url and .source.token must be set in settings.json${NC}"
  exit 1
fi

# Backup old export if it exists
if [ -d "$EXPORT_DIR" ]; then
  TIMESTAMP=$(date +%Y%m%d_%H%M%S)
  BACKUP_FILE="$WORK_DIR/outline_backup_${TIMESTAMP}.tar.gz"
  echo -e "${YELLOW}Backing up previous export...${NC}"
  tar -czf "$BACKUP_FILE" -C "$WORK_DIR" "outline_export" 2>/dev/null
  echo -e "${GREEN}✓ Backup: $BACKUP_FILE ($(du -sh "$BACKUP_FILE" | cut -f1))${NC}"
  rm -rf "$EXPORT_DIR"
fi

echo -e "${GREEN}Exporting documents...${NC}"
echo ""

# Run the export (as current user to avoid root-owned files).
# CLI arguments are forwarded to the inner bash as positional parameters
# ("$@") rather than interpolated into the -c string, so arguments that
# contain spaces or shell metacharacters survive intact.
docker run --rm --network domnet \
  --user "$(id -u):$(id -g)" \
  -e HOME=/tmp \
  -v "$WORK_DIR:/work" \
  -w /work \
  python:3.11-slim \
  bash -c 'pip install -qqq requests tqdm 2>/dev/null && python3 outline_export_fixed.py "$@"' _ "$@"
echo ""
# Create Python script for side-by-side tree comparison
cat > "$WORK_DIR/.tree_compare.py" << 'PYTHON_SCRIPT'
#!/usr/bin/env python3
"""
Side-by-side comparison of Outline online vs exported files.
Matches documents row by row and highlights differences.
"""
import sys
import re
import shutil
import requests
from pathlib import Path
# Colors
GREEN = '\033[0;32m'
RED = '\033[0;31m'
YELLOW = '\033[1;33m'
BLUE = '\033[0;34m'
CYAN = '\033[0;36m'
BOLD = '\033[1m'
DIM = '\033[2m'
RESET = '\033[0m'
def get_terminal_width():
    """Return the terminal width in columns, falling back to 120.

    shutil.get_terminal_size() already has its own (80, 24) fallback; the
    try/except only guards against unexpected environment errors.
    """
    try:
        return shutil.get_terminal_size().columns
    except Exception:  # narrow from bare except: don't swallow KeyboardInterrupt/SystemExit
        return 120
def normalize_filename(name):
    """Normalize a document title for comparison.

    Replaces the characters that filesystems disallow (/ \\ : * ? " < > |)
    with underscores, then strips surrounding whitespace — mirroring how
    the exporter sanitizes titles into filenames.
    """
    unsafe = '/\\:*?"<>|'
    table = str.maketrans({ch: '_' for ch in unsafe})
    return name.translate(table).strip()
def get_online_docs(api_url, api_token):
    """Fetch all documents from the Outline API, organized by collection.

    Args:
        api_url: base URL of the Outline instance (no trailing slash).
        api_token: API token sent as a Bearer credential.

    Returns:
        dict mapping collection name -> list of dicts with keys
        'title', 'normalized', 'has_children', 'updatedAt'.
    """
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json"
    }
    response = requests.post(f"{api_url}/api/collections.list", headers=headers, json={})
    collections = response.json().get("data", [])
    collections = sorted(collections, key=lambda c: c.get('name', ''))
    # Build collection ID to name mapping
    coll_id_to_name = {c['id']: c['name'] for c in collections}

    # Fetch ALL documents with timestamps, paginating documents.list —
    # a single request with a large limit silently truncates at the
    # server's page cap, dropping timestamps for the remaining docs.
    all_docs = []
    offset = 0
    page_size = 100
    while True:
        resp = requests.post(
            f"{api_url}/api/documents.list",
            headers=headers,
            json={"limit": page_size, "offset": offset}
        )
        page = resp.json().get("data", [])
        all_docs.extend(page)
        if len(page) < page_size:  # short page => last page
            break
        offset += page_size

    # Timestamp lookup keyed by (collection_name, normalized_title)
    timestamp_lookup = {}
    for doc in all_docs:
        coll_name = coll_id_to_name.get(doc.get("collectionId"), "Unknown")
        norm_title = normalize_filename(doc.get("title", "Untitled"))
        timestamp_lookup[(coll_name, norm_title)] = doc.get("updatedAt")

    result = {}
    for coll in collections:
        coll_name = coll['name']
        # Get the navigation tree (document hierarchy) for this collection
        nav_response = requests.post(
            f"{api_url}/api/collections.documents",
            headers=headers,
            json={"id": coll["id"]}
        )
        nav_tree = nav_response.json().get("data", [])

        def collect_docs(nodes):
            # Depth-first flatten of the navigation tree.
            docs = []
            for node in nodes:
                title = node.get("title", "Untitled")
                norm_title = normalize_filename(title)
                has_children = len(node.get("children", [])) > 0
                docs.append({
                    'title': title,
                    'normalized': norm_title,
                    'has_children': has_children,
                    'updatedAt': timestamp_lookup.get((coll_name, norm_title))
                })
                if has_children:
                    docs.extend(collect_docs(node.get("children", [])))
            return docs

        result[coll_name] = collect_docs(nav_tree)
    return result
def get_export_docs(export_dir):
    """Scan the export directory and return exported docs per collection.

    Returns:
        dict mapping collection directory name -> list of dicts with keys
        'title', 'normalized', 'path', 'mtime'. Empty dict if the export
        directory does not exist.
    """
    import os
    root = Path(export_dir)
    if not root.exists():
        return {}
    collections = {}
    for coll_dir in sorted(root.iterdir()):
        if not coll_dir.is_dir():
            continue
        entries = []
        for md_file in sorted(coll_dir.glob("*.md")):
            title = md_file.stem
            if not title:  # skip empty filenames
                continue
            entries.append({
                'title': title,
                'normalized': normalize_filename(title),
                'path': md_file,
                'mtime': os.path.getmtime(md_file),
            })
        collections[coll_dir.name] = entries
    return collections
def match_and_compare(online_docs, export_docs):
    """Pair online and exported documents per collection.

    Args:
        online_docs: {collection: [doc dicts with 'title'/'normalized'/
            'has_children'/'updatedAt']} from the API.
        export_docs: {collection: [doc dicts with 'title'/'normalized'/
            'path'/'mtime']} from disk.

    Returns:
        list of {'collection', 'rows', 'online_count', 'export_count'},
        each row tagged 'match'/'missing'/'extra' plus a freshness flag
        for matches ('stale' when the file is older than the online doc).
    """
    from datetime import datetime

    STALE_TOLERANCE = 60  # seconds of clock skew tolerated before 'stale'
    STATUS_ORDER = {'match': 0, 'missing': 1, 'extra': 2}

    def freshness_of(online_doc, export_doc):
        # Export file must be no older than the online update (minus tolerance).
        updated = online_doc.get('updatedAt')
        mtime = export_doc.get('mtime')
        if updated and mtime:
            online_ts = datetime.fromisoformat(updated.replace('Z', '+00:00')).timestamp()
            if mtime < online_ts - STALE_TOLERANCE:
                return 'stale'
        return 'current'

    comparison = []
    for coll_name in sorted(set(online_docs) | set(export_docs)):
        online_list = online_docs.get(coll_name, [])
        export_list = export_docs.get(coll_name, [])
        by_norm = {d['normalized']: d for d in export_list}

        rows = []
        seen = set()
        # First pass: walk online docs and look for their exported file.
        for doc in sorted(online_list, key=lambda d: d['title'].lower()):
            partner = by_norm.get(doc['normalized'])
            if partner is not None:
                seen.add(doc['normalized'])
                rows.append({
                    'online': doc['title'],
                    'export': partner['title'],
                    'status': 'match',
                    'is_folder': doc['has_children'],
                    'freshness': freshness_of(doc, partner),
                })
            else:
                rows.append({
                    'online': doc['title'],
                    'export': None,
                    'status': 'missing',
                    'is_folder': doc['has_children'],
                    'freshness': None,
                })
        # Second pass: exported files with no online counterpart.
        for doc in sorted(export_list, key=lambda d: d['title'].lower()):
            if doc['normalized'] not in seen:
                rows.append({
                    'online': None,
                    'export': doc['title'],
                    'status': 'extra',
                    'is_folder': False,
                    'freshness': None,
                })

        # Matched rows first, then missing, then extra; alphabetical within.
        rows.sort(key=lambda r: (
            STATUS_ORDER[r['status']],
            (r['online'] or r['export'] or '').lower(),
        ))
        comparison.append({
            'collection': coll_name,
            'rows': rows,
            'online_count': len(online_list),
            'export_count': len(export_list),
        })
    return comparison
def print_comparison(comparison):
    """Print the side-by-side comparison with status indicators.

    Args:
        comparison: output of match_and_compare() — a list of per-collection
            dicts with 'collection', 'rows', 'online_count', 'export_count'.

    Outputs: a two-column table (ONLINE | EXPORTED) to stdout with per-row
    match/missing/extra markers, plus an overall summary.
    """
    term_width = get_terminal_width()
    col_width = (term_width - 10) // 2  # -10 for separators and status icons
    total_online = 0
    total_export = 0
    total_matched = 0
    total_missing = 0
    total_extra = 0
    total_stale = 0
    print(f"\n{BLUE}{'═' * term_width}{RESET}")
    print(f"{BOLD}{CYAN}{'ONLINE':<{col_width}} {'':5} {'EXPORTED':<{col_width}}{RESET}")
    print(f"{BLUE}{'═' * term_width}{RESET}")
    for coll in comparison:
        total_online += coll['online_count']
        total_export += coll['export_count']
        # Per-collection tallies feeding both the header mark and the summary
        coll_matched = sum(1 for r in coll['rows'] if r['status'] == 'match')
        coll_missing = sum(1 for r in coll['rows'] if r['status'] == 'missing')
        coll_extra = sum(1 for r in coll['rows'] if r['status'] == 'extra')
        coll_stale = sum(1 for r in coll['rows'] if r.get('freshness') == 'stale')
        total_matched += coll_matched
        total_missing += coll_missing
        total_extra += coll_extra
        total_stale += coll_stale
        # A collection is "clean" when nothing is missing or extra
        # (stale counts only as a warning, not a failure)
        if coll_missing == 0 and coll_extra == 0:
            coll_status = f"{GREEN}✓{RESET}"
        else:
            coll_status = f"{RED}✗{RESET}"
        header = f"{coll['collection']}/ ({coll['online_count']} → {coll['export_count']})"
        print(f"\n{BOLD}{YELLOW}{header}{RESET} {coll_status}")
        print(f"{BLUE}{'─' * term_width}{RESET}")
        for row in coll['rows']:
            online_name = row['online'] or ''
            export_name = row['export'] or ''
            # Add folder indicator
            if row['is_folder'] and online_name:
                online_name = f"📁 {online_name}"
            # Truncate if needed
            if len(online_name) > col_width - 1:
                online_name = online_name[:col_width-4] + '...'
            if len(export_name) > col_width - 1:
                export_name = export_name[:col_width-4] + '...'
            # Status and colors
            if row['status'] == 'match':
                # Freshness indicator
                if row.get('freshness') == 'stale':
                    freshness = f"{YELLOW}●{RESET}"
                else:
                    freshness = f"{GREEN}●{RESET}"
                status = f"{GREEN}✓{RESET}{freshness}"
                left = f"{online_name}"
                right = f"{export_name}"
            elif row['status'] == 'missing':
                status = f"{RED}✗{RESET} "
                left = f"{RED}{online_name}{RESET}"
                right = f"{DIM}---{RESET}"
            else:  # extra
                status = f"{YELLOW}+{RESET} "
                left = f"{DIM}---{RESET}"
                right = f"{YELLOW}{export_name}{RESET}"
            # Calculate visible width (without ANSI codes) so padding lines up
            # even though the strings carry invisible escape sequences.
            def visible_len(s):
                return len(re.sub(r'\033\[[0-9;]*m', '', s))
            left_pad = col_width - visible_len(left)
            # right_pad is computed but unused — right column is left-ragged
            right_pad = col_width - visible_len(right)
            print(f" {left}{' ' * max(0, left_pad)} {status} {right}")
    # Summary
    print(f"\n{BLUE}{'═' * term_width}{RESET}")
    print(f"{BOLD}SUMMARY:{RESET}")
    print(f" Online: {total_online} documents")
    print(f" Exported: {total_export} documents")
    print(f" {GREEN}✓● Matched & current: {total_matched - total_stale}{RESET}")
    if total_stale > 0:
        print(f" {YELLOW}✓● Matched but stale: {total_stale} (export older than online){RESET}")
    if total_missing > 0:
        print(f" {RED}✗ Missing: {total_missing} (online but not exported){RESET}")
    if total_extra > 0:
        print(f" {YELLOW}+ Extra: {total_extra} (exported but not online){RESET}")
    if total_missing == 0 and total_extra == 0 and total_stale == 0:
        print(f"\n{GREEN}✓ All documents exported and current!{RESET}")
    elif total_missing == 0 and total_extra == 0:
        print(f"\n{YELLOW}⚠ All documents exported but {total_stale} are stale{RESET}")
    print()
def get_latest_changes(api_url, api_token, limit=3):
    """Fetch the most recently updated documents.

    Args:
        api_url: base URL of the Outline instance.
        api_token: API token sent as a Bearer credential.
        limit: number of recent documents to return (default 3).

    Returns:
        list of dicts with 'title', 'collection', 'updatedAt', 'normalized'.
    """
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json"
    }
    response = requests.post(
        f"{api_url}/api/documents.list",
        headers=headers,
        json={
            "sort": "updatedAt",
            "direction": "DESC",
            "limit": limit
        }
    )
    docs = response.json().get("data", [])

    # Cache collectionId -> name so repeated docs from the same collection
    # don't each trigger a separate collections.info round-trip.
    coll_name_cache = {}

    def collection_name(coll_id):
        if not coll_id:
            return "Unknown"
        if coll_id not in coll_name_cache:
            coll_response = requests.post(
                f"{api_url}/api/collections.info",
                headers=headers,
                json={"id": coll_id}
            )
            coll_data = coll_response.json().get("data", {})
            coll_name_cache[coll_id] = coll_data.get("name", "Unknown")
        return coll_name_cache[coll_id]

    result = []
    for doc in docs:
        title = doc.get("title", "Untitled")
        result.append({
            'title': title,
            'collection': collection_name(doc.get("collectionId")),
            'updatedAt': doc.get("updatedAt"),
            'normalized': normalize_filename(title)
        })
    return result
def find_export_file(export_dir, collection, normalized_title):
    """Find the exported .md file matching a document, or None.

    Tries the document's own collection directory first, then falls back to
    scanning every collection directory (covers collection-name mismatches).

    Args:
        export_dir: root of the export tree.
        collection: collection name (expected subdirectory name).
        normalized_title: title already passed through normalize_filename().
    """
    export_path = Path(export_dir)
    if not export_path.exists():
        # Guard: iterdir() on a missing directory raises FileNotFoundError
        return None
    preferred = export_path / collection

    def scan(directory):
        for md_file in directory.glob("*.md"):
            if normalize_filename(md_file.stem) == normalized_title:
                return md_file
        return None

    # Try exact collection match first
    if preferred.exists():
        hit = scan(preferred)
        if hit:
            return hit
    # Fall back to all collections, skipping the one already scanned
    for coll_dir in export_path.iterdir():
        if coll_dir.is_dir() and coll_dir != preferred:
            hit = scan(coll_dir)
            if hit:
                return hit
    return None
def print_latest_changes(latest_docs, export_dir):
    """Print the latest changes section.

    For each recently updated online document, show its online timestamp
    next to the exported file's mtime so the user can spot-check that the
    export is current.

    Args:
        latest_docs: output of get_latest_changes().
        export_dir: root of the export tree searched by find_export_file().
    """
    term_width = get_terminal_width()
    from datetime import datetime
    import os
    print(f"\n{BLUE}{'═' * term_width}{RESET}")
    print(f"{BOLD}{CYAN}LATEST CHANGES (verify actuality){RESET}")
    print(f"{BLUE}{'─' * term_width}{RESET}")
    for i, doc in enumerate(latest_docs, 1):
        title = doc['title']
        collection = doc['collection']
        updated_at = doc['updatedAt']
        # Parse online timestamp
        if updated_at:
            # Handle ISO format with timezone ('Z' suffix -> +00:00)
            online_dt = datetime.fromisoformat(updated_at.replace('Z', '+00:00'))
            online_str = online_dt.strftime("%Y-%m-%d %H:%M:%S")
        else:
            online_str = "Unknown"
        # Find export file
        export_file = find_export_file(export_dir, collection, doc['normalized'])
        if export_file and export_file.exists():
            export_mtime = os.path.getmtime(export_file)
            # NOTE(review): mtime rendered in local time while the online
            # timestamp is UTC — displayed strings may differ by the UTC
            # offset even when the comparison below says "current".
            export_dt = datetime.fromtimestamp(export_mtime)
            export_str = export_dt.strftime("%Y-%m-%d %H:%M:%S")
            # Compare (export should be same time or newer)
            if updated_at:
                # Convert online to local timestamp for comparison
                online_ts = online_dt.timestamp()
                if export_mtime >= online_ts - 60:  # Allow 60s tolerance
                    status = f"{GREEN}✓{RESET}"
                else:
                    status = f"{YELLOW}⚠ older{RESET}"
            else:
                status = f"{GREEN}✓{RESET}"
        else:
            export_str = "NOT FOUND"
            status = f"{RED}✗{RESET}"
        # Print entry
        print(f"\n {BOLD}{i}. {title}{RESET}")
        print(f" {DIM}Collection:{RESET} {collection}")
        print(f" {DIM}Online:{RESET} {online_str}")
        print(f" {DIM}Exported:{RESET} {export_str} {status}")
    print(f"\n{BLUE}{'═' * term_width}{RESET}")
def main():
    """CLI entry point: compare online vs exported docs and report freshness."""
    if len(sys.argv) != 4:
        print("Usage: script.py <API_URL> <API_TOKEN> <EXPORT_DIR>")
        sys.exit(1)
    api_url, api_token, export_dir = sys.argv[1:4]

    # Build the side-by-side comparison from both sources and print it.
    comparison = match_and_compare(
        get_online_docs(api_url, api_token),
        get_export_docs(export_dir),
    )
    print_comparison(comparison)

    # Spot-check the three most recently edited documents for freshness.
    print_latest_changes(get_latest_changes(api_url, api_token, limit=3), export_dir)


if __name__ == "__main__":
    main()
PYTHON_SCRIPT
# Run the side-by-side tree comparison (use /work/outline_export as container path).
# URL and token are handed to the inner bash as positional parameters instead of
# being interpolated into the -c string, so a value containing quotes or spaces
# cannot break (or inject into) the command line.
rv=0
docker run --rm --network domnet \
  --user "$(id -u):$(id -g)" \
  -e HOME=/tmp \
  -v "$WORK_DIR:/work" \
  -w /work \
  python:3.11-slim \
  bash -c 'pip install -qqq requests 2>/dev/null && python3 /work/.tree_compare.py "$@"' \
  _ "$API_URL" "$API_TOKEN" /work/outline_export || rv=$?

# Cleanup runs even when the comparison failed (set -e would otherwise skip it);
# the failure status is propagated afterwards.
rm -f "$WORK_DIR/.tree_compare.py"
if [ "$rv" -ne 0 ]; then
  exit "$rv"
fi
echo ""