- export_with_trees.sh: Bash wrapper for Outline export
- outline_export_fixed.py: Python export implementation
- IMPORT_SCRIPT.MD: PRD for import script (to be built)
- RALPH_PROMPT.md: Ralph Loop prompt for building import script
- CLAUDE.md: Project documentation

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
530 lines
18 KiB
Bash
Executable File
530 lines
18 KiB
Bash
Executable File
#!/bin/bash
#
# Outline Export Script with Tree Visualization
# Exports all Outline documents with full hierarchy and shows side-by-side tree comparison
#
# Usage: ./export_with_trees.sh [OPTIONS]
# Options are passed through to the Python script (--dry-run, -v, etc.)
#

# Exit on error, error on unset variables, and fail a pipeline if any stage fails
set -euo pipefail

# Capture CLI arguments to pass to Python.
# NOTE(review): collapsing "$@" into one string loses the word boundaries of
# quoted arguments; downstream consumers re-split on whitespace.
CLI_ARGS="$@"

# ANSI color codes as literal escape text (interpreted later by `echo -e`)
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Configuration: all paths are relative to the invocation directory
WORK_DIR="$(pwd)"
SETTINGS_FILE="$WORK_DIR/settings.json"
EXPORT_DIR="$WORK_DIR/outline_export"
echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
echo -e "${BLUE} OUTLINE EXPORT${NC}"
echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
echo ""

# jq is required to read settings.json — fail with a clear message, not
# a "command not found" mid-script
if ! command -v jq >/dev/null 2>&1; then
    echo -e "${RED}Error: jq is required but not installed${NC}"
    exit 1
fi

# Check if settings.json exists
if [ ! -f "$SETTINGS_FILE" ]; then
    echo -e "${RED}Error: settings.json not found${NC}"
    exit 1
fi

# Extract API details from settings.json
# (jq -r prints the literal string "null" for missing keys, so check for it)
API_URL=$(jq -r '.source.url' "$SETTINGS_FILE")
API_TOKEN=$(jq -r '.source.token' "$SETTINGS_FILE")

if [ -z "$API_URL" ] || [ "$API_URL" = "null" ] || [ -z "$API_TOKEN" ] || [ "$API_TOKEN" = "null" ]; then
    echo -e "${RED}Error: .source.url or .source.token missing in settings.json${NC}"
    exit 1
fi

# Backup old export if it exists (timestamped tarball next to the export dir)
if [ -d "$EXPORT_DIR" ]; then
    TIMESTAMP=$(date +%Y%m%d_%H%M%S)
    BACKUP_FILE="$WORK_DIR/outline_backup_${TIMESTAMP}.tar.gz"
    echo -e "${YELLOW}Backing up previous export...${NC}"
    tar -czf "$BACKUP_FILE" -C "$WORK_DIR" "outline_export" 2>/dev/null
    echo -e "${GREEN}✓ Backup: $BACKUP_FILE ($(du -sh "$BACKUP_FILE" | cut -f1))${NC}"
    rm -rf "$EXPORT_DIR"
fi
echo -e "${GREEN}Exporting documents...${NC}"
echo ""

# Run the export with CLI arguments (as current user to avoid root-owned files).
# The script's arguments are forwarded as positional parameters ("$@") instead
# of being spliced unquoted into the command string, so quoting and spacing
# survive intact and cannot be re-interpreted by the container shell.
docker run --rm --network domnet \
    --user "$(id -u):$(id -g)" \
    -e HOME=/tmp \
    -v "$WORK_DIR:/work" \
    -w /work \
    python:3.11-slim \
    bash -c 'pip install -qqq requests tqdm 2>/dev/null && python3 outline_export_fixed.py "$@"' -- "$@"

echo ""

# Create Python script for side-by-side tree comparison
# (quoted delimiter: no expansion happens inside the heredoc)
cat > "$WORK_DIR/.tree_compare.py" << 'PYTHON_SCRIPT'
|
|
#!/usr/bin/env python3
|
|
"""
|
|
Side-by-side comparison of Outline online vs exported files.
|
|
Matches documents row by row and highlights differences.
|
|
"""
|
|
import sys
|
|
import re
|
|
import shutil
|
|
import requests
|
|
from pathlib import Path
|
|
|
|
# Colors
|
|
GREEN = '\033[0;32m'
|
|
RED = '\033[0;31m'
|
|
YELLOW = '\033[1;33m'
|
|
BLUE = '\033[0;34m'
|
|
CYAN = '\033[0;36m'
|
|
BOLD = '\033[1m'
|
|
DIM = '\033[2m'
|
|
RESET = '\033[0m'
|
|
|
|
def get_terminal_width():
    """Return the terminal width in columns, falling back to 120 on failure."""
    try:
        return shutil.get_terminal_size().columns
    except (ValueError, OSError):
        # e.g. a malformed COLUMNS environment variable; a bare `except:`
        # here would also swallow KeyboardInterrupt/SystemExit
        return 120
|
def normalize_filename(name):
    """Normalize a name for comparison (handles / -> _ conversion etc)."""
    # Map every filesystem-hostile character to '_' in a single pass,
    # then trim surrounding whitespace.
    table = str.maketrans({ch: '_' for ch in '/\\:*?"<>|'})
    return name.translate(table).strip()
|
def get_online_docs(api_url, api_token):
    """Fetch all documents from the Outline API, organized by collection.

    Returns a dict mapping collection name -> list of doc dicts with
    'title', 'normalized', 'has_children' and 'updatedAt' keys, in the
    collection's navigation-tree (pre-order) order.
    """
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json"
    }

    response = requests.post(f"{api_url}/api/collections.list", headers=headers, json={})
    response.raise_for_status()  # fail loudly on auth/network errors
    collections = response.json().get("data", [])
    collections = sorted(collections, key=lambda c: c.get('name', ''))

    # Build collection ID to name mapping
    coll_id_to_name = {c['id']: c['name'] for c in collections}

    # Fetch all documents with timestamps using documents.list.
    # Paginate with an increasing offset until a short page comes back,
    # instead of hoping everything fits into one capped request.
    all_docs = []
    offset = 0
    page_size = 100
    while True:
        page_response = requests.post(
            f"{api_url}/api/documents.list",
            headers=headers,
            json={"limit": page_size, "offset": offset}
        )
        page_response.raise_for_status()
        page = page_response.json().get("data", [])
        all_docs.extend(page)
        if len(page) < page_size:
            break
        offset += page_size

    # Create timestamp lookup by (collection_name, normalized_title)
    timestamp_lookup = {}
    for doc in all_docs:
        coll_id = doc.get("collectionId")
        coll_name = coll_id_to_name.get(coll_id, "Unknown")
        title = doc.get("title", "Untitled")
        norm_title = normalize_filename(title)
        timestamp_lookup[(coll_name, norm_title)] = doc.get("updatedAt")

    result = {}

    for coll in collections:
        coll_name = coll['name']

        # Get this collection's navigation tree
        nav_response = requests.post(
            f"{api_url}/api/collections.documents",
            headers=headers,
            json={"id": coll["id"]}
        )
        nav_response.raise_for_status()
        nav_tree = nav_response.json().get("data", [])

        def collect_docs(nodes):
            # Pre-order walk of the navigation tree, flattening children.
            docs = []
            for node in nodes:
                title = node.get("title", "Untitled")
                norm_title = normalize_filename(title)
                children = node.get("children", [])
                docs.append({
                    'title': title,
                    'normalized': norm_title,
                    'has_children': bool(children),
                    'updatedAt': timestamp_lookup.get((coll_name, norm_title))
                })
                docs.extend(collect_docs(children))
            return docs

        result[coll_name] = collect_docs(nav_tree)

    return result
|
def get_export_docs(export_dir):
    """Get all exported documents from disk, organized by collection.

    Returns a dict mapping collection directory name -> list of doc dicts
    with 'title', 'normalized', 'path' and 'mtime' keys.
    """
    export_path = Path(export_dir)
    result = {}

    if not export_path.exists():
        return result

    for coll_dir in sorted(export_path.iterdir()):
        if not coll_dir.is_dir():
            continue
        docs = []
        for md_file in sorted(coll_dir.glob("*.md")):
            title = md_file.stem
            if not title:  # skip empty filenames
                continue
            docs.append({
                'title': title,
                'normalized': normalize_filename(title),
                'path': md_file,
                # pathlib stat() replaces the former mid-function
                # `import os` + os.path.getmtime()
                'mtime': md_file.stat().st_mtime
            })
        result[coll_dir.name] = docs

    return result
|
def match_and_compare(online_docs, export_docs):
    """Match online and export docs, return comparison data per collection.

    Returns a list of dicts (one per collection) with keys 'collection',
    'rows', 'online_count' and 'export_count'.  Each row pairs an online
    document with its exported counterpart and carries a status:
      'match'   - present on both sides (plus a 'freshness' flag)
      'missing' - online but not exported
      'extra'   - exported but not online
    """
    from datetime import datetime

    all_collections = sorted(set(online_docs.keys()) | set(export_docs.keys()))
    comparison = []
    # Presentation order of statuses within a collection.
    status_rank = {'match': 0, 'missing': 1, 'extra': 2}

    for coll_name in all_collections:
        online_list = online_docs.get(coll_name, [])
        export_list = export_docs.get(coll_name, [])

        # Lookup of exported docs by normalized name (the join key).
        # (The former mirror lookup of online docs was never read.)
        export_lookup = {d['normalized']: d for d in export_list}

        rows = []
        matched_export = set()

        # First pass: match online docs to export
        for doc in sorted(online_list, key=lambda d: d['title'].lower()):
            norm = doc['normalized']
            if norm in export_lookup:
                export_doc = export_lookup[norm]
                # Freshness: the exported copy counts as stale when its mtime
                # is more than 60s older than the online updatedAt.
                freshness = 'current'
                if doc.get('updatedAt') and export_doc.get('mtime'):
                    online_dt = datetime.fromisoformat(doc['updatedAt'].replace('Z', '+00:00'))
                    if export_doc['mtime'] < online_dt.timestamp() - 60:
                        freshness = 'stale'
                rows.append({
                    'online': doc['title'],
                    'export': export_doc['title'],
                    'status': 'match',
                    'is_folder': doc['has_children'],
                    'freshness': freshness
                })
                matched_export.add(norm)
            else:
                rows.append({
                    'online': doc['title'],
                    'export': None,
                    'status': 'missing',
                    'is_folder': doc['has_children'],
                    'freshness': None
                })

        # Second pass: exported docs with no online counterpart
        for doc in sorted(export_list, key=lambda d: d['title'].lower()):
            if doc['normalized'] not in matched_export:
                rows.append({
                    'online': None,
                    'export': doc['title'],
                    'status': 'extra',
                    'is_folder': False,
                    'freshness': None
                })

        # Sort rows: matched first, then missing, then extra;
        # alphabetical within each group
        rows.sort(key=lambda r: (
            status_rank[r['status']],
            (r['online'] or r['export'] or '').lower()
        ))

        comparison.append({
            'collection': coll_name,
            'rows': rows,
            'online_count': len(online_list),
            'export_count': len(export_list)
        })

    return comparison
|
def print_comparison(comparison):
    """Print the side-by-side comparison table with status indicators.

    `comparison` is the structure produced by match_and_compare(); output
    goes to stdout using the module-level ANSI color constants.
    """
    term_width = get_terminal_width()
    col_width = (term_width - 10) // 2  # -10 for separators and status icons

    def visible_len(s):
        # On-screen width of the string (ANSI escape sequences stripped);
        # hoisted out of the row loop so it is defined once, not per row.
        return len(re.sub(r'\033\[[0-9;]*m', '', s))

    total_online = 0
    total_export = 0
    total_matched = 0
    total_missing = 0
    total_extra = 0
    total_stale = 0

    print(f"\n{BLUE}{'═' * term_width}{RESET}")
    print(f"{BOLD}{CYAN}{'ONLINE':<{col_width}} {'':5} {'EXPORTED':<{col_width}}{RESET}")
    print(f"{BLUE}{'═' * term_width}{RESET}")

    for coll in comparison:
        total_online += coll['online_count']
        total_export += coll['export_count']

        # Per-collection tallies drive the header's pass/fail mark
        coll_matched = sum(1 for r in coll['rows'] if r['status'] == 'match')
        coll_missing = sum(1 for r in coll['rows'] if r['status'] == 'missing')
        coll_extra = sum(1 for r in coll['rows'] if r['status'] == 'extra')
        coll_stale = sum(1 for r in coll['rows'] if r.get('freshness') == 'stale')

        total_matched += coll_matched
        total_missing += coll_missing
        total_extra += coll_extra
        total_stale += coll_stale

        if coll_missing == 0 and coll_extra == 0:
            coll_status = f"{GREEN}✓{RESET}"
        else:
            coll_status = f"{RED}✗{RESET}"

        header = f"{coll['collection']}/ ({coll['online_count']} → {coll['export_count']})"
        print(f"\n{BOLD}{YELLOW}{header}{RESET} {coll_status}")
        print(f"{BLUE}{'─' * term_width}{RESET}")

        for row in coll['rows']:
            online_name = row['online'] or ''
            export_name = row['export'] or ''

            # Add folder indicator
            if row['is_folder'] and online_name:
                online_name = f"📁 {online_name}"

            # Truncate if needed
            if len(online_name) > col_width - 1:
                online_name = online_name[:col_width-4] + '...'
            if len(export_name) > col_width - 1:
                export_name = export_name[:col_width-4] + '...'

            # Status and colors
            if row['status'] == 'match':
                # Freshness indicator
                if row.get('freshness') == 'stale':
                    freshness = f"{YELLOW}●{RESET}"
                else:
                    freshness = f"{GREEN}●{RESET}"
                status = f"{GREEN}✓{RESET}{freshness}"
                left = f"{online_name}"
                right = f"{export_name}"
            elif row['status'] == 'missing':
                status = f"{RED}✗{RESET} "
                left = f"{RED}{online_name}{RESET}"
                right = f"{DIM}---{RESET}"
            else:  # extra
                status = f"{YELLOW}+{RESET} "
                left = f"{DIM}---{RESET}"
                right = f"{YELLOW}{export_name}{RESET}"

            # Only the left column needs padding; the right column is ragged
            # (the former right_pad was computed but never used).
            left_pad = col_width - visible_len(left)
            print(f" {left}{' ' * max(0, left_pad)} {status} {right}")

    # Summary
    print(f"\n{BLUE}{'═' * term_width}{RESET}")
    print(f"{BOLD}SUMMARY:{RESET}")
    print(f" Online: {total_online} documents")
    print(f" Exported: {total_export} documents")
    print(f" {GREEN}✓● Matched & current: {total_matched - total_stale}{RESET}")

    if total_stale > 0:
        print(f" {YELLOW}✓● Matched but stale: {total_stale} (export older than online){RESET}")
    if total_missing > 0:
        print(f" {RED}✗ Missing: {total_missing} (online but not exported){RESET}")
    if total_extra > 0:
        print(f" {YELLOW}+ Extra: {total_extra} (exported but not online){RESET}")

    if total_missing == 0 and total_extra == 0 and total_stale == 0:
        print(f"\n{GREEN}✓ All documents exported and current!{RESET}")
    elif total_missing == 0 and total_extra == 0:
        print(f"\n{YELLOW}⚠ All documents exported but {total_stale} are stale{RESET}")
    print()
|
def get_latest_changes(api_url, api_token, limit=3):
    """Fetch the most recently updated documents, newest first.

    Returns a list of dicts with 'title', 'collection', 'updatedAt' and
    'normalized' keys.
    """
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json"
    }

    response = requests.post(
        f"{api_url}/api/documents.list",
        headers=headers,
        json={
            "sort": "updatedAt",
            "direction": "DESC",
            "limit": limit
        }
    )
    response.raise_for_status()  # surface auth/network failures immediately

    docs = response.json().get("data", [])
    result = []
    # collectionId -> name cache: avoids one collections.info round-trip
    # per document when several docs share a collection
    coll_name_cache = {}

    for doc in docs:
        coll_id = doc.get("collectionId")
        coll_name = "Unknown"
        if coll_id:
            if coll_id not in coll_name_cache:
                coll_response = requests.post(
                    f"{api_url}/api/collections.info",
                    headers=headers,
                    json={"id": coll_id}
                )
                coll_data = coll_response.json().get("data", {})
                coll_name_cache[coll_id] = coll_data.get("name", "Unknown")
            coll_name = coll_name_cache[coll_id]

        result.append({
            'title': doc.get("title", "Untitled"),
            'collection': coll_name,
            'updatedAt': doc.get("updatedAt"),
            'normalized': normalize_filename(doc.get("title", "Untitled"))
        })

    return result
|
def find_export_file(export_dir, collection, normalized_title):
    """Find the exported .md file matching the document.

    Looks in the collection's own directory first, then falls back to every
    other collection directory (in case of a name mismatch).  Returns the
    Path, or None if nothing matches.
    """
    export_path = Path(export_dir)
    if not export_path.exists():
        # Guard: iterdir() on a missing directory raises FileNotFoundError
        return None

    preferred = export_path / collection

    def scan(directory):
        # First .md file whose normalized stem matches, else None.
        for md_file in directory.glob("*.md"):
            if normalize_filename(md_file.stem) == normalized_title:
                return md_file
        return None

    # Try exact collection match first
    if preferred.exists():
        found = scan(preferred)
        if found:
            return found

    # Fall back to all other collections (skip the dir already scanned)
    for coll_dir in export_path.iterdir():
        if coll_dir.is_dir() and coll_dir != preferred:
            found = scan(coll_dir)
            if found:
                return found

    return None
|
def print_latest_changes(latest_docs, export_dir):
    """Print the latest changes section.

    For each recently updated document, prints the online updatedAt next to
    the exported file's mtime so a human can spot-check that the export is
    current.

    Args:
        latest_docs: list of dicts from get_latest_changes()
                     ('title', 'collection', 'updatedAt', 'normalized').
        export_dir: root directory of the on-disk export.
    """
    term_width = get_terminal_width()
    from datetime import datetime
    import os

    print(f"\n{BLUE}{'═' * term_width}{RESET}")
    print(f"{BOLD}{CYAN}LATEST CHANGES (verify actuality){RESET}")
    print(f"{BLUE}{'─' * term_width}{RESET}")

    for i, doc in enumerate(latest_docs, 1):
        title = doc['title']
        collection = doc['collection']
        updated_at = doc['updatedAt']

        # Parse online timestamp
        if updated_at:
            # Handle ISO format with timezone ('Z' suffix is not accepted by
            # fromisoformat on 3.10 and earlier, so rewrite it as +00:00)
            online_dt = datetime.fromisoformat(updated_at.replace('Z', '+00:00'))
            online_str = online_dt.strftime("%Y-%m-%d %H:%M:%S")
        else:
            online_str = "Unknown"

        # Find export file
        export_file = find_export_file(export_dir, collection, doc['normalized'])

        if export_file and export_file.exists():
            export_mtime = os.path.getmtime(export_file)
            export_dt = datetime.fromtimestamp(export_mtime)
            export_str = export_dt.strftime("%Y-%m-%d %H:%M:%S")

            # Compare (export should be same time or newer)
            if updated_at:
                # Convert online to local timestamp for comparison
                # (epoch seconds are timezone-independent, so this is safe)
                online_ts = online_dt.timestamp()
                if export_mtime >= online_ts - 60:  # Allow 60s tolerance
                    status = f"{GREEN}✓{RESET}"
                else:
                    status = f"{YELLOW}⚠ older{RESET}"
            else:
                # No online timestamp to compare against; assume current
                status = f"{GREEN}✓{RESET}"
        else:
            export_str = "NOT FOUND"
            status = f"{RED}✗{RESET}"

        # Print entry
        print(f"\n {BOLD}{i}. {title}{RESET}")
        print(f" {DIM}Collection:{RESET} {collection}")
        print(f" {DIM}Online:{RESET} {online_str}")
        print(f" {DIM}Exported:{RESET} {export_str} {status}")

    print(f"\n{BLUE}{'═' * term_width}{RESET}")
|
def main():
    """CLI entry point: compare online Outline docs against a local export."""
    if len(sys.argv) != 4:
        print("Usage: script.py <API_URL> <API_TOKEN> <EXPORT_DIR>")
        sys.exit(1)

    # Positional arguments (script name excluded)
    api_url, api_token, export_dir = sys.argv[1:4]

    # Gather both views of the document set
    online_docs = get_online_docs(api_url, api_token)
    export_docs = get_export_docs(export_dir)

    # Pair them up and render the side-by-side table
    print_comparison(match_and_compare(online_docs, export_docs))

    # Finish with a freshness spot-check of the newest documents
    print_latest_changes(get_latest_changes(api_url, api_token, limit=3), export_dir)


if __name__ == "__main__":
    main()
|
|
PYTHON_SCRIPT

# Run the side-by-side tree comparison (use /work/outline_export as container path).
# API credentials are passed as positional parameters rather than interpolated
# into the command string, so a token containing quotes or spaces cannot break
# (or inject into) the container's shell command.
docker run --rm --network domnet \
    --user "$(id -u):$(id -g)" \
    -e HOME=/tmp \
    -v "$WORK_DIR:/work" \
    -w /work \
    python:3.11-slim \
    bash -c 'pip install -qqq requests 2>/dev/null && python3 /work/.tree_compare.py "$@"' -- "$API_URL" "$API_TOKEN" '/work/outline_export'

# Cleanup
rm -f "$WORK_DIR/.tree_compare.py"

echo ""