Initial commit: Export tools and import script requirements
- export_with_trees.sh: Bash wrapper for Outline export - outline_export_fixed.py: Python export implementation - IMPORT_SCRIPT.MD: PRD for import script (to be built) - RALPH_PROMPT.md: Ralph Loop prompt for building import script - CLAUDE.md: Project documentation Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
529
export_with_trees.sh
Executable file
529
export_with_trees.sh
Executable file
@@ -0,0 +1,529 @@
|
||||
#!/bin/bash
#
# Outline Export Script with Tree Visualization
# Exports all Outline documents with full hierarchy and shows side-by-side tree comparison
#
# Usage: ./export_with_trees.sh [OPTIONS]
# Options are passed through to the Python script (--dry-run, -v, etc.)
#

set -euo pipefail  # Exit on error or unset variable; fail a pipeline if any stage fails

# Capture CLI arguments as an array so arguments containing spaces or quotes
# survive intact (CLI_ARGS="$@" flattened them into one string that was later
# re-split by the inner shell).
CLI_ARGS=("$@")

# Colors for output
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color

# Configuration
WORK_DIR="$(pwd)"
SETTINGS_FILE="$WORK_DIR/settings.json"
EXPORT_DIR="$WORK_DIR/outline_export"

echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
echo -e "${BLUE} OUTLINE EXPORT${NC}"
echo -e "${BLUE}════════════════════════════════════════════════════════════${NC}"
echo ""

# settings.json holds the source API credentials; refuse to run without it
if [ ! -f "$SETTINGS_FILE" ]; then
    echo -e "${RED}Error: settings.json not found${NC}"
    exit 1
fi

# jq is required to parse settings.json
if ! command -v jq >/dev/null 2>&1; then
    echo -e "${RED}Error: jq is required but not installed${NC}"
    exit 1
fi

# Extract API details from settings.json; fail fast on missing/null values
# instead of passing the literal string "null" to the API later.
API_URL=$(jq -r '.source.url // empty' "$SETTINGS_FILE")
API_TOKEN=$(jq -r '.source.token // empty' "$SETTINGS_FILE")
if [ -z "$API_URL" ] || [ -z "$API_TOKEN" ]; then
    echo -e "${RED}Error: .source.url or .source.token missing in settings.json${NC}"
    exit 1
fi

# Backup old export if it exists
if [ -d "$EXPORT_DIR" ]; then
    TIMESTAMP=$(date +%Y%m%d_%H%M%S)
    BACKUP_FILE="$WORK_DIR/outline_backup_${TIMESTAMP}.tar.gz"
    echo -e "${YELLOW}Backing up previous export...${NC}"
    tar -czf "$BACKUP_FILE" -C "$WORK_DIR" "outline_export" 2>/dev/null
    echo -e "${GREEN}✓ Backup: $BACKUP_FILE ($(du -sh "$BACKUP_FILE" | cut -f1))${NC}"
    rm -rf "$EXPORT_DIR"
fi

echo -e "${GREEN}Exporting documents...${NC}"
echo ""

# Run the export with CLI arguments (as current user to avoid root-owned files).
# Arguments are forwarded positionally to bash -c via "$@" instead of being
# interpolated into the command string, so the inner shell cannot re-split
# or interpret them. The ${arr[@]+...} form keeps set -u happy when the
# array is empty on older bash.
docker run --rm --network domnet \
    --user "$(id -u):$(id -g)" \
    -e HOME=/tmp \
    -v "$WORK_DIR:/work" \
    -w /work \
    python:3.11-slim \
    bash -c 'pip install -qqq requests tqdm 2>/dev/null && python3 outline_export_fixed.py "$@"' _ ${CLI_ARGS[@]+"${CLI_ARGS[@]}"}

echo ""
cat > "$WORK_DIR/.tree_compare.py" << 'PYTHON_SCRIPT'
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Side-by-side comparison of Outline online vs exported files.
|
||||
Matches documents row by row and highlights differences.
|
||||
"""
|
||||
import sys
|
||||
import re
|
||||
import shutil
|
||||
import requests
|
||||
from pathlib import Path
|
||||
|
||||
# Colors
|
||||
GREEN = '\033[0;32m'
|
||||
RED = '\033[0;31m'
|
||||
YELLOW = '\033[1;33m'
|
||||
BLUE = '\033[0;34m'
|
||||
CYAN = '\033[0;36m'
|
||||
BOLD = '\033[1m'
|
||||
DIM = '\033[2m'
|
||||
RESET = '\033[0m'
|
||||
|
||||
def get_terminal_width():
    """Return the current terminal width in columns, defaulting to 120.

    Uses shutil.get_terminal_size's built-in fallback rather than a bare
    ``except:``, which also swallowed SystemExit/KeyboardInterrupt.
    """
    return shutil.get_terminal_size(fallback=(120, 24)).columns
def normalize_filename(name):
    """Normalize a document title for filename comparison.

    Each filesystem-unsafe character (/ \\ : * ? " < > |) is mapped to an
    underscore and surrounding whitespace is stripped, mirroring how the
    exporter sanitizes titles when writing files to disk.
    """
    unsafe = '/\\:*?"<>|'
    table = str.maketrans({ch: '_' for ch in unsafe})
    return name.translate(table).strip()
def get_online_docs(api_url, api_token):
    """Fetch all documents from Outline API, organized by collection.

    Returns a dict mapping collection name -> list of document dicts with
    'title', 'normalized', 'has_children' and 'updatedAt' keys, in
    navigation-tree order (each parent followed by its children).
    """
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json"
    }

    response = requests.post(f"{api_url}/api/collections.list", headers=headers, json={})
    collections = response.json().get("data", [])
    collections = sorted(collections, key=lambda c: c.get('name', ''))

    # Build collection ID to name mapping
    coll_id_to_name = {c['id']: c['name'] for c in collections}

    # Fetch all documents with timestamps using documents.list.
    # Paginate via offset/limit instead of a single limit=1000 request,
    # so workspaces with more than 1000 documents are fully covered.
    all_docs = []
    offset = 0
    page_size = 100
    while True:
        page_response = requests.post(
            f"{api_url}/api/documents.list",
            headers=headers,
            json={"limit": page_size, "offset": offset}
        )
        page = page_response.json().get("data", [])
        all_docs.extend(page)
        if len(page) < page_size:  # short page => last page
            break
        offset += page_size

    # Create timestamp lookup by (collection_name, normalized_title)
    timestamp_lookup = {}
    for doc in all_docs:
        coll_id = doc.get("collectionId")
        coll_name = coll_id_to_name.get(coll_id, "Unknown")
        title = doc.get("title", "Untitled")
        norm_title = normalize_filename(title)
        timestamp_lookup[(coll_name, norm_title)] = doc.get("updatedAt")

    result = {}

    for coll in collections:
        coll_name = coll['name']
        result[coll_name] = []

        # Get navigation tree for this collection
        nav_response = requests.post(
            f"{api_url}/api/collections.documents",
            headers=headers,
            json={"id": coll["id"]}
        )
        nav_tree = nav_response.json().get("data", [])

        def collect_docs(nodes):
            # Flatten the nav tree depth-first, parent before children.
            docs = []
            for node in nodes:
                title = node.get("title", "Untitled")
                norm_title = normalize_filename(title)
                has_children = len(node.get("children", [])) > 0
                updated_at = timestamp_lookup.get((coll_name, norm_title))
                docs.append({
                    'title': title,
                    'normalized': norm_title,
                    'has_children': has_children,
                    'updatedAt': updated_at
                })
                if has_children:
                    docs.extend(collect_docs(node.get("children", [])))
            return docs

        result[coll_name] = collect_docs(nav_tree)

    return result
def get_export_docs(export_dir):
    """Scan the export directory and return docs grouped by collection.

    Every top-level subdirectory is treated as a collection; each *.md
    file inside becomes an entry with its title, normalized title, path,
    and filesystem mtime. Returns {} when the directory does not exist.
    """
    import os
    root = Path(export_dir)
    collections = {}

    if not root.exists():
        return collections

    for entry in sorted(root.iterdir()):
        if not entry.is_dir():
            continue
        entries = []
        for md_path in sorted(entry.glob("*.md")):
            stem = md_path.stem
            if not stem:  # skip empty filenames
                continue
            entries.append({
                'title': stem,
                'normalized': normalize_filename(stem),
                'path': md_path,
                'mtime': os.path.getmtime(md_path),
            })
        collections[entry.name] = entries

    return collections
def match_and_compare(online_docs, export_docs):
|
||||
"""Match online and export docs, return comparison data per collection."""
|
||||
from datetime import datetime
|
||||
|
||||
all_collections = sorted(set(online_docs.keys()) | set(export_docs.keys()))
|
||||
comparison = []
|
||||
|
||||
for coll_name in all_collections:
|
||||
online_list = online_docs.get(coll_name, [])
|
||||
export_list = export_docs.get(coll_name, [])
|
||||
|
||||
# Create lookup by normalized name
|
||||
export_lookup = {d['normalized']: d for d in export_list}
|
||||
online_lookup = {d['normalized']: d for d in online_list}
|
||||
|
||||
rows = []
|
||||
matched_export = set()
|
||||
|
||||
# First pass: match online docs to export
|
||||
for doc in sorted(online_list, key=lambda d: d['title'].lower()):
|
||||
norm = doc['normalized']
|
||||
if norm in export_lookup:
|
||||
export_doc = export_lookup[norm]
|
||||
# Check freshness
|
||||
freshness = 'current' # default
|
||||
if doc.get('updatedAt') and export_doc.get('mtime'):
|
||||
online_dt = datetime.fromisoformat(doc['updatedAt'].replace('Z', '+00:00'))
|
||||
online_ts = online_dt.timestamp()
|
||||
export_ts = export_doc['mtime']
|
||||
# Allow 60s tolerance
|
||||
if export_ts < online_ts - 60:
|
||||
freshness = 'stale'
|
||||
rows.append({
|
||||
'online': doc['title'],
|
||||
'export': export_doc['title'],
|
||||
'status': 'match',
|
||||
'is_folder': doc['has_children'],
|
||||
'freshness': freshness
|
||||
})
|
||||
matched_export.add(norm)
|
||||
else:
|
||||
rows.append({
|
||||
'online': doc['title'],
|
||||
'export': None,
|
||||
'status': 'missing',
|
||||
'is_folder': doc['has_children'],
|
||||
'freshness': None
|
||||
})
|
||||
|
||||
# Second pass: find extra export docs
|
||||
for doc in sorted(export_list, key=lambda d: d['title'].lower()):
|
||||
if doc['normalized'] not in matched_export:
|
||||
rows.append({
|
||||
'online': None,
|
||||
'export': doc['title'],
|
||||
'status': 'extra',
|
||||
'is_folder': False,
|
||||
'freshness': None
|
||||
})
|
||||
|
||||
# Sort rows: matched first, then missing, then extra
|
||||
rows.sort(key=lambda r: (
|
||||
0 if r['status'] == 'match' else (1 if r['status'] == 'missing' else 2),
|
||||
(r['online'] or r['export'] or '').lower()
|
||||
))
|
||||
|
||||
comparison.append({
|
||||
'collection': coll_name,
|
||||
'rows': rows,
|
||||
'online_count': len(online_list),
|
||||
'export_count': len(export_list)
|
||||
})
|
||||
|
||||
return comparison
|
||||
|
||||
def print_comparison(comparison):
    """Print the side-by-side comparison with status indicators.

    Takes the list produced by match_and_compare() and renders, per
    collection: a header with online->export counts, one row per document
    (online title | status icon | exported title), and a final summary of
    matched/stale/missing/extra totals. Output goes to stdout with ANSI
    colors; column width is derived from the terminal width.
    """
    term_width = get_terminal_width()
    col_width = (term_width - 10) // 2  # -10 for separators and status icons

    # Running totals across all collections, for the summary block.
    total_online = 0
    total_export = 0
    total_matched = 0
    total_missing = 0
    total_extra = 0
    total_stale = 0

    print(f"\n{BLUE}{'═' * term_width}{RESET}")
    print(f"{BOLD}{CYAN}{'ONLINE':<{col_width}} {'':5} {'EXPORTED':<{col_width}}{RESET}")
    print(f"{BLUE}{'═' * term_width}{RESET}")

    for coll in comparison:
        total_online += coll['online_count']
        total_export += coll['export_count']

        # Per-collection counts drive both the header tick and the totals.
        coll_matched = sum(1 for r in coll['rows'] if r['status'] == 'match')
        coll_missing = sum(1 for r in coll['rows'] if r['status'] == 'missing')
        coll_extra = sum(1 for r in coll['rows'] if r['status'] == 'extra')
        coll_stale = sum(1 for r in coll['rows'] if r.get('freshness') == 'stale')

        total_matched += coll_matched
        total_missing += coll_missing
        total_extra += coll_extra
        total_stale += coll_stale

        # Collection is "clean" if nothing is missing or extra (stale is OK).
        if coll_missing == 0 and coll_extra == 0:
            coll_status = f"{GREEN}✓{RESET}"
        else:
            coll_status = f"{RED}✗{RESET}"

        header = f"{coll['collection']}/ ({coll['online_count']} → {coll['export_count']})"
        print(f"\n{BOLD}{YELLOW}{header}{RESET} {coll_status}")
        print(f"{BLUE}{'─' * term_width}{RESET}")

        for row in coll['rows']:
            online_name = row['online'] or ''
            export_name = row['export'] or ''

            # Add folder indicator for online docs that have children
            if row['is_folder'] and online_name:
                online_name = f"📁 {online_name}"

            # Truncate if needed so each side fits its column
            if len(online_name) > col_width - 1:
                online_name = online_name[:col_width-4] + '...'
            if len(export_name) > col_width - 1:
                export_name = export_name[:col_width-4] + '...'

            # Status icon and left/right cell colors per row status
            if row['status'] == 'match':
                # Freshness indicator: yellow dot = stale, green dot = current
                if row.get('freshness') == 'stale':
                    freshness = f"{YELLOW}●{RESET}"
                else:
                    freshness = f"{GREEN}●{RESET}"
                status = f"{GREEN}✓{RESET}{freshness}"
                left = f"{online_name}"
                right = f"{export_name}"
            elif row['status'] == 'missing':
                status = f"{RED}✗{RESET} "
                left = f"{RED}{online_name}{RESET}"
                right = f"{DIM}---{RESET}"
            else:  # extra
                status = f"{YELLOW}+{RESET} "
                left = f"{DIM}---{RESET}"
                right = f"{YELLOW}{export_name}{RESET}"

            # Visible width = length with ANSI escape sequences stripped,
            # so padding lines up regardless of coloring.
            def visible_len(s):
                return len(re.sub(r'\033\[[0-9;]*m', '', s))

            left_pad = col_width - visible_len(left)
            # NOTE(review): right_pad is computed but never used — the right
            # column is left unpadded below. Looks intentional (nothing
            # follows it on the line); confirm before removing.
            right_pad = col_width - visible_len(right)

            print(f" {left}{' ' * max(0, left_pad)} {status} {right}")

    # Summary
    print(f"\n{BLUE}{'═' * term_width}{RESET}")
    print(f"{BOLD}SUMMARY:{RESET}")
    print(f" Online: {total_online} documents")
    print(f" Exported: {total_export} documents")
    print(f" {GREEN}✓● Matched & current: {total_matched - total_stale}{RESET}")

    # Only show the problem lines that actually apply
    if total_stale > 0:
        print(f" {YELLOW}✓● Matched but stale: {total_stale} (export older than online){RESET}")
    if total_missing > 0:
        print(f" {RED}✗ Missing: {total_missing} (online but not exported){RESET}")
    if total_extra > 0:
        print(f" {YELLOW}+ Extra: {total_extra} (exported but not online){RESET}")

    # Overall verdict line
    if total_missing == 0 and total_extra == 0 and total_stale == 0:
        print(f"\n{GREEN}✓ All documents exported and current!{RESET}")
    elif total_missing == 0 and total_extra == 0:
        print(f"\n{YELLOW}⚠ All documents exported but {total_stale} are stale{RESET}")
    print()
def get_latest_changes(api_url, api_token, limit=3):
    """Fetch the `limit` most recently updated documents.

    Returns a list of dicts with 'title', 'collection' (name), 'updatedAt'
    and 'normalized' keys, newest first. Collection names are cached per
    collectionId to avoid one collections.info request per document
    (the original issued N+1 API calls).
    """
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json"
    }

    response = requests.post(
        f"{api_url}/api/documents.list",
        headers=headers,
        json={
            "sort": "updatedAt",
            "direction": "DESC",
            "limit": limit
        }
    )

    docs = response.json().get("data", [])
    result = []
    coll_name_cache = {}  # collectionId -> collection name

    for doc in docs:
        # Resolve collection name, hitting the API once per unique id
        coll_id = doc.get("collectionId")
        coll_name = "Unknown"
        if coll_id:
            if coll_id in coll_name_cache:
                coll_name = coll_name_cache[coll_id]
            else:
                coll_response = requests.post(
                    f"{api_url}/api/collections.info",
                    headers=headers,
                    json={"id": coll_id}
                )
                coll_data = coll_response.json().get("data", {})
                coll_name = coll_data.get("name", "Unknown")
                coll_name_cache[coll_id] = coll_name

        result.append({
            'title': doc.get("title", "Untitled"),
            'collection': coll_name,
            'updatedAt': doc.get("updatedAt"),
            'normalized': normalize_filename(doc.get("title", "Untitled"))
        })

    return result
def find_export_file(export_dir, collection, normalized_title):
    """Locate the exported .md file for a document.

    Tries the document's own collection directory first, then falls back
    to scanning every collection directory (covers collection-name
    mismatches). Returns the Path, or None when nothing matches.
    """
    export_path = Path(export_dir)
    # Guard: iterdir() on a missing directory raises FileNotFoundError.
    if not export_path.exists():
        return None

    # Try exact collection match first
    coll_dir = export_path / collection
    if coll_dir.exists():
        for md_file in coll_dir.glob("*.md"):
            if normalize_filename(md_file.stem) == normalized_title:
                return md_file

    # Try all collections (in case of name mismatch)
    for other_dir in export_path.iterdir():
        if other_dir.is_dir():
            for md_file in other_dir.glob("*.md"):
                if normalize_filename(md_file.stem) == normalized_title:
                    return md_file

    return None
def print_latest_changes(latest_docs, export_dir):
    """Print the latest changes section.

    For each doc from get_latest_changes(), shows its collection, the
    online updatedAt timestamp, and the export file's mtime with a status
    icon: ✓ when the export is no more than 60s older than online,
    '⚠ older' when it is, ✗ when no export file was found.
    """
    term_width = get_terminal_width()
    from datetime import datetime
    import os

    print(f"\n{BLUE}{'═' * term_width}{RESET}")
    print(f"{BOLD}{CYAN}LATEST CHANGES (verify actuality){RESET}")
    print(f"{BLUE}{'─' * term_width}{RESET}")

    for i, doc in enumerate(latest_docs, 1):
        title = doc['title']
        collection = doc['collection']
        updated_at = doc['updatedAt']

        # Parse online timestamp (ISO 8601; 'Z' suffix mapped to +00:00)
        if updated_at:
            online_dt = datetime.fromisoformat(updated_at.replace('Z', '+00:00'))
            online_str = online_dt.strftime("%Y-%m-%d %H:%M:%S")
        else:
            online_str = "Unknown"

        # Find the corresponding exported file on disk
        export_file = find_export_file(export_dir, collection, doc['normalized'])

        if export_file and export_file.exists():
            export_mtime = os.path.getmtime(export_file)
            # NOTE(review): fromtimestamp() renders local time while the
            # online timestamp is UTC-aware — the display strings may be in
            # different zones; the numeric comparison below is consistent
            # since both sides go through epoch seconds. Confirm intended.
            export_dt = datetime.fromtimestamp(export_mtime)
            export_str = export_dt.strftime("%Y-%m-%d %H:%M:%S")

            # Compare (export should be same time or newer)
            if updated_at:
                # Convert online to epoch seconds for comparison
                online_ts = online_dt.timestamp()
                if export_mtime >= online_ts - 60:  # Allow 60s tolerance
                    status = f"{GREEN}✓{RESET}"
                else:
                    status = f"{YELLOW}⚠ older{RESET}"
            else:
                # No online timestamp to compare against: assume OK
                status = f"{GREEN}✓{RESET}"
        else:
            export_str = "NOT FOUND"
            status = f"{RED}✗{RESET}"

        # Print entry
        print(f"\n {BOLD}{i}. {title}{RESET}")
        print(f" {DIM}Collection:{RESET} {collection}")
        print(f" {DIM}Online:{RESET} {online_str}")
        print(f" {DIM}Exported:{RESET} {export_str} {status}")

    print(f"\n{BLUE}{'═' * term_width}{RESET}")
def main():
    """Entry point: compare online vs exported docs and report freshness."""
    if len(sys.argv) != 4:
        print("Usage: script.py <API_URL> <API_TOKEN> <EXPORT_DIR>")
        sys.exit(1)

    api_url, api_token, export_dir = sys.argv[1], sys.argv[2], sys.argv[3]

    # Gather both sides, pair them up, and render the comparison table.
    comparison = match_and_compare(
        get_online_docs(api_url, api_token),
        get_export_docs(export_dir),
    )
    print_comparison(comparison)

    # Finish with the most recent edits so staleness is easy to spot-check.
    latest = get_latest_changes(api_url, api_token, limit=3)
    print_latest_changes(latest, export_dir)


if __name__ == "__main__":
    main()
PYTHON_SCRIPT
|
||||
|
||||
# Run the side-by-side tree comparison (use /work/outline_export as container path).
# The URL/token are passed as positional arguments to bash -c ("$@") instead of
# being interpolated into the command string — a token containing a single
# quote previously broke (or could inject into) the inner shell command line.
docker run --rm --network domnet \
    --user "$(id -u):$(id -g)" \
    -e HOME=/tmp \
    -v "$WORK_DIR:/work" \
    -w /work \
    python:3.11-slim \
    bash -c 'pip install -qqq requests 2>/dev/null && python3 /work/.tree_compare.py "$@"' _ "$API_URL" "$API_TOKEN" /work/outline_export

# Cleanup the generated helper script
rm -f "$WORK_DIR/.tree_compare.py"

echo ""
Reference in New Issue
Block a user