<#
.SYNOPSIS
Dump the staging Postgres database to a local file for development use.
.DESCRIPTION
PowerShell counterpart of scripts/dump-staging.sh. Runs on Windows hosts
(staging itself, or a dev box with network access to staging Postgres).
.PARAMETER Url
Postgres connection URL. If omitted, reads $env:STAGING_DATABASE_URL.
SQLAlchemy driver suffixes (+asyncpg, +psycopg) are stripped automatically.
.PARAMETER Full
Include data from the heavy tables (variant_attributes, variant_price_history,
temporal_logs, user_logs, product_embeddings). Default: schema-only for these.
.PARAMETER PgBin
Path to the directory containing pg_dump.exe / psql.exe. If omitted, uses
whatever is on PATH, falling back to "C:\Program Files\PostgreSQL\<v>\bin".
.EXAMPLE
$env:STAGING_DATABASE_URL = "postgresql://user:pass@host:5432/merch"
.\scripts\dump-staging.ps1
.EXAMPLE
.\scripts\dump-staging.ps1 -Url "postgresql://user:pass@host:5432/merch" -Full
#>
[CmdletBinding()]
param(
[string]$Url = $env:STAGING_DATABASE_URL,
[switch]$Full,
[string]$PgBin
)
$ErrorActionPreference = 'Stop'
$ProgressPreference = 'SilentlyContinue'
# --- args ---------------------------------------------------------------------
if (-not $Url) {
Write-Error "Pass -Url or set `$env:STAGING_DATABASE_URL"
exit 2
}
# Strip SQLAlchemy driver suffixes; pg_dump only understands plain postgresql://
$Url = $Url -replace '\+asyncpg', '' -replace '\+psycopg2?', ''
# Tables whose DATA we skip by default (schema is still dumped).
$heavyTables = @(
'variant_attributes',
'variant_price_history',
'temporal_logs',
'user_logs',
'product_embeddings'
)
# --- locate pg_dump / psql ----------------------------------------------------
function Resolve-PgTool([string]$name) {
if ($PgBin) {
$candidate = Join-Path $PgBin "$name.exe"
if (Test-Path $candidate) { return $candidate }
}
$cmd = Get-Command "$name.exe" -ErrorAction SilentlyContinue
if ($cmd) { return $cmd.Source }
# Fallback: probe the default Postgres install location, newest version first
$defaults = Get-ChildItem 'C:\Program Files\PostgreSQL' -Directory -ErrorAction SilentlyContinue `
| Sort-Object Name -Descending
foreach ($d in $defaults) {
$candidate = Join-Path $d.FullName "bin\$name.exe"
if (Test-Path $candidate) { return $candidate }
}
throw "$name.exe not found. Install postgresql-client or pass -PgBin <dir>."
}
$pgDump = Resolve-PgTool 'pg_dump'
$psql = Resolve-PgTool 'psql'
$clientVersion = (& $pgDump --version) -replace '^[^\d]*', ''
$serverVersion = try { (& $psql $Url -tAc 'SHOW server_version_num').Trim() } catch { 'unknown' }
Write-Host "pg_dump $clientVersion -> server $serverVersion"
# --- dump ---------------------------------------------------------------------
$dumpDir = Join-Path (Get-Location) 'dumps'
if (-not (Test-Path $dumpDir)) { New-Item -ItemType Directory -Path $dumpDir | Out-Null }
$ts = (Get-Date).ToUniversalTime().ToString('yyyyMMddTHHmmssZ')
$out = Join-Path $dumpDir "staging-$ts.dump"
$args = @(
'--format=custom',
'--compress=9',
'--no-owner',
'--no-acl',
'--verbose',
"--file=$out"
)
if (-not $Full) {
foreach ($t in $heavyTables) {
$args += "--exclude-table-data=$t"
$args += "--exclude-table-data=${t}_*" # partition children
}
Write-Host ("Skipping DATA for: {0} (schema kept). Pass -Full to include." -f ($heavyTables -join ', '))
} else {
Write-Host "-Full set - including data for all tables (may be many GB)."
}
$args += $Url
Write-Host "Dumping to $out ..."
& $pgDump @args
if ($LASTEXITCODE -ne 0) { throw "pg_dump exited with $LASTEXITCODE" }
$sizeMB = [math]::Round(((Get-Item $out).Length / 1MB), 1)
Write-Host ""
Write-Host "Done. $out ($sizeMB MB)"
Write-Host ""
Write-Host "Restore into the local Compose Postgres with:"
Write-Host " Get-Content '$out' -Raw -Encoding Byte | docker compose -p merchendiser -f docker/docker-compose.yml exec -T postgres pg_restore --clean --if-exists --no-owner --no-acl -d merch"
Write-Host ""
Write-Host "Or, simpler, from a bash shell:"
Write-Host " docker compose -p merchendiser -f docker/docker-compose.yml exec -T postgres pg_restore --clean --if-exists --no-owner --no-acl -d merch < '$out'"$env:STAGING_DATABASE_URL = "postgresql://user:pass@staging-host:5432/merch"
.\scripts\dump-staging.ps1 -Full