Confluence to Hudu KB Migration

Easily migrate your Confluence Knowledge Base articles to Hudu with this script, preserving formatting and images along the way.

Features:

  • Flexible Migration Options – Retrieve articles from a single Confluence space or migrate from all spaces at once.

  • Custom Sorting – Choose where to store articles in Hudu:

    • Assign all articles to a single company

    • Add them to the global Knowledge Base

    • Manually assign articles to specific companies

  • Preserves Formatting & Images – Ensures article structure and media remain intact during migration.

# Confluence_KB_Migration.ps1
# Converts Confluence articles to Hudu KB articles.
# usage: ./Confluence_KB_Migration.ps1


# ---------------------------------------
# Azure Vault Variables
# ---------------------------------------
# Name of the Azure Key Vault that is queried with Get-AzKeyVaultSecret during setup.
$AzVault_Name = "ENTER YOUR KEY VAULT NAME HERE"
# Name of the vault secret whose plain-text value is handed to New-HuduAPIKey.
$AzVault_HuduSecretName = "ENTER YOUR KEY VAULT SECRET NAME HERE (HUDU API KEY)"
# Name of the vault secret holding the Confluence API token; its value becomes the
# password half of the Basic-auth credential built during setup.
$AzVault_ConfluenceAPIKey="ENTER YOUR KEY VAULT SECRET NAME HERE (CONFLUENCE API KEY)"

# ---------------------------------------
# Hudu Variables
# ---------------------------------------
# Base URL of the Hudu instance; passed to New-HuduBaseUrl.
$HuduBaseURL = "HTTPS://YOUR.HUDU.DOMAIN"

# ---------------------------------------
# Confluence Variables
# ---------------------------------------
# Subdomain inserted into "https://<subdomain>.atlassian.net/wiki" below.
$ConfluenceDomain = "ENTER YOUR CONFLUENCE SUBDOMAIN HERE"
# User name half of the Basic-auth credential sent to Confluence.
$Confluence_Username="ENTER YOUR CONFLUENCE USERNAME HERE"
# Maximum number of characters of article HTML shown in the per-article preview snippet.
$ConfluenceArticlePreviewLength = 500

# ---------------------------------------
# internal variables
# ---------------------------------------
# File that PrintAndLog appends every log line to (relative to the current directory).
$LogFile = ".\ConfluenceTransfer.log"
# Value of the "type" query parameter used by GetAllPages when listing content.
$ContentType = "page"
# NOTE(review): not referenced anywhere in the visible script — confirm whether it is still needed.
$TmpOutputDir="temp"
# Root of the Confluence REST/download URLs built by the functions below.
$ConfluenceBaseUrl = "https://$($ConfluenceDomain).atlassian.net/wiki"


# STEP 1: Initial SetUp, import/install modules, sign into Hudu
Write-Host "Installing and/or Importing Modules, signing into Hudu with API key from Key Vault"

# Import each required module, installing it first when it is not available locally.
foreach ($module in @('Az', 'HuduAPI')) {
    if (Get-Module -ListAvailable -Name $module) {
        Write-Host "Importing module, $module..."
    } else {
        Write-Host "Installing and importing module $module..."
        Install-Module $module -Force -AllowClobber
    }
    Import-Module $module
}

# Reuse an existing Azure context when there is one; otherwise sign in with Connect-AzAccount.
if (-not (Get-AzContext)) {
    Write-Host "AZContext not yet set. Connecting AZ Account... $(Connect-AzAccount)"
} else {
    Write-Host "AZContext already set. Skipping Sign-on."
}

# Configure the HuduAPI module with the key stored in Key Vault and the instance URL.
Write-Host "Authenticating to Hudu instance @$HuduBaseURL..."
New-HuduAPIKey "$(Get-AzKeyVaultSecret -VaultName "$AzVault_Name" -Name "$AzVault_HuduSecretName" -AsPlainText)"
New-HuduBaseUrl $HuduBaseURL

# Build the Basic-auth credential ("user:token", base64) used for every Confluence request.
# UTF-8 is used instead of ASCII because ASCII encoding silently replaces any non-ASCII
# character with '?', which would corrupt the credential; for pure-ASCII input the
# resulting bytes are identical.
Write-Host "Obtaining Confluence Credentials from AZ Vault..."
$ConfluenceToken = Get-AzKeyVaultSecret -VaultName "$AzVault_Name" -Name "$AzVault_ConfluenceAPIKey" -AsPlainText
$encodedCreds = [System.Convert]::ToBase64String(
    [System.Text.Encoding]::UTF8.GetBytes("$($Confluence_Username):$($ConfluenceToken)")
)

# Define some useful Functions 
# Prefixes the message with the current date/time, echoes it to the host,
# and appends the same line to the file named by $LogFile.
function PrintAndLog {
    param (
        [string]$message
    )
    $entry = "[$(Get-Date)] $($message)"
    Write-Host $entry
    Add-Content -Value $entry -Path $LogFile
}
# Returns how far $Current is through $Total as a percentage, rounded to two
# decimals and capped at 100. A total of zero is reported as 100.
function Get-PercentDone {
    param (
        [int]$Current,
        [int]$Total
    )
    # Zero total: report complete (this also avoids dividing by zero).
    if ($Total -eq 0) { return 100 }

    $ratio = ($Current / $Total) * 100
    if ($ratio -le 100) {
        return [Math]::Round($ratio, 2)
    }
    # Anything past the total is capped.
    return 100
}
# Shows a numbered menu built from $objects and keeps prompting (with $message)
# until the user enters a valid number.
#   $objects   - items to choose from; each is labelled by its OptionMessage
#                property, else its name property, else the item itself.
#   $message   - prompt text passed to Read-Host.
#   $allowNull - when $true an extra "0: None/Custom" entry is offered.
# Returns the chosen item, or $null when "0" is chosen with $allowNull, or when
# there is nothing to choose from and $allowNull is not set.
function Select-Object-From-List($objects,$message,$allowNull = $false) {
    # Normalise to an array so a single item (or $null) is indexed and counted reliably.
    $items = @($objects)

    # Without any option the prompt could never be satisfied; bail out instead of looping forever.
    if ($items.Count -eq 0 -and $allowNull -ne $true) {
        PrintAndLog -message "No options are available to choose from."
        return $null
    }

    while ($true) {
        if ($allowNull -eq $true) {
            Write-Host "0: None/Custom"
        }
        for ($i = 0; $i -lt $items.Count; $i++) {
            $object = $items[$i]
            if ($null -ne $object.OptionMessage) {
                Write-Host "$($i+1): $($object.OptionMessage)"
            } elseif ($null -ne $object.name) {
                Write-Host "$($i+1): $($object.name)"
            } else {
                Write-Host "$($i+1): $($object)"
            }
        }

        # Read-Host returns a string; parse it explicitly so range checks are numeric
        # and blank / non-numeric input is rejected rather than coerced to 0.
        $rawChoice = Read-Host $message
        $choice = 0
        $isNumber = [int]::TryParse("$rawChoice".Trim(), [ref]$choice)

        if ($isNumber -and $choice -eq 0 -and $allowNull -eq $true) {
            return $null
        }
        if ($isNumber -and $choice -ge 1 -and $choice -le $items.Count -and $null -ne $items[$choice - 1]) {
            return $items[$choice - 1]
        }
        PrintAndLog -message "Invalid selection. Please enter a number from above"
    }
}
# Prompts with "$message (y/n)" until the user answers y/yes or n/no.
# Returns $true for yes and $false for no; any other input logs a hint and re-prompts.
function Get-YesNoResponse($message) {
    while ($true) {
        $answer = Read-Host "$message (y/n)"

        # Normalise to lower case; a null answer is treated as empty text.
        $normalized = ""
        if ($null -ne $answer) {
            $normalized = $answer.ToLower()
        }

        if (@('y', 'yes') -contains $normalized) {
            return $true
        }
        if (@('n', 'no') -contains $normalized) {
            return $false
        }
        PrintAndLog -message "Invalid input. Please enter 'y' for Yes or 'n' for No."
    }
}
# Downloads $imageUrl (sending $authHeader as the Authorization header) and
# returns it as a "data:<content-type>;base64,<payload>" URI.
# If the download or conversion fails, the failure is written to the host and
# the original URL is returned unchanged so the caller can still emit a link.
function Convert-ImageToBase64 {
    param (
        [string]$imageUrl,
        [string]$authHeader
    )
    try {
        # -UseBasicParsing avoids the Internet Explorer dependency on Windows PowerShell 5.1
        # (it is accepted and ignored on newer versions).
        $response = Invoke-WebRequest -Uri $imageUrl -Headers @{ Authorization = $authHeader } -Method Get -UseBasicParsing -ErrorAction Stop

        # Header values are a string on Windows PowerShell but a string[] on PowerShell 6+;
        # take the first value either way.
        $contentType = @($response.Headers["Content-Type"])[0]

        # .Content is only a byte[] for binary content types. For text-like types
        # (e.g. image/svg+xml) it is a string, so read the raw bytes from the stream.
        if ($response.Content -is [byte[]]) {
            $imageBytes = $response.Content
        } elseif ($null -ne $response.RawContentStream) {
            $imageBytes = $response.RawContentStream.ToArray()
        } else {
            throw "Response did not contain binary content."
        }

        $base64String = [Convert]::ToBase64String($imageBytes)
        return "data:$contentType;base64,$base64String"
    } catch {
        Write-Host "Failed to fetch image: $imageUrl. Error: $_"
        return $imageUrl  # Keep original URL if download fails
    }
}
# Rewrites Confluence storage-format image macros
#   <ac:image ...><ri:attachment ri:filename="NAME" .../></ac:image>
# into plain <img> tags whose src is produced by Convert-ImageToBase64 for the
# attachment URL "<baseUrl>/download/attachments/<pageId>/<NAME>".
#   $htmlContent - page body to transform.
#   $baseUrl     - prefix of the attachment download URL.
#   $pageId      - id of the page owning the attachments.
#   $authHeader  - Authorization header value forwarded to Convert-ImageToBase64.
# Returns the transformed HTML; content without matching macros is returned unchanged.
function ExtractConfluenceImages {
    param (
        [string]$htmlContent,
        [string]$baseUrl,
        [string]$pageId,
        [string]$authHeader
    )
    $imageMacroPattern = '<ac:image[^>]*>\s*<ri:attachment\s+[^>]*ri:filename="([^"]+)"[^>]*>\s*</ac:image>'
    $htmlContent = [regex]::Replace($htmlContent, $imageMacroPattern, {
        param($match)
        # Attribute text as written in the markup (may contain entities such as &amp;).
        $filename = $match.Groups[1].Value

        # The URL needs the real file name: decode entities, then percent-encode so that
        # characters like space, '#', '?', '%' or '&' cannot break or truncate the path.
        $decodedName = [System.Net.WebUtility]::HtmlDecode($filename)
        $escapedName = [uri]::EscapeDataString($decodedName)
        $attachmentUrl = "${baseUrl}/download/attachments/${pageId}/${escapedName}"

        $base64Src = Convert-ImageToBase64 -imageUrl $attachmentUrl -authHeader $authHeader
        # The alt text reuses the attribute text as-is: it is already attribute-safe.
        return "<img src=`"$base64Src`" alt=`"$filename`" />"
    })
    return $htmlContent
}
# Lists Confluence spaces from "<baseUrl>/rest/api/space", following the
# response's "_links.next" value until no further page is advertised.
#   $baseUrl    - prefix for the REST endpoint and for each relative "next" link.
#   $authHeader - Authorization header value sent with every request.
# Returns one object per space with Name, Status, OptionMessage and Key
# (OptionMessage and Key both carry the space key). If a request fails, the error
# is written to the host and whatever was collected so far is returned.
function GetAllSpaces {
    param (
        [string]$baseUrl,
        [string]$authHeader
    )
    $spacesUrl = "${baseUrl}/rest/api/space?limit=100"
    $all_spaces = New-Object System.Collections.Generic.List[object]
    try {
        while ($spacesUrl) {
            # Retrieve one page of spaces
            $response = Invoke-RestMethod -Uri $spacesUrl -Headers @{ Authorization = $authHeader } -Method Get

            # Collect space details (foreach, unlike a pipeline, does not run once for a $null result set)
            foreach ($space in $response.results) {
                if ($null -eq $space) { continue }
                $all_spaces.Add([PSCustomObject]@{
                    Name          = $space.name
                    Status        = $space.status
                    OptionMessage = $space.key
                    Key           = $space.key
                })
            }

            # Follow the next-page link when present. It must be expanded with $(...):
            # "${response._links.next}" would look up a variable literally named
            # 'response._links.next' and always yield an empty string.
            $nextLink = $response._links.next
            if ($nextLink) {
                $spacesUrl = "${baseUrl}$($nextLink)"
            } else {
                $spacesUrl = $null
            }
        }
    } catch {
        Write-Host "Failed to retrieve spaces. Error: $_"
    }
    return $all_spaces.ToArray()
}
# Downloads every content item of type $ContentType (script-level setting) from one
# Confluence space, including each item's body in storage format.
#   $baseUrl    - prefix for the REST endpoint.
#   $SpaceKey   - key of the space to read.
#   $authHeader - Authorization header value; when empty, "Basic $encodedCreds"
#                 (script-level credential) is used, as earlier versions always did.
# Returns the collected items; progress is reported through PrintAndLog.
function GetAllPages {
    param ([string]$baseUrl, [string]$SpaceKey, [string]$authHeader)
    $AllPages = @()
    $start = 0
    $limit = 25

    # Honour the caller-supplied header, falling back to the script-level credential.
    $authorization = $authHeader
    if ([string]::IsNullOrEmpty($authorization)) {
        $authorization = "Basic $encodedCreds"
    }

    PrintAndLog -message "Retrieving Confluence content from space '$SpaceKey'..."
    while ($true) {
        $Url = "$baseUrl/rest/api/content" +
                "?spaceKey=$SpaceKey" +
                "&type=$ContentType" +
                "&expand=body.storage" +
                "&limit=$limit" +
                "&start=$start"
        PrintAndLog -message  "Querying: $Url (items $start - $($start + $limit))"
        $response = Invoke-RestMethod -Uri $Url -Method GET -Headers @{
            "Authorization" = $authorization
            "Accept"        = "application/json"
        }

        $batch = @($response.results | Where-Object { $null -ne $_ })
        if ($batch.Count -eq 0) {
            break
        }
        $AllPages += $batch

        # Advance by what was actually returned: the server may hand back fewer items than requested.
        $start += $batch.Count

        # The response's "size" is the number of items in THIS page, not the total, so it
        # cannot be used as a stop condition. Keep going for as long as the server
        # advertises a next page.
        if (-not $response._links.next) {
            break
        }
    }
    PrintAndLog -message "Downloaded $($AllPages.Count) Confluence items from spacee: $($SpaceKey)."
    return $AllPages
}


# STEP 2: Present Confluence / Source Options and get all articles within a single space or all spaces
PrintAndLog -message  "Getting All Spaces and configuring Source options (Confluence-Side)"
# Wrapped in @() so .Count and indexing behave the same for zero, one or many spaces.
$AllSpaces = @(GetAllSpaces -baseUrl $ConfluenceBaseUrl -authHeader "Basic $encodedCreds")
if ($AllSpaces.Count -eq 0) {
    PrintAndLog -message  "Sorry, we didnt seem to see any Confluence Spaces! Double-check your credentials and try again."
}
$SourcePages = @()

# Ask whether to migrate from one space or from all of them.
$SourceMigrationChoice=$(Select-Object-From-List -Objects @(
[PSCustomObject]@{
    OptionMessage= "From a Single/Specific Confluence Space"
    Identifier = 0
}, 
[PSCustomObject]@{
    OptionMessage= "From All ($($AllSpaces.count)) Confluence Space(s)"
    Identifier = 1
}
) -message "Configure Source (Confluence-Side) Options from Confluence- Migrate pages from which Space(s)?" -allowNull $false)

if ([int]$SourceMigrationChoice.Identifier -eq 0) {
    # "-Message" must be written without a space after the dash; "- Message" would be
    # bound as two positional arguments and the prompt text would be lost.
    $SingleChosenSpace=$(Select-Object-From-List -Objects $AllSpaces -Message "From which single space would you like to migrate pages from?")
    $SingleChosenSpace.OptionMessage="$($SingleChosenSpace.OptionMessage) (space: $($SingleChosenSpace.name)/$($SingleChosenSpace.key))"
    $SourcePages = @(GetAllPages -SpaceKey $SingleChosenSpace.key -authHeader "Basic $encodedCreds" -baseUrl $ConfluenceBaseUrl)
} else {
    foreach ($space in $AllSpaces) {
        PrintAndLog -message "Obtaining Pages from space: $($space.name)/$($space.key)"
        $SourcePages += @(GetAllPages -SpaceKey $space.key -authHeader "Basic $encodedCreds" -baseUrl $ConfluenceBaseUrl)
    }
}

# Fold the page count into the choice's description; later prompts reuse this text.
$SourceMigrationChoice.OptionMessage="Migrate $($SourcePages.Count) Articles/Pages $($SourceMigrationChoice.OptionMessage)"
# Log the readable description rather than the object's "@{...}" rendering.
PrintAndLog -message "Elected to $($SourceMigrationChoice.OptionMessage)"
if ($SourcePages.Count -eq 0) {
    PrintAndLog -message  "Sorry, we didnt seem to see any Source Articles/Pages in Confluence! Double-check your credentials and try again."
}

# STEP 3: Present Options for Hudu / Destination
PrintAndLog -message  "Getting All Companies and configuring destination options (Hudu-Side)"
# Wrapped in @() so .Count and indexing behave the same for zero, one or many companies.
$all_companies = @(Get-HuduCompanies)
if ($all_companies.Count -eq 0) {
    PrintAndLog -message  "Sorry, we didnt seem to see any Companies set up in Hudu... If you intend to attribute certain articles to certain companies, be sure to add your companies first!"
}

# $Attribution_Options is always an array of destination descriptors with the fields
# CompanyId, CompanyName, OptionMessage and IsGlobalKB.
$Attribution_Options=@()
$DestMigrationChoice=$(Select-Object-From-List -Objects @(
    [PSCustomObject]@{
        OptionMessage= "To a Single Specific Company in Hudu"
        Identifier = 0
    },
    [PSCustomObject]@{
        OptionMessage= "To Global Knowledge Base in Hudu (generalized / non-company-specific)"
        Identifier = 1
    }, 
    [PSCustomObject]@{
        OptionMessage= "To Multiple Companies in Hudu - Let Me Choose for Each article ($($all_companies.count) available destination company choices)"
        Identifier = 2
    }
) -message "Configure Destination (Hudu-Side) Options- $($SourceMigrationChoice.OptionMessage) to where in Hudu?" -allowNull $false)

if ([int]$DestMigrationChoice.Identifier -eq 0) {
    # One company receives everything. The prompt takes its description from the source
    # choice ($SourcePages itself has no OptionMessage property).
    $SingleCompanyChoice=$(Select-Object-From-List -Objects $all_companies -message "Which company to $($SourceMigrationChoice.OptionMessage) ($($SourcePages.Count)) articles to?")
    $Attribution_Options+=[PSCustomObject]@{
        CompanyId            = $SingleCompanyChoice.Id
        CompanyName          = $SingleCompanyChoice.Name
        OptionMessage        = "Company Name: $($SingleCompanyChoice.Name), Company ID: $($SingleCompanyChoice.Id)"
        IsGlobalKB           = $false
    }
    $DestMigrationChoice.OptionMessage="$($DestMigrationChoice.OptionMessage) (Company Name: $($SingleCompanyChoice.Name), Company ID: $($SingleCompanyChoice.Id))"
} elseif ([int]$DestMigrationChoice.Identifier -eq 1) {
    # Everything goes to the global knowledge base.
    $Attribution_Options+=[PSCustomObject]@{
        CompanyId            = 0
        CompanyName          = "Global KB"
        OptionMessage        = "No Company Attribution (Upload As Global KnowledgeBase Article)"
        IsGlobalKB           = $true
    }
} else {
    # Per-article choice: every company, plus "global KB" and "skip" entries.
    foreach ($company in $all_companies) {
        $Attribution_Options+=[PSCustomObject]@{
            CompanyId            = $company.Id
            CompanyName          = $company.Name
            OptionMessage        = "Company Name: $($company.Name), Company ID: $($company.Id)"
            IsGlobalKB           = $false
        }
    }
    $Attribution_Options+=[PSCustomObject]@{
        CompanyId            = 0
        CompanyName          = "Global KB"
        OptionMessage        = "No Company Attribution (Upload As Global KnowledgeBase Article)"
        IsGlobalKB           = $true
    }
    # CompanyId -1 marks the "skip" entry.
    $Attribution_Options+=[PSCustomObject]@{
        CompanyId            = -1
        CompanyName          = "None (SKIP FOR NOW)"
        OptionMessage        = "Skipped"
        IsGlobalKB           = $false
    }
}

PrintAndLog -message "You've elected for this migration path: $($SourceMigrationChoice.OptionMessage) $($DestMigrationChoice.OptionMessage)."
# Last chance to abort; whatever is typed is discarded instead of being echoed to the output.
$null = Read-Host "Press enter now or CTL+C / Close window to exit now!"

# STEP 4: Upload every source page to Hudu according to the destination choice.
$PageIDX=0
foreach ($page in $SourcePages) {
    $pageId    = $page.id
    $title     = $page.title
    # Replace Confluence image macros with <img> tags (base64-embedded where the download succeeds).
    $html      = ExtractConfluenceImages -htmlContent $page.body.storage.value -baseUrl $ConfluenceBaseUrl -pageId $pageId -authHeader "Basic $encodedCreds"

    $PageIDX=$PageIDX+1
    $descriptor = "$PageIDX of $($SourcePages.count), ID: $pageId, titled $title"
    # Short preview of the article body for the log / per-article prompt.
    $contentSnippet = if ($html.Length -gt $ConfluenceArticlePreviewLength) {
        $html.Substring(0, $ConfluenceArticlePreviewLength) + "..."
    } else {
        $html
    }
    $articlePreview=@"
Mapping Confluence Page $descriptor ---
Title: $title
Snippet: $contentSnippet
"@
    $completionPercentage = Get-PercentDone -Current $PageIDX -Total $SourcePages.count
    Write-Progress -Activity "Processing $descriptor" -Status "$completionPercentage%" -PercentComplete $completionPercentage

    if ([int]$DestMigrationChoice.Identifier -eq 0) {
        PrintAndLog "Migrating Article: $articlePreview to one company (name: $($SingleCompanyChoice.name) / id: $($SingleCompanyChoice.id))"
        New-HuduArticle -Name "$title" -Content "$html" -CompanyId $SingleCompanyChoice.id
        continue
    } elseif ([int]$DestMigrationChoice.Identifier -eq 1) {
        PrintAndLog "Migrating Article: $articlePreview to Global Knowledgebase in Hudu"
        New-HuduArticle -Name "$title" -Content "$html"
        continue
    }

    # Per-article destination: ask the user where this page should go.
    $Company_attribution=$(Select-Object-From-List -message "Migrating Article: $articlePreview... Which company to migrate into?" -objects $Attribution_Options)

    # The "skip" entry is marked with CompanyId -1; it must not be uploaded.
    if ($null -eq $Company_attribution -or $Company_attribution.CompanyId -eq -1) {
        PrintAndLog -message  "Skipping article $title (no destination chosen)"
        continue
    }

    PrintAndLog -message  "Uploading article $title for Company: $($Company_attribution.CompanyName)"
    if ($Company_attribution.IsGlobalKB) {
        # Global KB articles are created without a company, as in the global-only branch above.
        New-HuduArticle -Name "$title" -Content "$html"
    } else {
        New-HuduArticle -Name "$title" -Content "$html" -CompanyId $Company_attribution.CompanyId
    }
}
# Clear the progress bar once all pages have been handled.
Write-Progress -Activity "Processing" -Completed


6