sql >> Base de Datos >  >> RDS >> Sqlserver

Importando XML a SQL Server pero tratando de hacer múltiples entradas si existen múltiples resultados para un elemento secundario

Esto debería llevarte bastante lejos. No se ha probado en absoluto, así que lea el código, entiéndalo y realice los cambios necesarios para que funcione.

He eliminado la function e inserté todo el código en el bucle, la función era demasiado voluminosa para mi gusto. Ahora deberías poder ver más claramente lo que está pasando.

Efectivamente, es exactamente el mismo código dos veces, con un pequeño paso adicional que agrega autorreferencias para que pueda consultar cada producto a través de su ID principal y sus ID secundarios de la misma manera, como se explica en los comentarios.

$connectionString = "Data Source=Apps2\Apps2;Initial Catalog=ICECAT;Integrated Security=SSPI"
$batchSize = 50000

# set up [files_index] datatable & read schema from DB
$files_index_table = New-Object System.Data.DataTable
$files_index_adapter = New-Object System.Data.SqlClient.SqlDataAdapter("SELECT * FROM files_index WHERE 0 = 1", $connectionString)
$files_index_adapter.Fill($files_index_table) | Out-Null
$files_index_bcp = New-Object SqlBulkCopy($connectionString)
$files_index_bcp.DestinationTableName = "dbo.files_index"
$files_index_count = 0

# set up [product_ids] datatable & read schema from DB
$product_ids_table = New-Object System.Data.DataTable
$product_ids_adapter = New-Object System.Data.SqlClient.SqlDataAdapter("SELECT * FROM product_ids WHERE 0 = 1", $connectionString)
$product_ids_adapter.Fill($product_ids_table) | Out-Null
$product_ids_bcp = New-Object System.Data.SqlClient.SqlBulkCopy($connectionString)
$product_ids_bcp.DestinationTableName = "dbo.product_ids"
$product_ids_count = 0

# main import loop
$xmlReader = New-Object System.Xml.XmlTextReader("C:\Scripts\icecat\files.index.xml")
while ($xmlReader.Read()) {
    # skip any XML nodes that aren't elements
    if ($xmlReader.NodeType -ne [System.Xml.XmlNodeType]::Element) { continue }

    # handle <file> elements
    if ($xmlReader.Name -eq "file") {
        $files_index_count++

        # remember current product ID, we'll need it when we hit the next <M_Prod_ID> element
        $curr_product_id = $xmlReader.GetAttribute("Product_ID")
        $is_new_file = $true

        $newRow = $files_index_table.NewRow()
        $newRow["Product_ID"] = $xmlReader.GetAttribute("Product_ID")
        $newRow["path"] = $xmlReader.GetAttribute("path")
        $newRow["Updated"] = $xmlReader.GetAttribute("Updated")
        $newRow["Quality"] = $xmlReader.GetAttribute("Quality")
        $newRow["Supplier_id"] = $xmlReader.GetAttribute("Supplier_id")
        $newRow["Prod_ID"] = $xmlReader.GetAttribute("Prod_ID")
        $newRow["Catid"] = $xmlReader.GetAttribute("Catid")
        $newRow["On_Market"] = $xmlReader.GetAttribute("On_Market")
        $newRow["Model_Name"] = $xmlReader.GetAttribute("Model_Name")
        $newRow["Product_View"] = $xmlReader.GetAttribute("Product_View")
        $newRow["HighPic"] = $xmlReader.GetAttribute("HighPic")
        $newRow["HighPicSize"] = $xmlReader.GetAttribute("HighPicSize")
        $newRow["HighPicWifiles_index_tableh"] = $xmlReader.GetAttribute("HighPicWifiles_index_tableh")
        $newRow["HighPicHeight"] = $xmlReader.GetAttribute("HighPicHeight")
        $newRow["Date_Added"] = $xmlReader.GetAttribute("Date_Added")
        $files_index_table.Rows.Add($newRow) | Out-Null

        if ($files_index_table.Rows.Count -eq $batchSize) {
            $files_index_bcp.WriteToServer($files_index_table)
            $files_index_table.Rows.Clear()
            Write-Host "$files_index_count <file> elements processed so far"
        }
    # handle <M_Prod_ID> elements
    } elseif ($xmlReader.Name -eq "M_Prod_ID") {
        $product_ids_count++

        # add self-reference row to the [product_ids] table
        # only for the first <M_Prod_ID> per <file> we need to do this
        if ($is_new_file) {
            $newRow = $product_ids_table.NewRow()
            $newRow["Product_ID"] = $curr_product_id  # from above
            $newRow["Alternative_ID"] = $curr_product_id
            $product_ids_table.Rows.Add($newRow) | Out-Null
            $is_new_file = $false
        }

        $newRow = $product_ids_table.NewRow()
        $newRow["Product_ID"] = $curr_product_id  # from above
        $newRow["Alternative_ID"] = $xmlReader.Value
        $product_ids_table.Rows.Add($newRow) | Out-Null

        if ($product_ids_table.Rows.Count -eq $batchSize) {
            $product_ids_bcp.WriteToServer($files_index_table)
            $product_ids_table.Rows.Clear()
            Write-Host "$product_ids_count <M_Prod_ID> elements processed so far"
        }
    }
}

# write any remaining rows to the server
if ($files_index_table.Rows.Count -gt 0) {
    $files_index_bcp.WriteToServer($files_index_table)
    $files_index_table.Rows.Clear()
}
Write-Host "$files_index_count <file> elements processed overall"

if ($product_ids_table.Rows.Count -gt 0) {
    $product_ids_bcp.WriteToServer($product_ids_table)
    $product_ids_table.Rows.Clear()
}
Write-Host "$product_ids_count <M_Prod_ID> elements processed overall"