0% found this document useful (0 votes)
97 views

XML Parsing

This document contains code to load an XML catalog file, parse through each CD entry, validate the data structure, extract the field values, and insert each record into an SQLite database table. The code iterates through each CATALOG node and CD sub-node, checks for expected field names and counts, reads the field values into an array, optionally skips blank lines, and constructs an SQL INSERT statement to add the data. Progress updates are displayed during the lengthy import process.

Uploaded by

ud90117
Copyright
© Attribution Non-Commercial (BY-NC)
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
97 views

XML Parsing

This document contains code to load an XML catalog file, parse through each CD entry, validate the data structure, extract the field values, and insert each record into an SQLite database table. The code iterates through each CATALOG node and CD sub-node, checks for expected field names and counts, reads the field values into an array, optionally skips blank lines, and constructs an SQL INSERT statement to add the data. Progress updates are displayed during the lengthy import process.

Uploaded by

ud90117
Copyright
© Attribution Non-Commercial (BY-NC)
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 3

;#include <Array.au3>
#include <_XMLDomWrapper.au3>

; Script entry point: load the XML catalog into the XML DOM wrapper, count the
; CD rows it contains and, if there is anything to import, run the import loop.

#region Load XML
; NOTE(review): the file is parsed as XML but carries an ".xls" extension in
; the original listing - confirm the name against the real data file.
$xml_file = "cd_catalog.xls"
$xml_string = FileRead($xml_file)
If @error = 1 Then
	; FileRead could not open the file; stop here instead of handing an
	; empty string to the XML parser.
	MsgBox(16, "Error", "Could not read the XML file: " & $xml_file)
	Exit
EndIf
_XMLLoadXML($xml_string)
If @error Then
	MsgBox(16, "Erro", _XMLError(""))
	Exit
EndIf
#endregion Load XML

; These three variables are read by __ReadXMLtoSQLite(), so they are declared
; Global explicitly.
Global $path = "/MAIN_PARENT/CATALOG"
Global $kids = _XMLGetChildNodes($path & "/*")
;_ArrayDisplay($kids)
Global $rows_total = _XMLGetNodeCount($path & "/CD")
ConsoleWrite("Total rows: " & $rows_total & @CRLF)
If $rows_total <= 0 Then
	MsgBox(16, "Error", "The selected XML file doesn't contain any useful data.")
	__ExitError()
EndIf
;Exit
__ReadXMLtoSQLite()

; Terminates the script. The SQLite clean-up calls are kept commented out, as
; in the original listing.
Func __ExitError()
	;_SQLite_Close()
	;_SQLite_Shutdown()
	Exit
EndFunc

; Walks every CATALOG node under $path and every CD node inside it, validates
; the field layout of each record, and builds the comma-separated value list
; for an SQL INSERT (the INSERT itself is commented out).
; Reads the globals $path, $kids and $rows_total set by the main script.
; Writes progress and timing information to the console; on a malformed record
; it shows a message box and ends the script through __ExitError().
Func __ReadXMLtoSQLite()
	Local $rows_progress = 0 ; counts how many records have been processed so far
	;_SQLite_Exec(-1,"BEGIN IMMEDIATE;") ; starts the SQL transaction - data is buffered in memory as it is written
	Local $progress = 0
	Local $progress_lastupdate = 0
	Local $aValues, $sValue, $sValues
	Local $timer = TimerInit() ; start the stopwatch

	For $h = 1 To _XMLGetNodeCount($path) ; for each CATALOG
		ConsoleWrite("-> CATALOG " & $h & @CRLF)
		For $i = 1 To _XMLGetNodeCount($path & "[" & $h & "]/CD") ; for each CD in each CATALOG
			$rows_progress += 1
			$progress = Floor(($rows_progress / $rows_total) * 100)
			If @error Then $progress = 0 ; fallback kept from the original listing

			#region show progress
			; Only print when the integer percentage has actually advanced.
			If $progress > $progress_lastupdate Then
				ConsoleWrite("Progress: " & $progress & " %" & @CRLF)
				$progress_lastupdate = $progress
			EndIf
			#endregion show progress

			#region Count Checking
			$aValues = ""
			$sValue = ""
			$aValues = _XMLGetValue($path & "[" & $h & "]/CD[" & $i & "]/" & $kids[1])
			If IsArray($aValues) Then $sValue = $aValues[1]
			If $sValue <> 1 Then
				; this doesn't apply to this XML file, but it's something I have to check with my XML reports.
			EndIf
			#endregion Count Checking

			$aValues = ""
			$sValues = ""

			;#cs
			#region field checking
			; The following verification is essential, because there's the
			; possibility of having a node which doesn't contain a certain field,
			; or the field isn't in the same order as expected.
			; This requires around 50% more processing time (e.g. instead of
			; 20 seconds, it'll take 30).
			$aValues = _XMLGetChildNodes($path & "[" & $h & "]/*[" & $i & "]")
			;_ArrayDisplay($aValues)
			If IsArray($aValues) Then
				If $aValues[0] <> 6 Then
					; Message text reconstructed: the accented characters and the
					; space before "contém" were missing from the listing.
					MsgBox(16, "Erro", "O registo nº " & $rows_progress & " contém " & $aValues[0] & " campos, quando devia conter 6." & @CRLF & _
							"O ficheiro XML não está correcto. O programa vai sair.")
					__ExitError()
				ElseIf $aValues[1] <> "TITLE" Or $aValues[2] <> "ARTIST" Or $aValues[3] <> "COUNTRY" Or $aValues[4] <> "COMPANY" Or _
						$aValues[5] <> "PRICE" Or $aValues[6] <> "YEAR" Then
					MsgBox(16, "Error", "The headers don't match. We're in trouble. I'm gonna run now.")
					__ExitError()
				EndIf
			EndIf
			#endregion field checking
			;#ce

			#region get values
			$aValues = _XMLGetValue($path & "[" & $h & "]/*[" & $i & "]/*")
			;_ArrayDisplay($aValues)
			If Not IsArray($aValues) Then
				; Message text reconstructed (missing space and accents restored).
				MsgBox(16, "Erro", "O registo nº " & $rows_progress & " não foi lido correctamente. O programa vai sair.")
				__ExitError()
			EndIf

			If $aValues[3] == "" Then
				; if this is blank (it should always be 1 or 2), then the whole
				; row would be blank. avoid writing blank lines
				ConsoleWrite("-> Line " & $rows_progress & " is blank." & @CRLF)
				$write = False ; invalid line / no content (flag is not read in this listing)
				ContinueLoop
			Else
				$sValues &= $aValues[3] & ","
			EndIf

			; Quote the text field; embedded double quotes become single quotes.
			$sValues &= '"' & StringReplace($aValues[4], '"', "'") & '"' & ","

			If $aValues[5] == "" Then
				$sValues &= "NULL,"
			Else
				$sValues &= $aValues[5] & ","
			EndIf

			; Last value: no trailing comma, so the list can be dropped straight
			; into "VALUES (...)" if the INSERT below is re-enabled.
			$sValues &= $aValues[6]
			#endregion get values

			#region insert in SQL
			;_SQLite_Exec(-1, "INSERT INTO tblPicagens VALUES (" & $sValues & ");") ; remove the commas and replace ';' with ','
			;If $iRval <> $SQLITE_OK Then ConsoleWrite("SQLite Error: "&_SQLite_ErrMsg ()&@CRLF)
			#endregion insert in SQL
		Next
	Next

	ConsoleWrite("-> It took " & TimerDiff($timer) & " milisseconds." & @CRLF)
	Return
EndFunc

You might also like