Home | Contact Us | FAQ | Search & Site Map | Link to Us
Sign In | Join | Other 45 Sites in Network
HomeAnnouncementsFree MagazinesWhite PapersSubmit Content
Discussion GroupsASP.NETWindows FormsLanguages.NET FrameworkVisual Studio.NET
Articles.NET FrameworkASP.NETToolsWindows Forms
.NET DirectoryOpen Source ProjectsUser GroupsWeb Resources
Related Topics
Visual Basic 6SQL ServerMS AccessOther DB ProductsMS Server ProductsMore Topics ...

.NET Forum / Languages / VB.NET / March 2008

Tip: Looking for answers? Try searching our database.

speeding up a file conversion from text file to XML format

Thread view: 
Enable EMail Alerts  Start New Thread
Thread rating: 
Peter Newman - 02 Mar 2008 20:26 GMT
HI,  Im usning vb.net ( 2005 ) & SQL Server 2005. I have been asked to try
and improve the importing of files into SQL for old legacy system.  The file
are submitted to the company by FTP, decrypted. ( sample file below )

I have written a module to convert this file into an XML file which imports
to the SQL very quickly. The problem I have come up with is generating the
XML File, when files are received in with inexcess of 4000 data records  (
data recorded marked for with *** for identification only in the sample ).
The client is online waiting for a report, which is not generated till the
file has been imported to the SQL. In testing with a file of 4000 records it
is taking 2 minutes + to generate the xml file and 4 seconds to import it. I
need to find a way to reduce the time it takes to generate the XML style file
or come up with an alternative. The option of upgrading the clients software
to submit XML files is not an option, so the problem remains in-house. The
XML file is generated on the FTP server itself so I cant even blame a slow
network. Any suggestions are most welcome

( below  Sample file & source Code – modified to protect snesitive data )

Sample File

"111111"
"TEST"
"1"        
"100"
"839"
"04/03/2008"   
"001"
"HEADFOODICE@EMAIL.COM"
"CompanyContact@company.co.uk"
"111111 SERVICE  Serial:  839  THIS IS A TEST"
"22222233333333 COMPANY NAME      BANK REF          000000000010099" ***
"TRAILER RECORD"
"QQQQ"
"QQQQQQQQQ"
"QQQQQQQQQ"
"QQQQQQQQQ"
"QQQQQQQQQ"
"QQQQQQQQQ"
"QQQQQQQQQ"
"QQQQQQQQQ"
"QQQQQQQQQ"
"QQQQQQQQQ"
"QQQQQQQQQ"
"QQQQQQQQQ"
"QQQQQQQQQ"

BUILD XML FILE CODE

Imports System.Xml
Imports System.IO

Module XMLBuilder
   Private XML_DOC As New XmlDocument
   Public CURRENTFILE As String = ""
   Dim ROOT As XmlElement
   Public sr As StreamReader
   Private iCount As Integer = 0
   Private InputLine As String = ""
   Public LICENCENUM As String = ""
   Public PIN As String = ""
   Public SERIAL As String = ""
   Public ORIGACC As String = ""
   Private PASSTRAILER As Boolean = False
   Public LEDGERKEY As String = ""
   Public lupBacsID As String = ""
   Public lupOrigSC As String = ""
   Public lupOrigACNO As String = ""
   Public lupOrigACNA As String = ""
   Public lupCountry As String = ""

   Public Sub OpenFile(ByVal FileName As String)
       Dim sr As New StreamReader(FileName)
   End Sub

   Public Function BuildXMLFile(ByVal FullFileName As String) As Boolean
       'Create the Doc
       Dim Root As XmlElement
       ' open the file and get info
       CURRENTFILE = FullFileName
       Dim sr As New StreamReader(CURRENTFILE)
       For iCount = 1 To 10
           Select Case iCount
               Case 1
                   LICENCENUM = StripQuotes(sr.ReadLine)
               Case 2
                   PIN = StripQuotes(sr.ReadLine)
               Case 3
                   TRANSCOUNT = StripQuotes(sr.ReadLine)
               Case 4
                   FILEVALUE = StripQuotes(sr.ReadLine)
               Case 5
                   SERIAL = StripQuotes(sr.ReadLine)
               Case 6
                   MOVEDATE = StripQuotes(sr.ReadLine)
               Case 7
                   ORIGACC = StripQuotes(sr.ReadLine)
           End Select
       Next
       sr.Close()

       'Get Originating Account Details

       Dim GetOrigCommand As New SqlClient.SqlCommand
       GetOrigCommand.CommandText = "Select  BACSID, SortCode,AccNumber,
AccName From  MYSQL.TABLE where (Licence = '" & LICENCENUM & "' AND AccountID
= '" & ORIGACC & "')"
       GetOrigCommand.CommandTimeout = 60
       GetOrigCommand.CommandType = CommandType.Text
       GetOrigCommand.Connection = FTP_Connection
       Dim DataReader As SqlClient.SqlDataReader
       Try
           DataReader = GetOrigCommand.ExecuteReader
           If DataReader.HasRows Then
               DataReader.Read()
               lupBacsID = DataReader(0).ToString
               lupOrigSC = DataReader(1).ToString
               lupOrigACNO = DataReader(2).ToString
               ORICACCNUMBER = lupOrigACNO
               lupOrigACNA = DataReader(3).ToString
           End If

       Catch ex As Exception
           writetolog("Error Looking up OrigAcc")
       Finally
           DataReader.Close()
           GetOrigCommand.Dispose()
       End Try

       XML_DOC.LoadXml("<SQLImporter name='" & LICENCENUM & "." & SERIAL &
"' type='Transmission File' version='2.1238'></SQLImporter>")
       Root = XML_DOC.DocumentElement
       ' Set LedgetKey
       LEDGERKEY = “772857mkkkSoe]7781<hhws” ' Sample LedgetKey  
       ' Create the Header Element
       CreateHeaderElement(XML_DOC, Root)
       ' Create Transactions Element
       CreateTransactionsElement(XML_DOC, Root)
       ' Save the file
       XML_DOC.Save(DayFileArch & "\XMLFiles\" & MovedFileName)

       ' check file has been written and size is > 0
       If My.Computer.FileSystem.FileExists(DayFileArch &
"\BankingFiles\XMLFiles\" & MovedFileName) Then
           BuildXMLFile = True
       Else
           BuildXMLFile = False
       End If
   End Function
   ' Add the Header Elements
   Friend Sub CreateHeaderElement(ByRef m_doc As Xml.XmlDocument, ByVal
parent As Xml.XmlNode)
       Dim Newlayer As XmlElement = m_doc.CreateElement("HeaderFile") '
Creating an element
       ' Add the Header Elements
       AddHeaderElement(XML_DOC, Newlayer)
       'add the layer to the parent
       parent.AppendChild(Newlayer)
   End Sub
   'Add the Transaction Elemennts
   Friend Sub CreateTransactionsElement(ByRef m_doc As Xml.XmlDocument,
ByVal parent As Xml.XmlNode)
       Dim Newlayer As XmlElement = m_doc.CreateElement("Transactions") '
Creating an element
       'Add a grand child element
       ' Write Tranmsactions to file
       Dim sr As New StreamReader(CURRENTFILE)
       ' Move to line 11
       For iCount = 1 To 10
           sr.ReadLine()
       Next
       ' create transactions
       Dim cont As Integer = 0
       Do
           InputLine = StripQuotes(sr.ReadLine)
           If InStr(InputLine, "TRAILER RECORD") Then Exit Do
           AddTransactionElement(XML_DOC, Newlayer, InputLine)
           'add the layer to the parent
           parent.AppendChild(Newlayer)
       Loop Until InStr(InputLine, "TRAILER RECORD") > 0
       sr.Close()
   End Sub

   Friend Sub AddTransactionElement(ByRef m_doc As Xml.XmlDocument, ByVal
parent As Xml.XmlNode, ByVal TransactionRecord As String)
       ' Dim the Element Name
       Dim Transaction As XmlElement = m_doc.CreateElement("Transaction") '
Creating an element
       ' Set the Attributes
       Dim LedgerRef As XmlAttribute = m_doc.CreateAttribute("LedgerRef")
       Dim Licence As XmlAttribute = m_doc.CreateAttribute("Licence")
       Dim SortCode As XmlAttribute = m_doc.CreateAttribute("SortCode")
       Dim AccountNumber As XmlAttribute =
m_doc.CreateAttribute("AccountNumber")
       Dim AccountName As XmlAttribute = m_doc.CreateAttribute("AccountName")
       Dim BankRef As XmlAttribute = m_doc.CreateAttribute("BankRef")
       Dim TransValue As XmlAttribute = m_doc.CreateAttribute("Value")
       Dim TransType As XmlAttribute = m_doc.CreateAttribute("Type")
       ' Set the values
       LedgerRef.InnerText = LEDGERKEY
       SortCode.InnerText = Mid(TransactionRecord, 1, 6)
       Licence.InnerText = LICENCENUM
       AccountNumber.InnerText = Mid(TransactionRecord, 7, 8)
       AccountName.InnerText = Trim(CStr(Mid(TransactionRecord, 15, 18)))
       BankRef.InnerText = Trim(CStr(Mid(TransactionRecord, 13, 18)))
       TransValue.InnerText = Mid(TransactionRecord, 51, 11)
       TransType.InnerText = Mid(TransactionRecord, 62, 2)
       'add the elements to the layer node
       Transaction.Attributes.Append(LedgerRef)
       Transaction.Attributes.Append(Licence)
       Transaction.Attributes.Append(SortCode)
       Transaction.Attributes.Append(AccountNumber)
       Transaction.Attributes.Append(AccountName)
       Transaction.Attributes.Append(BankRef)
       Transaction.Attributes.Append(TransValue)
       Transaction.Attributes.Append(TransType)
       'add the layer to the parent
       parent.AppendChild(Transaction)
   End Sub
   Friend Sub AddHeaderElement(ByRef m_doc As Xml.XmlDocument, ByVal parent
As Xml.XmlNode)

       ' Dim the Element Name
       Dim Header As XmlElement = m_doc.CreateElement("Header") ' Creating
an element
       ' Set the Attributes
       Dim LicenceNo As XmlAttribute = m_doc.CreateAttribute("Licence")
       Dim LegerRef As XmlAttribute = m_doc.CreateAttribute("LedgerKey")
       Dim BacsID As XmlAttribute = m_doc.CreateAttribute("BACSid")
       Dim OriginatingAcc As XmlAttribute = m_doc.CreateAttribute("OrigAcc")
       Dim ORSortCode As XmlAttribute = m_doc.CreateAttribute("OrigSortCode")
       Dim ORAccNumber As XmlAttribute = m_doc.CreateAttribute("OrigAccount")
       Dim ORAccName As XmlAttribute =
m_doc.CreateAttribute("OriginatingAccName")
       Dim SerialNo As XmlAttribute = m_doc.CreateAttribute("SerialNo")
       Dim PinNum As XmlAttribute = m_doc.CreateAttribute("PinNumber")
       Dim SubType As XmlAttribute = m_doc.CreateAttribute("SubmissionType")
       Dim FileState As XmlAttribute = m_doc.CreateAttribute("FileStatus")
       Dim TransCouunt As XmlAttribute = m_doc.CreateAttribute("TransCount")
       Dim TransValue As XmlAttribute = m_doc.CreateAttribute("TransValue")
       Dim RecievedDate As XmlAttribute =
m_doc.CreateAttribute("RecievedDate")
       Dim PayDate As XmlAttribute = m_doc.CreateAttribute("MoveDate")
       Dim Transmitted As XmlAttribute =
m_doc.CreateAttribute("BACSTransmitted")
       Dim Volume As XmlAttribute = m_doc.CreateAttribute("BureauVolume")
       Dim BuildOper As XmlAttribute = m_doc.CreateAttribute("BuildOperator")
       Dim TXOper As XmlAttribute = m_doc.CreateAttribute("TxOperator")
       Dim InvNo As XmlAttribute = m_doc.CreateAttribute("InvoiceNumber")
       Dim Index As XmlAttribute = m_doc.CreateAttribute("SubIndex")
       Dim BReport As XmlAttribute = m_doc.CreateAttribute("InputReport")

       ' Set the values
       LegerRef.InnerText = LEDGERKEY
       LicenceNo.InnerText = LICENCENUM
       BacsID.InnerText = lupBacsID
       OriginatingAcc.InnerText = ORIGACC
       ORSortCode.InnerText = lupOrigSC
       ORAccNumber.InnerText = lupOrigACNO
       ORAccName.InnerText = lupOrigACNA
       SerialNo.InnerText = SERIAL
       PinNum.InnerText = PIN
       Select Case PIN
           Case "TEST"
               SubType.InnerText = PIN
           Case Else
               SubType.InnerText = "Live"
       End Select
       FileState.InnerText = "Unsubmitted"
       TransCouunt.InnerText = TRANSCOUNT
       TransValue.InnerText = FILEVALUE
       RecievedDate.InnerText = Now.ToShortDateString
       PayDate.InnerText = MOVEDATE
       Transmitted.InnerText = ""
       Volume.InnerText = "000000"
       BuildOper.InnerText = ""
       TXOper.InnerText = ""
       InvNo.InnerText = ""
       Index.InnerText = ""
       BReport.InnerText = ""
       'add the elements to the layer node
       Header.Attributes.Append(LegerRef)
       Header.Attributes.Append(LicenceNo)
       Header.Attributes.Append(BacsID)
       Header.Attributes.Append(OriginatingAcc)
       Header.Attributes.Append(ORSortCode)
       Header.Attributes.Append(ORAccNumber)
       Header.Attributes.Append(ORAccName)
       Header.Attributes.Append(SerialNo)
       Header.Attributes.Append(PinNum)
       Header.Attributes.Append(SubType)
       Header.Attributes.Append(FileState)
       Header.Attributes.Append(TransCouunt)
       Header.Attributes.Append(TransValue)
       Header.Attributes.Append(RecievedDate)
       Header.Attributes.Append(PayDate)
       Header.Attributes.Append(Transmitted)
       Header.Attributes.Append(Volume)
       Header.Attributes.Append(BuildOper)
       Header.Attributes.Append(TXOper)
       Header.Attributes.Append(InvNo)
       Header.Attributes.Append(Index)
       Header.Attributes.Append(BReport)
       'add the layer to the parent
       parent.AppendChild(Header)
   End Sub
Cor Ligthert[MVP] - 03 Mar 2008 05:34 GMT
Peter,

This creates an XML file from a database server

\\\
dim ds as new dataset
cim  conn as new connection(connectionstring)
dim da as new sqldataadapter(conn)
da.fill(ds)
ds.write(ThePathOnDisk)
///
Cor
Peter Newman - 03 Mar 2008 07:35 GMT
Hi Cor,

Thank you for your response. The XML file i am producing comes from a
standered text file not from a database, unless i am reeading your answer
wrong. The issue i have is not with the importing of the XML into the SQL
tables, but the creating of the XML file itself from a text file

> Peter,
>
[quoted text clipped - 8 lines]
> ///
> Cor
Steve Gerrard - 03 Mar 2008 08:01 GMT
> HI,  Im usning vb.net ( 2005 ) & SQL Server 2005. I have been asked
> to try and improve the importing of files into SQL for old legacy
[quoted text clipped - 14 lines]
> remains in-house. The XML file is generated on the FTP server itself
> so I cant even blame a slow network. Any suggestions are most welcome

That's a pretty big sample. :)

A couple of thoughts occur to me:

It seems like most of the activity is adding the Transaction XmlElement 4000
times. Couldn't you define the Transaction element, and its attributes, just one
time, and then use cloneNode (or something similar) to copy it, changing the
attributes as needed each time?

It might help to read the incoming text file into an array, and process it from
there, instead of pausing to read the next line as you add each transaction. Or
it might make no difference, but it might be worth checking on it.

The final thought is that given that you have a tightly defined xml structure to
produce, you might consider just creating it yourself using strings, instead of
using the DOM. There is bound to be a lot of extra checking of things going on
in there. Building the strings yourself is not so difficult; it is just a lot of
concatenating of "<elemname>", "attribname=" + attribvalue + ",", "</elemname>",
etc. Might be worth it.
Peter Newman - 03 Mar 2008 08:29 GMT
Thanks Steve,
I will give that a go when i get in the office and keep you posted.

> > HI,  Im usning vb.net ( 2005 ) & SQL Server 2005. I have been asked
> > to try and improve the importing of files into SQL for old legacy
[quoted text clipped - 34 lines]
> concatenating of "<elemname>", "attribname=" + attribvalue + ",", "</elemname>",
> etc. Might be worth it.
Chris Dunaway - 03 Mar 2008 15:55 GMT
On Mar 2, 2:26 pm, Peter Newman
<PeterNew...@discussions.microsoft.com> wrote:

See some thoughts inline:

> to the SQL very quickly. The problem I have come up with is generating the
> XML File, when files are received in with inexcess of 4000 data records  (

> Sample File
>
[quoted text clipped - 23 lines]
> "QQQQQQQQQ"
> "QQQQQQQQQ"

What do the lines with QQQQQQQ stand for?  Are they read?

> BUILD XML FILE CODE
>
[quoted text clipped - 48 lines]
>             End Select
>         Next

What is the purpose of this for loop and select case?  They don't seem
to serve a purpose.  Just remove them:

>         Dim sr As New StreamReader(CURRENTFILE)
>         LICENCENUM = StripQuotes(sr.ReadLine)
[quoted text clipped - 5 lines]
>         ORIGACC = StripQuotes(sr.ReadLine)
>         sr.Close()

>     Friend Sub CreateTransactionsElement(ByRef m_doc As Xml.XmlDocument,
> ByVal parent As Xml.XmlNode)
[quoted text clipped - 7 lines]
>             sr.ReadLine()
>         Next

Here you are re-opening the file and reading the first 10 lines
again.  In effect you are reading the first 10 lines of each file
twice.  Instead, up in the code earlier where you read the first 10
lines and store their values, go ahead and read all the transactions
into an List(Of String) or something so you don't waste the time
duplicating the file I/O.

And finally, you might approach the problem a little differently.
Create two classes.  One for the SqlImporter object and another for
the Transaction object.  The SqlImporter will have a collection
property to hold the transactions.  Then load the file and create the
objects and then use the Xml serializer to create the XML.  You just
work with the object and let the serializer create the XML for you.
You can decorate your classes properties with the appropriate
attributes to control how the xml file will be created.   It will make
your code much more readable and maintainable.

Here's some simple classes that shows how to serialize and deserialize
to and from XML.  You just create instances of the class, populate
it's properties and then call the Serialize method to create the
xml.

Imports System.Xml.Serialization
Imports System.IO

Public Sub Main()

   Dim s As New SqlImporter()
   s.LicenseNumber = "123456"
   s.HeaderFile = "header file"

   Dim t As New Transaction()
   'set t properties here

   s.Transactions.Add(t)

   'Finally, serialize to xml:
   SqlImporter.Save("filename.xml", s)

End Sub

<Serializable()> _
Public Class SqlImporter

 Public Sub New()
   _transactions = New List(Of Transaction)()
 End Sub

 Private _licenseNumber As String
 <XmlAttribute("name")> _
 Public Property LicenseNumber() As String
   Get
     Return _licenseNumber
   End Get
   Set(ByVal value As String)
     _licenseNumber = value
   End Set
 End Property

 Private _headerFile As String
 <XmlAttribute("headerFile")> _
 Public Property HeaderFile() As String
   Get
     Return _headerFile
   End Get
   Set(ByVal value As String)
     _headerFile = value
   End Set
 End Property

 Private _transactions As List(Of Transaction)
 <XmlArray("Transactions"), XmlArrayItem("Transaction",
GetType(Transaction))> _
 Public Property Transactions() As List(Of Transaction)
   Get
     Return _transactions
   End Get
   Set(ByVal value As List(Of Transaction))
     _transactions = value
   End Set
 End Property

 Public Shared Sub Save(ByVal filename As String, ByVal obj As
SqlImporter)
   Using wrt As New StreamWriter(filename)
     Dim xSer As New XmlSerializer(GetType(SqlImporter))
     xSer.Serialize(wrt, obj)
   End Using
 End Sub

 Public Shared Function Load(ByVal filename As String) As SqlImporter
   Dim importer As SqlImporter

   Using rdr As New StreamReader(filename)
     Dim xSer As New XmlSerializer(GetType(SqlImporter))
     importer = DirectCast(xSer.Deserialize(rdr), SqlImporter)
   End Using

   Return importer
 End Function

End Class

<Serializable()> _
Public Class Transaction
 'Properties for Transaction here
End Class

Hope this helps,

Chis
Peter Newman - 03 Mar 2008 16:14 GMT
Thanks Chris,
That has given me food for thought. I cant thank you guys enough,  you learn
something new on every visit here. Keep up the good work

> On Mar 2, 2:26 pm, Peter Newman
> <PeterNew...@discussions.microsoft.com> wrote:
[quoted text clipped - 223 lines]
>
> Chis

Free Magazines

Get these publications absolutely FREE for up to 12 months. There are no hidden fees and no obligation. Simply choose a title, complete the application form and submit it. Read more ...

Oracle MagazineNetwork ComputingComputer WorldBio-IT WorldeWeekInformation WeekInfosecurity
 
Sign In
Join
My Latest Posts
My Monitored Threads
My Blog
My Photo Gallery
My Profile
My Homepage

Start New Thread
Enable EMail Alerts
Rate this Thread



©2008 Advenet LLC   Privacy Policy - Terms of Use
This website includes both content owned or controlled by Advenet as well as content owned or controlled by third parties.