Description:
$enumValues will hold a sequence of strings that I have to look values up in. $assetSubGroup will hold an element value from the XML (one per document in the for loop), i.e. a string that I have to match against the sequence above. If there is no match, I have to collect a few element values and return them.
All three of my attempts below give me expanded tree cache full errors. There are ~470,000 assets (XML documents) that I'm querying.
How can I tune these queries to avoid expanded tree cache errors?
approach 1:
let $query-name := "get-asset-sub-group-values"
let $output-dir := "D:\output\"
let $report-uri := concat($output-dir, $query-name, "_report0.txt")
let $enumValues := (:all sequence of strings goes here :)
let $map1 := map:new($enumValues ! map:entry(fn:string(.), fn:true()))
let $result1 := concat('asset-id' , "|", 'upi', "|", 'assetSubGroup', "|", 'asset-type', "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier', "|", 'mm-project-id', "|" , 'sap-project-id' , " ")
let $result2 :=
for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
let $originator := $each-search-copy/biblioCore/originator[1]/text()
let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
let $map2 := map:new($assetSubGroup ! map:entry(fn:string(.), fn:true()))
let $flag := map:keys($map2 - $map1)
return
if ($flag)
then(
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
)
else (
if($assetSubGroup) then()
else (
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
))
let $result3 := ($result1, $result2)
return xdmp:save($report-uri, text{$result3}), xdmp:elapsed-time()
approach 2:
let $query-name := "get-asset-sub-group-values"
let $output-dir := "D:\output\"
let $report-uri := concat($output-dir, $query-name, "_report1.txt")
let $enumValues := (:all string value sequence goes here that has to match:)
let $map1 := map:new($enumValues ! map:entry(fn:string(.), fn:true()))
let $result1 :=( concat('asset-id' , "|", 'upi', "|", 'assetSubGroup', "|", 'asset-type', "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier', "|", 'mm-project-id', "|" , 'sap-project-id' , " "),
for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
let $originator := $each-search-copy/biblioCore/originator[1]/text()
let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
let $map2 := map:new($assetSubGroup ! map:entry(fn:string(.), fn:true()))
let $flag := map:keys($map2 - $map1)
return
if ($flag)
then(
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
)
else (
if($assetSubGroup) then()
else (
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
))
)
return xdmp:save($report-uri, text{$result1}), xdmp:elapsed-time()
approach 3:
let $query-name := "get-asset-sub-group-values"
let $output-dir := "D:\output\"
let $report-uri := concat($output-dir, $query-name, "_report2.txt")
let $enumValues := (:sequence of strings are passed:)
let $result1 :=( concat('asset-id' , "|", 'upi', "|", 'assetSubGroup', "|", 'asset-type', "|", 'asset-sub-type', "|", 'originator', "|", 'originator-identifier', "|", 'mm-project-id', "|" , 'sap-project-id' , " "),
for $each-search-copy in cts:search(collection("metadata-search"), cts:element-value-query(xs:QName("AssetID"), "*"))/metadata
let $asset-id := $each-search-copy/assetIdentifiers/assetIdentifier/AssetID[1]/text()
let $upi := $each-search-copy/assetIdentifiers/assetIdentifier/SAPID[1]/text()
let $asset-type := $each-search-copy/biblioCore/assetType[1]/text()
let $asset-sub-type := $each-search-copy/biblioCore/assetSubType[1]/text()
let $originator := $each-search-copy/biblioCore/originator[1]/text()
let $originator-identifier := $each-search-copy/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]/text()
let $mm-project-id := $each-search-copy/biblioCore/MMProjectID[1]/text()
let $sap-project-id := $each-search-copy/biblioCore/SAPProjectID[1]/text()
let $assetSubGroup := $each-search-copy/biblioCore/assetSubGroup[1]/text()
let $flag := ($assetSubGroup eq $enumValues)
return
if ($flag)
then()
else (
concat($asset-id , "|", $upi, "|", $assetSubGroup, "|", $asset-type, "|", $asset-sub-type, "|", $originator, "|", $originator-identifier, "|", $mm-project-id, "|" , $sap-project-id , " ")
))
return xdmp:save($report-uri, text{$result1}), xdmp:elapsed-time()
If you find that you are having trouble writing a "streamable" query, another alternative is to use a batch tool such as CORB2, which uses multiple threads to execute a module for each document, produces its output in a separate transaction, and collects the results into a final output file. By breaking the work into separate transactions, you don't have to worry about expanded tree cache errors or timeouts, and you can adjust the thread count to perform more work concurrently and finish faster than a single query execution.
An example CORB2 options file to produce your |-delimited text file would look something like this (you would need to adjust the XCC-CONNECTION-URI value for your environment):
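A minimal sketch, assuming ad-hoc modules (the |ADHOC suffix sends the local .xqy files with the job rather than reading them from a modules database) and the ExportBatchToFileTask to append each module's output to one export file; the connection URI, database, thread count, directory, and file name are placeholders to adjust:

# CORB2 options file (sketch) - adjust values for your environment
XCC-CONNECTION-URI=xcc://user:password@localhost:8000/your-content-database
THREAD-COUNT=10
URIS-MODULE=selector.xqy|ADHOC
PROCESS-MODULE=process.xqy|ADHOC
PROCESS-TASK=com.marklogic.developer.corb.ExportBatchToFileTask
EXPORT-FILE-DIR=D:\output
EXPORT-FILE-NAME=get-asset-sub-group-values_report.txt
EXPORT-FILE-TOP-CONTENT=asset-id|upi|assetSubGroup|asset-type|asset-sub-type|originator|originator-identifier|mm-project-id|sap-project-id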
Create the URIs selector module selector.xqy that will find all of the URIs to process:
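A sketch of what selector.xqy could look like, reusing the collection and AssetID criteria from the original queries. A CORB2 URIs module is expected to return the count of URIs followed by the URIs themselves; this sketch assumes the URI lexicon is enabled on the database:

xquery version "1.0-ml";
(: selector.xqy - return the total count followed by the document URIs to process :)
let $uris :=
  cts:uris((), (),
    cts:and-query((
      cts:collection-query("metadata-search"),
      cts:element-value-query(xs:QName("AssetID"), "*")
    )))
return (fn:count($uris), $uris)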
Create the process module process.xqy that will be called for each URI:
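A sketch of process.xqy under the same assumptions. CORB2 passes the document URI in the external $URI variable; a pipe-delimited row is returned only when assetSubGroup is missing or is not one of the allowed values, mirroring the logic of approach 1 (paste your real $enumValues sequence in place of the placeholder values):

xquery version "1.0-ml";
(: process.xqy - called once per URI; returns one pipe-delimited row, or nothing :)
declare variable $URI as xs:string external;

(: placeholder: the same sequence of allowed strings used for $enumValues above :)
declare variable $enumValues as xs:string* := ("value-1", "value-2", "value-3");

let $metadata := fn:doc($URI)/metadata
(: report the asset when assetSubGroup is absent or not one of the allowed values :)
where fn:not($metadata/biblioCore/assetSubGroup[1] = $enumValues)
return
  fn:string-join((
    fn:string($metadata/assetIdentifiers/assetIdentifier/AssetID[1]),
    fn:string($metadata/assetIdentifiers/assetIdentifier/SAPID[1]),
    fn:string($metadata/biblioCore/assetSubGroup[1]),
    fn:string($metadata/biblioCore/assetType[1]),
    fn:string($metadata/biblioCore/assetSubType[1]),
    fn:string($metadata/biblioCore/originator[1]),
    fn:string($metadata/assetIdentifiers/assetIdentifier/OriginatorIdentifier[1]),
    fn:string($metadata/biblioCore/MMProjectID[1]),
    fn:string($metadata/biblioCore/SAPProjectID[1])
  ), "|")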
Invoke the CORB job from the command line, adjusting the paths and filenames for the XCC and CORB jars and your properties file; a sketch of such a command is shown below. Or, if you are using ml-gradle, use the corb task instead.
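A sketch of the command-line invocation; the jar names, versions, and properties file name are placeholders (the semicolon is the Windows classpath separator, matching the D:\ output paths above):

java -server -cp .;marklogic-xcc-9.0.6.jar;marklogic-corb-2.4.0.jar -DOPTIONS-FILE=get-asset-sub-group-values.properties com.marklogic.developer.corb.Manager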