How to add metadata to search items?

35 Views Asked by At

I'm using custom skills to OCR and chunk data from images. In parallel, I'm pulling additional context data from a CSV file and trying to add it to each search result as additional metadata. I can see the metadata on the parent item, but not on the child items. Is there a way to map these fields so that they are populated on the child items as well?

These are the mappings I'm using:

{
    "outputFieldMappings": [
    {
        "sourceFieldName": "/document/metadata/special_code",
        "targetFieldName": "metadata_special_code"
    },
    {
         "sourceFieldName": "/document/metadata/document_type",
         "targetFieldName": "metadata_document_type"
    },
    {
         "sourceFieldName": "/document/metadata/location",
         "targetFieldName": "metadata_location"
    }
  ]
}

This is the output I'm seeing in the search results, with an example of a parent item followed by a child item. The metadata fields are null on the child item, but I'd like them to be populated there too.

{
    "@search.score": 0.01515151560306549,
    "@search.rerankerScore": 0.8941482305526733,
    "@search.captions": [
    {
        "text": "sample file.pdf.",
        "highlights": "<em>sample</em> file.pdf."
    }],
    "chunk_id":"<parent id>",
    "parent_id": null,
    "chunk": null,
    "title": "sample file.pdf",
    "metadata_special_code": "12345678",
    "metadata_document_type": "pdf",
    "metadata_location": "test-store/sample file.pdf"
},
{
   "@search.score": 0.032786883413791656,
   "@search.rerankerScore": 0.9278492331504822,
   "@search.captions": [
   {
       "text": "sample file.pdf. <text here>"
   }],
   "chunk_id":"<chunk id>",
   "parent_id":"<parent id>",
   "chunk": "<text here>",
   "title": "sample file.pdf",
   "metadata_special_code": null,
   "metadata_document_type": null,
   "metadata_location": null
}

Edit: Adding the index, indexer, and skillset details as requested.

Index Definition

{
  "@odata.context": "https://search.windows.net/$metadata#indexes/$entity",
  "@odata.etag": "",
  "name": "test",
  "defaultScoringProfile": null,
  "fields": [
    {
      "name": "chunk_id",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "sortable": true,
      "facetable": true,
      "key": true,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": "keyword",
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "parent_id",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "sortable": true,
      "facetable": true,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "chunk",
      "type": "Edm.String",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "title",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "vector",
      "type": "Collection(Edm.Single)",
      "searchable": true,
      "filterable": false,
      "retrievable": true,
      "sortable": false,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": 1536,
      "vectorSearchProfile": "full-skill-test-profile",
      "synonymMaps": []
    },
    {
      "name": "metadata_cutomer_code",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "sortable": true,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": null,
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_document_type",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "sortable": true,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": "standard.lucene",
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_content",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "sortable": true,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": "standard.lucene",
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    },
    {
      "name": "metadata_customer_code",
      "type": "Edm.String",
      "searchable": true,
      "filterable": true,
      "retrievable": true,
      "sortable": true,
      "facetable": false,
      "key": false,
      "indexAnalyzer": null,
      "searchAnalyzer": null,
      "analyzer": "standard.lucene",
      "normalizer": null,
      "dimensions": null,
      "vectorSearchProfile": null,
      "synonymMaps": []
    }
  ],
  "scoringProfiles": [],
  "corsOptions": null,
  "suggesters": [],
  "analyzers": [],
  "normalizers": [],
  "tokenizers": [],
  "tokenFilters": [],
  "charFilters": [],
  "encryptionKey": null,
  "similarity": {
    "@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
    "k1": null,
    "b": null
  },
  "semantic": {
    "defaultConfiguration": "full-skill-test-semantic-configuration",
    "configurations": [
      {
        "name": "full-skill-test-semantic-configuration",
        "prioritizedFields": {
          "titleField": {
            "fieldName": "title"
          },
          "prioritizedContentFields": [
            {
              "fieldName": "chunk"
            }
          ],
          "prioritizedKeywordsFields": []
        }
      }
    ]
  },
  "vectorSearch": {
    "algorithms": [
      {
        "name": "full-skill-test-algorithm",
        "kind": "hnsw",
        "hnswParameters": {
          "metric": "cosine",
          "m": 4,
          "efConstruction": 400,
          "efSearch": 500
        },
        "exhaustiveKnnParameters": null
      }
    ],
    "profiles": [
      {
        "name": "full-skill-test-profile",
        "algorithm": "full-skill-test-algorithm",
        "vectorizer": "full-skill-test-vectorizer"
      }
    ],
    "vectorizers": [
      {
        "name": "full-skill-test-vectorizer",
        "kind": "azureOpenAI",
        "azureOpenAIParameters": {
          "resourceUri": "https://openai.azure.com",
          "deploymentId": "text-embedding-ada-002",
          "apiKey": "<redacted>",
          "authIdentity": null
        },
        "customWebApiParameters": null
      }
    ]
  }
}

Indexer Definition

{
  "@odata.context": "https://search.windows.net/$metadata#indexers/$entity",
  "@odata.etag": "",
  "name": "indexer",
  "description": null,
  "dataSourceName": "datasource",
  "skillsetName": "skillset",
  "targetIndexName": "index",
  "disabled": null,
  "schedule": null,
  "parameters": {
    "batchSize": null,
    "maxFailedItems": null,
    "maxFailedItemsPerBatch": null,
    "base64EncodeKeys": null,
    "configuration": {
      "dataToExtract": "contentAndMetadata",
      "parsingMode": "default",
      "imageAction": "generateNormalizedImagePerPage",
      "allowSkillsetToReadFileData": true
    }
  },
  "fieldMappings": [
    {
      "sourceFieldName": "metadata_storage_name",
      "targetFieldName": "title",
      "mappingFunction": null
    }
  ],
  "outputFieldMappings": [
    {
      "sourceFieldName": "/document/ref_metadata/special_code",
      "targetFieldName": "metadata_special_code"
    },
    {
      "sourceFieldName": "/document/ref_metadata/document_type",
      "targetFieldName": "metadata_document_type"
    },
    {
      "sourceFieldName": "/document/ref_metadata/location",
      "targetFieldName": "metadata_location"
    }
  ],
  "cache": null,
  "encryptionKey": null
}

Skillset

{
      "@odata.type": "#Microsoft.Skills.Custom.WebApiSkill",
      "name": "#2",
      "description": "",
      "context": "/document",
      "uri": "https://functionapp.azurewebsites.net/api/MetadataOutput?code=<code>",
      "httpMethod": "POST",
      "timeout": "PT3M50S",
      "batchSize": 1,
      "degreeOfParallelism": 1,
      "inputs": [
        {
          "name": "document",
          "source": "/document/metadata_storage_name"
        }
      ],
      "outputs": [
        {
          "name": "ref_metadata",
          "targetName": "output_metadata"
        }
      ],
      "httpHeaders": {}
    }
0

There are 0 best solutions below