Running Delta Live Tables using Mosaic


I'm just learning to use Mosaic and Delta Live Tables in Databricks. I was following this example: https://github.com/databrickslabs/mosaic/tree/main/notebooks/examples/python/OpenStreetMaps and, after configuring everything, I'm not able to run the pipeline.

The error occurs on the line where I enable Mosaic (the import is shown for context; it also appears in the trace below):

import mosaic as mos

mos.enable_mosaic(spark, dbutils)

I also tried installing a specific version of Mosaic, to downgrade to the Mosaic v3 line, and using the preview version of DLT.
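
For reference, by pinning the version I mean installing the library in the notebook before enabling Mosaic, along these lines (the exact version number here is only illustrative):

%pip install databricks-mosaic==0.3.12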

This is the configuration file of my DLT pipeline:

{
    "id": "<id>",
    "pipeline_type": "WORKSPACE",
    "clusters": [
        {
            "label": "default",
            "node_type_id": "Standard_DS3_v2",
            "num_workers": 1
        }
    ],
    "development": true,
    "continuous": false,
    "channel": "CURRENT",
    "photon": true,
    "libraries": [
        {
            "notebook": {
                "path": "/Users/<user>/Mosaic-Example/02_Process"
            }
        }
    ],
    "name": "open_street_map",
    "edition": "ADVANCED",
    "storage": "/temp/mosaic/open_street_maps",
    "target": "open_street_maps",
    "data_sampling": false
}
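
To use the preview version of DLT mentioned above, the only change needed is the channel field in this same config:

"channel": "PREVIEW"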

And here is the complete error trace:

java.lang.RuntimeException: Failed to execute python command for notebook '/Users/<user>/Mosaic-Example/02_Process' with id RunnableCommandId(9033327179885129490) and error AnsiResult(---------------------------------------------------------------------------
Py4JError                                 Traceback (most recent call last)
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-3e1599be-1a1b-46d7-9547-6fb8561be97d/lib/python3.10/site-packages/mosaic/core/library_handler.py:90, in MosaicLibraryHandler.auto_attach(self)
     89     optionModule = getattr(optionClass, "MODULE$")
---> 90     lib = JavaJarId(
     91         JarURI,
     92         ManagedLibraryId.defaultOrganization(),
     93         NoVersionModule.simpleString(),
     94         optionModule.apply(None),
     95         optionModule.apply(None),
     96     )
     97 except:

File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1587, in JavaClass.__call__(self, *args)
   1586 answer = self._gateway_client.send_command(command)
-> 1587 return_value = get_return_value(
   1588     answer, self._gateway_client, None, self._fqn)
   1590 for temp_arg in temp_args:

File /databricks/spark/python/pyspark/errors/exceptions/captured.py:188, in capture_sql_exception.<locals>.deco(*a, **kw)
    187 try:
--> 188     return f(*a, **kw)
    189 except Py4JJavaError as e:

File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:330, in get_return_value(answer, gateway_client, target_id, name)
    329     else:
--> 330         raise Py4JError(
    331             "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
    332             format(target_id, ".", name, value))
    333 else:

Py4JError: An error occurred while calling None.com.databricks.libraries.JavaJarId. Trace:
py4j.Py4JException: Constructor com.databricks.libraries.JavaJarId([class java.net.URI, class java.lang.String, class java.lang.String, class scala.None$, class scala.None$]) does not exist
    at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:203)
    at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:220)
    at py4j.Gateway.invoke(Gateway.java:255)
    at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
    at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
    at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)
    at py4j.ClientServerConnection.run(ClientServerConnection.java:115)
    at java.lang.Thread.run(Thread.java:750)



During handling of the above exception, another exception occurred:

Py4JError                                 Traceback (most recent call last)
File ~/.ipykernel/2098/command--1-2880499838:3
      1 import mosaic as mos
----> 3 mos.enable_mosaic(spark, dbutils)

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-3e1599be-1a1b-46d7-9547-6fb8561be97d/lib/python3.10/site-packages/mosaic/api/enable.py:47, in enable_mosaic(spark, dbutils)
     14 """
     15 Enable Mosaic functions.
     16 
   (...)
     44 
     45 """
     46 config.mosaic_spark = spark
---> 47 _ = MosaicLibraryHandler(config.mosaic_spark)
     48 config.mosaic_context = MosaicContext(config.mosaic_spark)
     50 # Register SQL functions

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-3e1599be-1a1b-46d7-9547-6fb8561be97d/lib/python3.10/site-packages/mosaic/core/library_handler.py:29, in MosaicLibraryHandler.__init__(self, spark)
     25     raise FileNotFoundError(
     26         f"Mosaic JAR package {self._jar_filename} could not be located at {self.mosaic_library_location}."
     27     )
     28 LOGGER.info(f"Automatically attaching Mosaic JAR to cluster.")
---> 29 self.auto_attach()

File /local_disk0/.ephemeral_nfs/envs/pythonEnv-3e1599be-1a1b-46d7-9547-6fb8561be97d/lib/python3.10/site-packages/mosaic/core/library_handler.py:98, in MosaicLibraryHandler.auto_attach(self)
     90     lib = JavaJarId(
     91         JarURI,
     92         ManagedLibraryId.defaultOrganization(),
   (...)
     95         optionModule.apply(None),
     96     )
     97 except:
---> 98     lib = JavaJarId(
     99         JarURI,
    100         ManagedLibraryId.defaultOrganization(),
    101         NoVersionModule.simpleString()
    102     )
    104 libSeq = converters.asScalaBufferConverter((lib,)).asScala().toSeq()
    106 context = DatabricksILoop.getSharedDriverContextIfExists().get()

File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1587, in JavaClass.__call__(self, *args)
   1581 command = proto.CONSTRUCTOR_COMMAND_NAME +\
   1582     self._command_header +\
   1583     args_command +\
   1584     proto.END_COMMAND_PART
   1586 answer = self._gateway_client.send_command(command)
-> 1587 return_value = get_return_value(
   1588     answer, self._gateway_client, None, self._fqn)
   1590 for temp_arg in temp_args:
   1591     if hasattr(temp_arg, "_detach"):

File /databricks/spark/python/pyspark/errors/exceptions/captured.py:188, in capture_sql_exception.<locals>.deco(*a, **kw)
    186 def deco(*a: Any, **kw: Any) -> Any:
    187     try:
--> 188         return f(*a, **kw)
    189     except Py4JJavaError as e:
    190         converted = convert_exception(e.java_exception)

File /databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py:330, in get_return_value(answer, gateway_client, target_id, name)
    326         raise Py4JJavaError(
    327             "An error occurred while calling {0}{1}{2}.\n".
    328             format(target_id, ".", name), value)
    329     else:
--> 330         raise Py4JError(
    331             "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
    332             format(target_id, ".", name, value))
    333 else:
    334     raise Py4JError(
    335         "An error occurred while calling {0}{1}{2}".
    336         format(target_id, ".", name))

Py4JError: An error occurred while calling None.com.databricks.libraries.JavaJarId. Trace:
py4j.Py4JException: Constructor com.databricks.libraries.JavaJarId([class java.net.URI, class java.lang.String, class java.lang.String]) does not exist
    at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:203)
    at py4j.reflection.ReflectionEngine.getConstructor(ReflectionEngine.java:220)
    at py4j.Gateway.invoke(Gateway.java:255)
    at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
    at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
    at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:195)
    at py4j.ClientServerConnection.run(ClientServerConnection.java:115)
    at java.lang.Thread.run(Thread.java:750)

,None,Map(),Map(),List(),List(),Map())
at com.databricks.pipelines.execution.core.languages.PythonRepl.$anonfun$runCmd$1(PythonRepl.scala:337)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
at com.databricks.sql.transaction.tahoe.metering.DeltaLogging.recordFrameProfile(DeltaLogging.scala:266)
at com.databricks.sql.transaction.tahoe.metering.DeltaLogging.recordFrameProfile$(DeltaLogging.scala:264)
at com.databricks.pipelines.execution.core.languages.PythonRepl.recordFrameProfile(PythonRepl.scala:65)
at com.databricks.pipelines.execution.core.languages.PythonRepl.runCmd(PythonRepl.scala:305)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePythonPipelineGraphLoader.$anonfun$loadPythonGraph$12(WorkspacePythonPipelineGraphLoader.scala:159)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286)
at scala.collection.mutable.ArraySeq.foreach(ArraySeq.scala:75)
at scala.collection.TraversableLike.map(TraversableLike.scala:286)
at scala.collection.TraversableLike.map$(TraversableLike.scala:279)
at scala.collection.AbstractTraversable.map(Traversable.scala:108)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePythonPipelineGraphLoader.$anonfun$loadPythonGraph$10(WorkspacePythonPipelineGraphLoader.scala:143)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePythonPipelineGraphLoader.$anonfun$loadPythonGraph$10$adapted(WorkspacePythonPipelineGraphLoader.scala:105)
at scala.collection.immutable.Map$Map1.foreach(Map.scala:193)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePythonPipelineGraphLoader.loadPythonGraph(WorkspacePythonPipelineGraphLoader.scala:105)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePipelineGraphLoader.loadGraph(WorkspacePipelineGraphLoader.scala:159)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePipelineGraphLoader.loadGraph(WorkspacePipelineGraphLoader.scala:53)
at com.databricks.pipelines.execution.extensions.workspace.WorkspacePipelineExecutionExtension$.loadGraph(WorkspacePipelineExecutionExtension.scala:18)
at com.databricks.pipelines.execution.service.DLTComputeRunnableContext.loadGraph(DLTComputeRunnableContext.scala:100)
at com.databricks.pipelines.execution.core.UpdateExecution.initializationForUpdates(UpdateExecution.scala:555)
at com.databricks.pipelines.execution.core.UpdateExecution.$anonfun$initializeAndLoadGraphForRegularUpdate$1(UpdateExecution.scala:642)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.$anonfun$recordPipelinesOperation$3(DeltaPipelinesUsageLogging.scala:115)
at com.databricks.pipelines.common.monitoring.OperationStatusReporter.executeWithPeriodicReporting(OperationStatusReporter.scala:120)
at com.databricks.pipelines.common.monitoring.OperationStatusReporter$.executeWithPeriodicReporting(OperationStatusReporter.scala:160)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.$anonfun$recordPipelinesOperation$6(DeltaPipelinesUsageLogging.scala:135)
at com.databricks.logging.UsageLogging.$anonfun$recordOperation$1(UsageLogging.scala:573)
at com.databricks.logging.UsageLogging.executeThunkAndCaptureResultTags$1(UsageLogging.scala:668)
at com.databricks.logging.UsageLogging.$anonfun$recordOperationWithResultTags$4(UsageLogging.scala:686)
at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:426)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:216)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:424)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:418)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.withAttributionContext(DeltaPipelinesUsageLogging.scala:24)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:472)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:455)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.withAttributionTags(DeltaPipelinesUsageLogging.scala:24)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags(UsageLogging.scala:663)
at com.databricks.logging.UsageLogging.recordOperationWithResultTags$(UsageLogging.scala:582)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.recordOperationWithResultTags(DeltaPipelinesUsageLogging.scala:24)
at com.databricks.logging.UsageLogging.recordOperation(UsageLogging.scala:573)
at com.databricks.logging.UsageLogging.recordOperation$(UsageLogging.scala:542)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.recordOperation(DeltaPipelinesUsageLogging.scala:24)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.recordOperation0(DeltaPipelinesUsageLogging.scala:59)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.$anonfun$recordPipelinesOperation$1(DeltaPipelinesUsageLogging.scala:127)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.recordPipelinesOperation(DeltaPipelinesUsageLogging.scala:105)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.recordPipelinesOperation$(DeltaPipelinesUsageLogging.scala:101)
at com.databricks.pipelines.execution.core.UpdateExecution.recordPipelinesOperation(UpdateExecution.scala:68)
at com.databricks.pipelines.execution.core.UpdateExecution.executeStage(UpdateExecution.scala:412)
at com.databricks.pipelines.execution.core.UpdateExecution.initializeAndLoadGraphForRegularUpdate(UpdateExecution.scala:642)
at com.databricks.pipelines.execution.core.UpdateExecution.$anonfun$executeUpdate$1(UpdateExecution.scala:524)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.FrameProfiler$.record(FrameProfiler.scala:94)
at com.databricks.pipelines.execution.core.UpdateExecution.executeUpdate(UpdateExecution.scala:523)
at com.databricks.pipelines.execution.core.UpdateExecution.$anonfun$start$3(UpdateExecution.scala:232)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.logging.UsageLogging.$anonfun$withAttributionContext$1(UsageLogging.scala:426)
at scala.util.DynamicVariable.withValue(DynamicVariable.scala:62)
at com.databricks.logging.AttributionContext$.withValue(AttributionContext.scala:216)
at com.databricks.logging.UsageLogging.withAttributionContext(UsageLogging.scala:424)
at com.databricks.logging.UsageLogging.withAttributionContext$(UsageLogging.scala:418)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.withAttributionContext(DeltaPipelinesUsageLogging.scala:24)
at com.databricks.logging.UsageLogging.withAttributionTags(UsageLogging.scala:472)
at com.databricks.logging.UsageLogging.withAttributionTags$(UsageLogging.scala:455)
at com.databricks.pipelines.execution.core.monitoring.PublicLogging.withAttributionTags(DeltaPipelinesUsageLogging.scala:24)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging$$anon$1.runWithAttributionTags(DeltaPipelinesUsageLogging.scala:77)
at sun.reflect.NativeMethodAccessorImpl.invoke0(NativeMethodAccessorImpl.java:-2)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.withDbAttributionTags(DeltaPipelinesUsageLogging.scala:84)
at com.databricks.pipelines.execution.core.monitoring.DeltaPipelinesUsageLogging.withDbAttributionTags$(DeltaPipelinesUsageLogging.scala:83)
at com.databricks.pipelines.execution.core.UpdateExecution.withDbAttributionTags(UpdateExecution.scala:68)
at com.databricks.pipelines.execution.core.UpdateExecution.$anonfun$start$1(UpdateExecution.scala:208)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.pipelines.execution.core.UCContextCompanion$OptionUCContextHelper.runWithNewUCSIfAvailable(BaseUCContext.scala:1087)
at com.databricks.pipelines.execution.core.UpdateExecution.start(UpdateExecution.scala:195)
at com.databricks.pipelines.execution.service.ExecutionBackend$$anon$2.$anonfun$run$2(ExecutionBackend.scala:712)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.pipelines.execution.core.CommandContextUtils$.withCommandContext(CommandContextUtils.scala:66)
at com.databricks.pipelines.execution.service.ExecutionBackend$$anon$2.run(ExecutionBackend.scala:708)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:118)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at com.databricks.spark.util.IdentityClaim$.withClaim(IdentityClaim.scala:48)
at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.$anonfun$runWithCaptured$4(SparkThreadLocalForwardingThreadPoolExecutor.scala:81)
at com.databricks.unity.UCSEphemeralState$Handle.runWith(UCSEphemeralState.scala:41)
at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:80)
at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:66)
at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:115)
at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:118)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)

As far as I can tell from the trace, Mosaic's auto_attach first tries a five-argument JavaJarId constructor, then falls back to a three-argument one, and the runtime rejects both. What am I missing?
