I'm using PySpark for the first time, in a Jupyter notebook from Anaconda.
What I'm trying to do is set up the Spark environment so I can read a CSV file from the local disk.
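For context, once the SparkContext/SparkSession is actually working, the CSV read I'm aiming for would look roughly like this (the file path and options are placeholders, not my real data):
Code: Select all
from pyspark.sql import SparkSession

# Build (or reuse) a local SparkSession
spark = (SparkSession.builder
         .master("local[*]")
         .appName("PrdectiveModel")
         .getOrCreate())

# Read a CSV from the local disk; path and options are placeholders
df = spark.read.csv("C:/path/to/data.csv", header=True, inferSchema=True)
df.show(5)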
Edit: Added the code as text, since user2314737 asked for text instead of an image. Now StackOverflow wants me to add more details, because the body of my post is mostly code XD, so I have to chat with you all in this part. You can ignore this paragraph; I just have to keep writing until I can click "Save edits".
Input:
Code: Select all
from pyspark import SparkContext, SparkConf
conf = SparkConf().setAppName("PrdectiveModel")
sc = SparkContext(conf=conf)   # ----> the error is on this line
Code: Select all
Py4JJavaError Traceback (most recent call last)
Input In [13], in ()
1 from pyspark import SparkContext, SparkConf
2 conf = SparkConf().setAppName("PrdectiveModel")
----> 3 sc = SparkContext(conf=conf)
File E:\anaconda3\lib\site-packages\pyspark\context.py:146, in SparkContext.__init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
144 SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
145 try:
--> 146 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
147 conf, jsc, profiler_cls)
148 except:
149 # If an error occurs, clean up in order to allow future SparkContext creation:
150 self.stop()
File E:\anaconda3\lib\site-packages\pyspark\context.py:209, in SparkContext._do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, jsc, profiler_cls)
206 self.environment["PYTHONHASHSEED"] = os.environ.get("PYTHONHASHSEED", "0")
208 # Create the Java SparkContext through Py4J
--> 209 self._jsc = jsc or self._initialize_context(self._conf._jconf)
210 # Reset the SparkConf to the one actually used by the SparkContext in JVM.
211 self._conf = SparkConf(_jconf=self._jsc.sc().conf())
File E:\anaconda3\lib\site-packages\pyspark\context.py:329, in SparkContext._initialize_context(self, jconf)
325 def _initialize_context(self, jconf):
326 """
327 Initialize SparkContext in function to allow subclass specific initialization
328 """
--> 329 return self._jvm.JavaSparkContext(jconf)
File E:\anaconda3\lib\site-packages\py4j\java_gateway.py:1585, in JavaClass.__call__(self, *args)
1579 command = proto.CONSTRUCTOR_COMMAND_NAME +\
1580 self._command_header +\
1581 args_command +\
1582 proto.END_COMMAND_PART
1584 answer = self._gateway_client.send_command(command)
-> 1585 return_value = get_return_value(
1586 answer, self._gateway_client, None, self._fqn)
1588 for temp_arg in temp_args:
1589 temp_arg._detach()
File E:\anaconda3\lib\site-packages\py4j\protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext.
: java.lang.NoClassDefFoundError: Could not initialize class org.apache.spark.storage.StorageUtils$
at org.apache.spark.storage.BlockManagerMasterEndpoint.<init>(BlockManagerMasterEndpoint.scala:110)
at org.apache.spark.SparkEnv$.$anonfun$create$9(SparkEnv.scala:348)
at org.apache.spark.SparkEnv$.registerOrLookupEndpoint$1(SparkEnv.scala:287)
at org.apache.spark.SparkEnv$.create(SparkEnv.scala:336)
at org.apache.spark.SparkEnv$.createDriverEnv(SparkEnv.scala:191)
at org.apache.spark.SparkContext.createSparkEnv(SparkContext.scala:277)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:460)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:238)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.base/java.lang.Thread.run(Thread.java:833)
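The stack trace fails inside org.apache.spark.storage.StorageUtils$ before any of my own code runs, so I suspect it might be a Java/Spark version mismatch rather than my script. For reference, this is how I can print the Java version the notebook actually picks up (I'm not sure this is the cause):
Code: Select all
import subprocess

# "java -version" writes its output to stderr, not stdout
result = subprocess.run(["java", "-version"], capture_output=True, text=True)
print(result.stderr)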
I even used findspark.
Input:
Code: Select all
import findspark
findspark.init()
findspark.find()
Code: Select all
'C:\\spark-3.2.1-bin-hadoop3.2'
STILL!!! my error persists:
Code: Select all
sc = SparkContext(conf=conf)
..............
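Would it make a difference to point findspark at the installation directory explicitly instead of letting it search? As far as I understand, that call would look like this (same Spark path as above):
Code: Select all
import findspark

# Pass the Spark installation directory explicitly instead of auto-detecting it
findspark.init("C:\\spark-3.2.1-bin-hadoop3.2")

from pyspark import SparkContext, SparkConf
conf = SparkConf().setAppName("PrdectiveModel")
sc = SparkContext(conf=conf)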
So I tried a different piece of code:
Code: Select all
import pyspark
from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()
df = spark.sql("select 'spark' as hello ")
df.show()
Code: Select all
RuntimeError Traceback (most recent call last)
Input In [6], in ()
1 import pyspark
3 from pyspark.sql import SparkSession
----> 5 spark = SparkSession.builder.getOrCreate()
7 df = spark.sql("select 'spark' as hello ")
9 df.show()
File C:\spark-3.2.1-bin-hadoop3.2\python\pyspark\sql\session.py:228, in SparkSession.Builder.getOrCreate(self)
226 sparkConf.set(key, value)
227 # This SparkContext may be an existing one.
--> 228 sc = SparkContext.getOrCreate(sparkConf)
229 # Do not update `SparkConf` for existing `SparkContext`, as it's shared
230 # by all sessions.
231 session = SparkSession(sc)
File C:\spark-3.2.1-bin-hadoop3.2\python\pyspark\context.py:392, in SparkContext.getOrCreate(cls, conf)
390 with SparkContext._lock:
391 if SparkContext._active_spark_context is None:
--> 392 SparkContext(conf=conf or SparkConf())
393 return SparkContext._active_spark_context
File C:\spark-3.2.1-bin-hadoop3.2\python\pyspark\context.py:144, in SparkContext.__init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
139 if gateway is not None and gateway.gateway_parameters.auth_token is None:
140 raise ValueError(
141 "You are trying to pass an insecure Py4j gateway to Spark. This"
142 " is not allowed as it is a security risk.")
--> 144 SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
145 try:
146 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
147 conf, jsc, profiler_cls)
File C:\spark-3.2.1-bin-hadoop3.2\python\pyspark\context.py:339, in SparkContext._ensure_initialized(cls, instance, gateway, conf)
337 with SparkContext._lock:
338 if not SparkContext._gateway:
--> 339 SparkContext._gateway = gateway or launch_gateway(conf)
340 SparkContext._jvm = SparkContext._gateway.jvm
342 if instance:
File C:\spark-3.2.1-bin-hadoop3.2\python\pyspark\java_gateway.py:108, in launch_gateway(conf, popen_kwargs)
105 time.sleep(0.1)
107 if not os.path.isfile(conn_info_file):
--> 108 raise RuntimeError("Java gateway process exited before sending its port number")
110 with open(conn_info_file, "rb") as info:
111 gateway_port = read_int(info)
RuntimeError: Java gateway process exited before sending its port number
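Both failures seem to happen while PySpark is launching the JVM, before any of my code runs. In case the environment matters, this is how I can show the variables that the launcher reads on my machine (values omitted because I'm not sure which ones are relevant):
Code: Select all
import os

# Variables PySpark's launcher consults on Windows; any of these may be unset
for name in ("JAVA_HOME", "SPARK_HOME", "HADOOP_HOME", "PYSPARK_PYTHON"):
    print(name, "=", os.environ.get(name))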
Thank you for taking the time to read this. I hope it can be fixed.