I'm using PySpark for the first time, in a Jupyter notebook from Anaconda.
What I'm trying to do is set up the Spark environment so I can read a CSV file from the local disk.
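For context, once the SparkContext/SparkSession is actually working, the CSV read I'm aiming for would look roughly like this (the file path and options are placeholders, not my real data):
Code: Select all
from pyspark.sql import SparkSession

# Build (or reuse) a local SparkSession
spark = (SparkSession.builder
         .master("local[*]")
         .appName("PrdectiveModel")
         .getOrCreate())

# Read a CSV from the local disk; path and options are placeholders
df = spark.read.csv("C:/path/to/data.csv", header=True, inferSchema=True)
df.show(5)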
Edit: Added the code as text, since user2314737 asked for text instead of an image. Now StackOverflow wants me to add more details, because the body of my post is mostly code XD, so I have to chat with you all in this part. You can ignore this paragraph; I just have to keep writing until I can click "Save edits".
Input:
Code: Select all
from pyspark import SparkContext, SparkConf
conf = SparkConf().setAppName("PrdectiveModel")
sc = SparkContext(conf=conf)   # ----> the error is on this line
Code: Select all
Py4JJavaError Traceback (most recent call last)
Input In [13], in ()
1 from pyspark import SparkContext, SparkConf
2 conf = SparkConf().setAppName("PrdectiveModel")
----> 3 sc = SparkContext(conf=conf)
File E:\anaconda3\lib\site-packages\pyspark\context.py:146, in SparkContext.__init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
144 SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
145 try:
--> 146 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
147 conf, jsc, profiler_cls)
148 except:
149 # If an error occurs, clean up in order to allow future SparkContext creation:
150 self.stop()
File E:\anaconda3\lib\site-packages\pyspark\context.py:209, in SparkContext._do_init(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, jsc, profiler_cls)
206 self.environment["PYTHONHASHSEED"] = os.environ.get("PYTHONHASHSEED", "0")
208 # Create the Java SparkContext through Py4J
--> 209 self._jsc = jsc or self._initialize_context(self._conf._jconf)
210 # Reset the SparkConf to the one actually used by the SparkContext in JVM.
211 self._conf = SparkConf(_jconf=self._jsc.sc().conf())
File E:\anaconda3\lib\site-packages\pyspark\context.py:329, in SparkContext._initialize_context(self, jconf)
325 def _initialize_context(self, jconf):
326 """
327 Initialize SparkContext in function to allow subclass specific initialization
328 """
--> 329 return self._jvm.JavaSparkContext(jconf)
File E:\anaconda3\lib\site-packages\py4j\java_gateway.py:1585, in JavaClass.__call__(self, *args)
1579 command = proto.CONSTRUCTOR_COMMAND_NAME +\
1580 self._command_header +\
1581 args_command +\
1582 proto.END_COMMAND_PART
1584 answer = self._gateway_client.send_command(command)
-> 1585 return_value = get_return_value(
1586 answer, self._gateway_client, None, self._fqn)
1588 for temp_arg in temp_args:
1589 temp_arg._detach()
File E:\anaconda3\lib\site-packages\py4j\protocol.py:326, in get_return_value(answer, gateway_client, target_id, name)
324 value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
325 if answer[1] == REFERENCE_TYPE:
--> 326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
331 "An error occurred while calling {0}{1}{2}. Trace:\n{3}\n".
332 format(target_id, ".", name, value))
Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext.
: java.lang.NoClassDefFoundError: Could not initialize class org.apache.spark.storage.StorageUtils$
at org.apache.spark.storage.BlockManagerMasterEndpoint.<init>(BlockManagerMasterEndpoint.scala:110)
at org.apache.spark.SparkEnv$.$anonfun$create$9(SparkEnv.scala:348)
at org.apache.spark.SparkEnv$.registerOrLookupEndpoint$1(SparkEnv.scala:287)
at org.apache.spark.SparkEnv$.create(SparkEnv.scala:336)
at org.apache.spark.SparkEnv$.createDriverEnv(SparkEnv.scala:191)
at org.apache.spark.SparkContext.createSparkEnv(SparkContext.scala:277)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:460)
at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:58)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:238)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.ClientServerConnection.waitForCommands(ClientServerConnection.java:182)
at py4j.ClientServerConnection.run(ClientServerConnection.java:106)
at java.base/java.lang.Thread.run(Thread.java:833)
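The stack trace fails inside org.apache.spark.storage.StorageUtils$ before any of my own code runs, so I suspect it might be a Java/Spark version mismatch rather than my script. For reference, this is how I can print the Java version the notebook actually picks up (I'm not sure this is the cause):
Code: Select all
import subprocess

# "java -version" writes its output to stderr, not stdout
result = subprocess.run(["java", "-version"], capture_output=True, text=True)
print(result.stderr)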
I even used findspark.
Input:
Code: Select all
import findspark
findspark.init()
findspark.find()
Code: Select all
'C:\\spark-3.2.1-bin-hadoop3.2'
STILL!!! my error persists:
Code: Select all
sc = SparkContext(conf=conf)
..............
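Would it make a difference to point findspark at the installation directory explicitly instead of letting it search? As far as I understand, that call would look like this (same Spark path as above):
Code: Select all
import findspark

# Pass the Spark installation directory explicitly instead of auto-detecting it
findspark.init("C:\\spark-3.2.1-bin-hadoop3.2")

from pyspark import SparkContext, SparkConf
conf = SparkConf().setAppName("PrdectiveModel")
sc = SparkContext(conf=conf)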
So I tried a different piece of code:
Code: Select all
import pyspark
from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()
df = spark.sql("select 'spark' as hello ")
df.show()
Code: Select all
RuntimeError Traceback (most recent call last)
Input In [6], in ()
1 import pyspark
3 from pyspark.sql import SparkSession
----> 5 spark = SparkSession.builder.getOrCreate()
7 df = spark.sql("select 'spark' as hello ")
9 df.show()
File C:\spark-3.2.1-bin-hadoop3.2\python\pyspark\sql\session.py:228, in SparkSession.Builder.getOrCreate(self)
226 sparkConf.set(key, value)
227 # This SparkContext may be an existing one.
--> 228 sc = SparkContext.getOrCreate(sparkConf)
229 # Do not update `SparkConf` for existing `SparkContext`, as it's shared
230 # by all sessions.
231 session = SparkSession(sc)
File C:\spark-3.2.1-bin-hadoop3.2\python\pyspark\context.py:392, in SparkContext.getOrCreate(cls, conf)
390 with SparkContext._lock:
391 if SparkContext._active_spark_context is None:
--> 392 SparkContext(conf=conf or SparkConf())
393 return SparkContext._active_spark_context
File C:\spark-3.2.1-bin-hadoop3.2\python\pyspark\context.py:144, in SparkContext.__init__(self, master, appName, sparkHome, pyFiles, environment, batchSize, serializer, conf, gateway, jsc, profiler_cls)
139 if gateway is not None and gateway.gateway_parameters.auth_token is None:
140 raise ValueError(
141 "You are trying to pass an insecure Py4j gateway to Spark. This"
142 " is not allowed as it is a security risk.")
--> 144 SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
145 try:
146 self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
147 conf, jsc, profiler_cls)
File C:\spark-3.2.1-bin-hadoop3.2\python\pyspark\context.py:339, in SparkContext._ensure_initialized(cls, instance, gateway, conf)
337 with SparkContext._lock:
338 if not SparkContext._gateway:
--> 339 SparkContext._gateway = gateway or launch_gateway(conf)
340 SparkContext._jvm = SparkContext._gateway.jvm
342 if instance:
File C:\spark-3.2.1-bin-hadoop3.2\python\pyspark\java_gateway.py:108, in launch_gateway(conf, popen_kwargs)
105 time.sleep(0.1)
107 if not os.path.isfile(conn_info_file):
--> 108 raise RuntimeError("Java gateway process exited before sending its port number")
110 with open(conn_info_file, "rb") as info:
111 gateway_port = read_int(info)
RuntimeError: Java gateway process exited before sending its port number
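Both failures seem to happen while PySpark is launching the JVM, before any of my code runs. In case the environment matters, this is how I can show the variables that the launcher reads on my machine (values omitted because I'm not sure which ones are relevant):
Code: Select all
import os

# Variables PySpark's launcher consults on Windows; any of these may be unset
for name in ("JAVA_HOME", "SPARK_HOME", "HADOOP_HOME", "PYSPARK_PYTHON"):
    print(name, "=", os.environ.get(name))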
Thank you for taking the time to read this. I hope it can be fixed.