spark_home = os.environ.get('SPARK_HOME', None)
os.environ["SPARK_HOME"] = "C:\spark-2.3.0-bin-hadoop2.7"
import pyspark
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
conf = SparkConf()
sc = SparkContext(conf=conf)
spark = SparkSession.builder.config(conf=conf).getOrCreate()
import pandas as pd
ip = spark.read.format("csv").option("inferSchema","true").option("header","true").load(r"some other file.csv")
kw = pd.read_csv(r"some file.csv",encoding='ISO-8859-1',index_col=False,error_bad_lines=False)
for i in range(len(kw)):
rx = '(?i)'+kw.Keywords[i]
ip = ip.where(~ip['Content'].rlike(rx))
op = ip.toPandas()
op.to_csv(r'something.csv',encoding='utf-8')
< /code>
Pycharm wirft mir jedoch diesen Fehler: < /p>
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
2018-06-08 11:31:52 WARN Utils:66 - Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.debug.maxToStringFields' in SparkEnv.conf.
Traceback (most recent call last):
File "C:/Users/mainak.paul/PycharmProjects/Concept_Building_SIP/ThemeSparkUncoveredGames.py", line 17, in
op = ip.toPandas()
File "C:\Python27\lib\site-packages\pyspark\sql\dataframe.py", line 1966, in toPandas
pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
File "C:\Python27\lib\site-packages\pyspark\sql\dataframe.py", line 466, in collect
port = self._jdf.collectToPython()
File "C:\Python27\lib\site-packages\py4j\java_gateway.py", line 1160, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "C:\Python27\lib\site-packages\pyspark\sql\utils.py", line 63, in deco
return f(*a, **kw)
File "C:\Python27\lib\site-packages\py4j\protocol.py", line 320, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o30.collectToPython.
: java.lang.IllegalArgumentException
< /code>
Ich bekomme einfach nicht, warum .topandas () < /code> nicht funktioniert. Spark Version ist 2.3. Hat sich in dieser Version etwas geändert, von dem ich nicht weiß? Ich habe diesen Code mit Spark 2.2 in einer anderen Maschine ausgeführt und er lief gut.op = ip.where(ip['Content'].rlike(rx)).toPandas()
< /code>
Erhält immer noch den gleichen Fehler. Was mache ich falsch? Gibt es eine andere Möglichkeit zum Exportieren von pyspark.sql.dataframe.dataframe < /code> nach a .csv < /code> ohne die Leistung zu beeinträchtigen?ip.write.csv('file.csv')
< /code>
Jetzt erhalte ich den folgenden Fehler: < /p>
Traceback (most recent call last):
File "somefile.csv", line 21, in
ip.write.csv('somefile.csv')
File "C:\Python27\lib\site-packages\pyspark\sql\readwriter.py", line 883, in csv
self._jwrite.csv(path)
File "C:\Python27\lib\site-packages\py4j\java_gateway.py", line 1160, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "C:\Python27\lib\site-packages\pyspark\sql\utils.py", line 63, in deco
return f(*a, **kw)
File "C:\Python27\lib\site-packages\py4j\protocol.py", line 320, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o102.csv.
< /code>
Hinzufügen der Stacktrace: < /p>
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
18/06/11 16:53:14 ERROR Shell: Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable C:\spark-2.3.0-bin-hadoop2.7\bin\bin\winutils.exe in the Hadoop binaries.
at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:379)
at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:394)
at org.apache.hadoop.util.Shell.(Shell.java:387)
at org.apache.hadoop.util.StringUtils.(StringUtils.java:80)
at org.apache.hadoop.security.SecurityUtil.getAuthenticationMethod(SecurityUtil.java:611)
at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:273)
at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:261)
at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:791)
at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:761)
at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:634)
at org.apache.spark.util.Utils$$anonfun$getCurrentUserName$1.apply(Utils.scala:2430)
at org.apache.spark.util.Utils$$anonfun$getCurrentUserName$1.apply(Utils.scala:2430)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.util.Utils$.getCurrentUserName(Utils.scala:2430)
at org.apache.spark.SparkContext.(SparkContext.scala:295)
at org.apache.spark.api.java.JavaSparkContext.(JavaSparkContext.scala:58)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:488)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:236)
at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80)
at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.base/java.lang.Thread.run(Thread.java:844)
WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.hadoop.security.authentication.util.KerberosUtil (file:/C:/opt/spark/spark-2.2.0-bin-hadoop2.7/jars/hadoop-auth-2.7.3.jar) to method sun.security.krb5.Config.getInstance()
WARNING: Please consider reporting this to the maintainers of org.apache.hadoop.security.authentication.util.KerberosUtil
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
18/06/11 16:53:14 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Traceback (most recent call last):
File "C:/Users/mainak.paul/PycharmProjects/Concept_Building_SIP/ThemeSparkUncoveredGames.py", line 22, in
op = ip.toPandas().collect()
File "C:\Python27\lib\site-packages\pyspark\sql\dataframe.py", line 1937, in toPandas
if self.sql_ctx.getConf("spark.sql.execution.pandas.respectSessionTimeZone").lower() \
File "C:\Python27\lib\site-packages\pyspark\sql\context.py", line 142, in getConf
return self.sparkSession.conf.get(key, defaultValue)
File "C:\Python27\lib\site-packages\pyspark\sql\conf.py", line 46, in get
return self._jconf.get(key)
File "C:\Python27\lib\site-packages\py4j\java_gateway.py", line 1160, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "C:\Python27\lib\site-packages\pyspark\sql\utils.py", line 63, in deco
return f(*a, **kw)
File "C:\Python27\lib\site-packages\py4j\protocol.py", line 320, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o86.get.
: java.util.NoSuchElementException: spark.sql.execution.pandas.respectSessionTimeZone
at org.apache.spark.sql.internal.SQLConf$$anonfun$getConfString$2.apply(SQLConf.scala:1089)
at org.apache.spark.sql.internal.SQLConf$$anonfun$getConfString$2.apply(SQLConf.scala:1089)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.sql.internal.SQLConf.getConfString(SQLConf.scala:1089)
at org.apache.spark.sql.RuntimeConfig.get(RuntimeConfig.scala:74)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:564)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:280)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:214)
at java.base/java.lang.Thread.run(Thread.java:844)
Process finished with exit code 1
Ich führe eine riesige Textdatei mit Pycharm und Pyspark aus. < /p>
Das versuche ich zu tun:[code]spark_home = os.environ.get('SPARK_HOME', None) os.environ["SPARK_HOME"] = "C:\spark-2.3.0-bin-hadoop2.7" import pyspark from pyspark import SparkContext, SparkConf from pyspark.sql import SparkSession conf = SparkConf() sc = SparkContext(conf=conf) spark = SparkSession.builder.config(conf=conf).getOrCreate() import pandas as pd ip = spark.read.format("csv").option("inferSchema","true").option("header","true").load(r"some other file.csv") kw = pd.read_csv(r"some file.csv",encoding='ISO-8859-1',index_col=False,error_bad_lines=False) for i in range(len(kw)): rx = '(?i)'+kw.Keywords[i] ip = ip.where(~ip['Content'].rlike(rx)) op = ip.toPandas() op.to_csv(r'something.csv',encoding='utf-8') < /code>
Pycharm wirft mir jedoch diesen Fehler: < /p>
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 2018-06-08 11:31:52 WARN Utils:66 - Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.debug.maxToStringFields' in SparkEnv.conf. Traceback (most recent call last): File "C:/Users/mainak.paul/PycharmProjects/Concept_Building_SIP/ThemeSparkUncoveredGames.py", line 17, in op = ip.toPandas() File "C:\Python27\lib\site-packages\pyspark\sql\dataframe.py", line 1966, in toPandas pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns) File "C:\Python27\lib\site-packages\pyspark\sql\dataframe.py", line 466, in collect port = self._jdf.collectToPython() File "C:\Python27\lib\site-packages\py4j\java_gateway.py", line 1160, in __call__ answer, self.gateway_client, self.target_id, self.name) File "C:\Python27\lib\site-packages\pyspark\sql\utils.py", line 63, in deco return f(*a, **kw) File "C:\Python27\lib\site-packages\py4j\protocol.py", line 320, in get_return_value format(target_id, ".", name), value) py4j.protocol.Py4JJavaError: An error occurred while calling o30.collectToPython. : java.lang.IllegalArgumentException < /code>
Ich bekomme einfach nicht, warum .topandas () < /code> nicht funktioniert. Spark Version ist 2.3. Hat sich in dieser Version etwas geändert, von dem ich nicht weiß? Ich habe diesen Code mit Spark 2.2 in einer anderen Maschine ausgeführt und er lief gut.op = ip.where(ip['Content'].rlike(rx)).toPandas() < /code>
Erhält immer noch den gleichen Fehler. Was mache ich falsch? Gibt es eine andere Möglichkeit zum Exportieren von pyspark.sql.dataframe.dataframe < /code> nach a .csv < /code> ohne die Leistung zu beeinträchtigen?ip.write.csv('file.csv') < /code>
Jetzt erhalte ich den folgenden Fehler: < /p>
Traceback (most recent call last): File "somefile.csv", line 21, in ip.write.csv('somefile.csv') File "C:\Python27\lib\site-packages\pyspark\sql\readwriter.py", line 883, in csv self._jwrite.csv(path) File "C:\Python27\lib\site-packages\py4j\java_gateway.py", line 1160, in __call__ answer, self.gateway_client, self.target_id, self.name) File "C:\Python27\lib\site-packages\pyspark\sql\utils.py", line 63, in deco return f(*a, **kw) File "C:\Python27\lib\site-packages\py4j\protocol.py", line 320, in get_return_value format(target_id, ".", name), value) py4j.protocol.Py4JJavaError: An error occurred while calling o102.csv. < /code>
Hinzufügen der Stacktrace: < /p>
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 18/06/11 16:53:14 ERROR Shell: Failed to locate the winutils binary in the hadoop binary path java.io.IOException: Could not locate executable C:\spark-2.3.0-bin-hadoop2.7\bin\bin\winutils.exe in the Hadoop binaries. at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:379) at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:394) at org.apache.hadoop.util.Shell.(Shell.java:387) at org.apache.hadoop.util.StringUtils.(StringUtils.java:80) at org.apache.hadoop.security.SecurityUtil.getAuthenticationMethod(SecurityUtil.java:611) at org.apache.hadoop.security.UserGroupInformation.initialize(UserGroupInformation.java:273) at org.apache.hadoop.security.UserGroupInformation.ensureInitialized(UserGroupInformation.java:261) at org.apache.hadoop.security.UserGroupInformation.loginUserFromSubject(UserGroupInformation.java:791) at org.apache.hadoop.security.UserGroupInformation.getLoginUser(UserGroupInformation.java:761) at org.apache.hadoop.security.UserGroupInformation.getCurrentUser(UserGroupInformation.java:634) at org.apache.spark.util.Utils$$anonfun$getCurrentUserName$1.apply(Utils.scala:2430) at org.apache.spark.util.Utils$$anonfun$getCurrentUserName$1.apply(Utils.scala:2430) at scala.Option.getOrElse(Option.scala:121) at org.apache.spark.util.Utils$.getCurrentUserName(Utils.scala:2430) at org.apache.spark.SparkContext.(SparkContext.scala:295) at org.apache.spark.api.java.JavaSparkContext.(JavaSparkContext.scala:58) at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:488) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:247) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) at py4j.Gateway.invoke(Gateway.java:236) at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:80) at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:69) at py4j.GatewayConnection.run(GatewayConnection.java:214) at java.base/java.lang.Thread.run(Thread.java:844) WARNING: An illegal reflective access operation has occurred WARNING: Illegal reflective access by org.apache.hadoop.security.authentication.util.KerberosUtil (file:/C:/opt/spark/spark-2.2.0-bin-hadoop2.7/jars/hadoop-auth-2.7.3.jar) to method sun.security.krb5.Config.getInstance() WARNING: Please consider reporting this to the maintainers of org.apache.hadoop.security.authentication.util.KerberosUtil WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations WARNING: All illegal access operations will be denied in a future release 18/06/11 16:53:14 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable Traceback (most recent call last): File "C:/Users/mainak.paul/PycharmProjects/Concept_Building_SIP/ThemeSparkUncoveredGames.py", line 22, in op = ip.toPandas().collect() File "C:\Python27\lib\site-packages\pyspark\sql\dataframe.py", line 1937, in toPandas if self.sql_ctx.getConf("spark.sql.execution.pandas.respectSessionTimeZone").lower() \ File "C:\Python27\lib\site-packages\pyspark\sql\context.py", line 142, in getConf return self.sparkSession.conf.get(key, defaultValue) File "C:\Python27\lib\site-packages\pyspark\sql\conf.py", line 46, in get return self._jconf.get(key) File "C:\Python27\lib\site-packages\py4j\java_gateway.py", line 1160, in __call__ answer, self.gateway_client, self.target_id, self.name) File "C:\Python27\lib\site-packages\pyspark\sql\utils.py", line 63, in deco return f(*a, **kw) File "C:\Python27\lib\site-packages\py4j\protocol.py", line 320, in get_return_value format(target_id, ".", name), value) py4j.protocol.Py4JJavaError: An error occurred while calling o86.get. : java.util.NoSuchElementException: spark.sql.execution.pandas.respectSessionTimeZone at org.apache.spark.sql.internal.SQLConf$$anonfun$getConfString$2.apply(SQLConf.scala:1089) at org.apache.spark.sql.internal.SQLConf$$anonfun$getConfString$2.apply(SQLConf.scala:1089) at scala.Option.getOrElse(Option.scala:121) at org.apache.spark.sql.internal.SQLConf.getConfString(SQLConf.scala:1089) at org.apache.spark.sql.RuntimeConfig.get(RuntimeConfig.scala:74) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.base/java.lang.reflect.Method.invoke(Method.java:564) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) at py4j.Gateway.invoke(Gateway.java:280) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:214) at java.base/java.lang.Thread.run(Thread.java:844)
Ich kann die Carousel-Indicator-Klasse nicht auf dem Server finden, den ich aus den Entwickler-Tools gefunden habe. Sie finden den Screenshot unten. Ich habe Änderungen an den Entwickler -Tools...
Wenn ich Schematool -DBTYPE MySQL -initschema Befehl in der Eingabeaufforderung ausführe, wird der folgende Fehler ausgegeben -
Error: A JNI error has occurred, please check your installation and...
Ich versuche derzeit, den Apache -Bohrer mit S3 (Minio) -Pray -Plugin -Support einzurichten, aber Drill stürzt während des Start- oder Plugin -Gebrauchs mit dem folgenden Fehler ab:
Exception in...
Beim Versuch, von meinem Python-Skript aus eine Verbindung zu einer Hadoop HDFS-Instanz herzustellen, die in einem Docker-Container ausgeführt wird, tritt ein ConnectTimeout-Fehler auf. Das Skript...