Exception
java.lang.RuntimeException: java.io.IOException: No FileSystem for scheme: hdfs
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:444)
Solution
Add the Hadoop HDFS jars to the Hive metastore classpath (spark.sql.hive.metastore.jars).
Example
I am on Pivotal Hadoop (PHD), but other distributions should work the same way.
# Launch spark-shell on YARN (client mode) against the Hive metastore of a
# Pivotal HD (PHD) installation.
#
# The metastore classpath below includes the hadoop-hdfs jars; leaving them
# out is what produces "No FileSystem for scheme: hdfs" when the Hive
# session starts.

# PHD release and install root; every distribution-specific path derives
# from these two values, so adapting to another release is a one-line change.
PHD_VERSION=3.0.0.0-249
PHD_ROOT="/usr/phd/${PHD_VERSION}"

export SPARK_HOME=/opt/spark/spark-1.5.2-bin-hadoop2.6
export HADOOP_CONF_DIR=/etc/hadoop/conf
export HIVE_CONF_DIR=/etc/hive/conf

HIVE_LIB_DIR="${PHD_ROOT}/hive/lib"
GUAVA_CLASSPATH="${HIVE_LIB_DIR}/guava-11.0.2.jar"

# Classpath handed to spark.sql.hive.metastore.jars: Hive conf + Hive libs +
# the Hadoop common, MapReduce, YARN and HDFS jars.
# The "/*" entries are classpath wildcards that must reach Spark literally,
# so this variable is only ever expanded inside double quotes (an unquoted
# expansion would be subject to word splitting and pathname expansion).
hive_metastore_classpath="${HIVE_CONF_DIR}:${HIVE_LIB_DIR}/*"
hive_metastore_classpath="${hive_metastore_classpath}:${PHD_ROOT}/hadoop/*"
hive_metastore_classpath="${hive_metastore_classpath}:${PHD_ROOT}/hadoop-mapreduce/*"
hive_metastore_classpath="${hive_metastore_classpath}:${PHD_ROOT}/hadoop-yarn/*"
hive_metastore_classpath="${hive_metastore_classpath}:${PHD_ROOT}/hadoop-hdfs/*"

# bin/spark-shell is a relative path, so only launch it if the cd succeeded.
# NOTE: "http...." is the elided placeholder from the original example;
# substitute a real repository URL before running.
cd "${SPARK_HOME}" &&
  SPARK_REPL_OPTS="-XX:MaxPermSize=512m" bin/spark-shell \
    --master yarn-client \
    --packages "com.databricks:spark-csv_2.10:1.2.0" \
    --repositories "http...." \
    --files "${SPARK_HOME}/conf/hive-site.xml" \
    --conf spark.executor.memory=5g \
    --conf spark.executor.cores=2 \
    --conf spark.driver.memory=10g \
    --conf spark.driver.maxResultSize=512m \
    --conf spark.executor.instances=2 \
    --conf "spark.driver.extraJavaOptions=-Dstack.name=phd -Dstack.version=${PHD_VERSION} -XX:+UseG1GC -Xms2g -Xmx10g -XX:InitiatingHeapOccupancyPercent=35 -XX:ParallelGCThreads=5 -XX:ConcGCThreads=3" \
    --conf "spark.yarn.am.extraJavaOptions=-Dstack.name=phd -Dstack.version=${PHD_VERSION}" \
    --conf "spark.executor.extraJavaOptions=-Dstack.name=phd -Dstack.version=${PHD_VERSION} -XX:+UseG1GC -XX:InitiatingHeapOccupancyPercent=35 -XX:ParallelGCThreads=5 -XX:ConcGCThreads=3" \
    --conf spark.sql.hive.metastore.version=0.14.0 \
    --conf "spark.sql.hive.metastore.jars=${hive_metastore_classpath}" \
    --conf "spark.driver.extraClassPath=${GUAVA_CLASSPATH}"
java.lang.RuntimeException: java.io.IOException: No FileSystem for scheme: hdfs
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:444)
Solution
Add the Hadoop HDFS jars to the Hive metastore classpath (spark.sql.hive.metastore.jars).
Example
I am on Pivotal Hadoop (PHD), but other distributions should work the same way.
# Launch spark-shell on YARN (client mode) against the Hive metastore of a
# Pivotal HD (PHD) installation.
#
# The metastore classpath below includes the hadoop-hdfs jars; leaving them
# out is what produces "No FileSystem for scheme: hdfs" when the Hive
# session starts.

# PHD release and install root; every distribution-specific path derives
# from these two values, so adapting to another release is a one-line change.
PHD_VERSION=3.0.0.0-249
PHD_ROOT="/usr/phd/${PHD_VERSION}"

export SPARK_HOME=/opt/spark/spark-1.5.2-bin-hadoop2.6
export HADOOP_CONF_DIR=/etc/hadoop/conf
export HIVE_CONF_DIR=/etc/hive/conf

HIVE_LIB_DIR="${PHD_ROOT}/hive/lib"
GUAVA_CLASSPATH="${HIVE_LIB_DIR}/guava-11.0.2.jar"

# Classpath handed to spark.sql.hive.metastore.jars: Hive conf + Hive libs +
# the Hadoop common, MapReduce, YARN and HDFS jars.
# The "/*" entries are classpath wildcards that must reach Spark literally,
# so this variable is only ever expanded inside double quotes (an unquoted
# expansion would be subject to word splitting and pathname expansion).
hive_metastore_classpath="${HIVE_CONF_DIR}:${HIVE_LIB_DIR}/*"
hive_metastore_classpath="${hive_metastore_classpath}:${PHD_ROOT}/hadoop/*"
hive_metastore_classpath="${hive_metastore_classpath}:${PHD_ROOT}/hadoop-mapreduce/*"
hive_metastore_classpath="${hive_metastore_classpath}:${PHD_ROOT}/hadoop-yarn/*"
hive_metastore_classpath="${hive_metastore_classpath}:${PHD_ROOT}/hadoop-hdfs/*"

# bin/spark-shell is a relative path, so only launch it if the cd succeeded.
# NOTE: "http...." is the elided placeholder from the original example;
# substitute a real repository URL before running.
cd "${SPARK_HOME}" &&
  SPARK_REPL_OPTS="-XX:MaxPermSize=512m" bin/spark-shell \
    --master yarn-client \
    --packages "com.databricks:spark-csv_2.10:1.2.0" \
    --repositories "http...." \
    --files "${SPARK_HOME}/conf/hive-site.xml" \
    --conf spark.executor.memory=5g \
    --conf spark.executor.cores=2 \
    --conf spark.driver.memory=10g \
    --conf spark.driver.maxResultSize=512m \
    --conf spark.executor.instances=2 \
    --conf "spark.driver.extraJavaOptions=-Dstack.name=phd -Dstack.version=${PHD_VERSION} -XX:+UseG1GC -Xms2g -Xmx10g -XX:InitiatingHeapOccupancyPercent=35 -XX:ParallelGCThreads=5 -XX:ConcGCThreads=3" \
    --conf "spark.yarn.am.extraJavaOptions=-Dstack.name=phd -Dstack.version=${PHD_VERSION}" \
    --conf "spark.executor.extraJavaOptions=-Dstack.name=phd -Dstack.version=${PHD_VERSION} -XX:+UseG1GC -XX:InitiatingHeapOccupancyPercent=35 -XX:ParallelGCThreads=5 -XX:ConcGCThreads=3" \
    --conf spark.sql.hive.metastore.version=0.14.0 \
    --conf "spark.sql.hive.metastore.jars=${hive_metastore_classpath}" \
    --conf "spark.driver.extraClassPath=${GUAVA_CLASSPATH}"
No comments:
Post a Comment
Please share your views and comments below.
Thank You.