本文针对 Spark 1 与 Hadoop 1 如何使用的问题，给出了详细的分析和解答，希望可以帮助更多想解决这个问题的小伙伴找到更简单易行的方法。
环境准备
ubuntu-12.04.1-desktop-i386.iso
jdk-7u7-linux-i586.tar.gz
paohaijiao@ubuntu:~$ sudo -s
[sudo] password for paohaijiao:
root@ubuntu:~# vi /etc/lightdm/lightdm.conf
[SeatDefaults]
user-session=ubuntu
greeter-session=unity-greeter
greeter-show-manual-login=true
allow-guest=false
root@ubuntu:~# sudo passwd root
Enter new UNIX password:
Retype new UNIX password:
passwd: password updated successfully
root@ubuntu:~# reboot -h now
以root用户登录
root@ubuntu:~# mkdir /usr/lib/java
root@ubuntu:~# getconf LONG_BIT
32
root@ubuntu:~# cd /usr/lib/java
root@ubuntu:/usr/lib/java# ls
jdk1.7.0_07 jdk-7u7-linux-i586.tar.gz
export JAVA_HOME=/usr/lib/java/jdk1.7.0_07
export JRE_HOME=${JAVA_HOME}/jre
export CLASS_PATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
root@ubuntu:/usr/lib/java# source ~/.bashrc
root@ubuntu:/usr/lib/java# apt-get install ssh
root@ubuntu:~# /etc/init.d/ssh start
Rather than invoking init scripts through /etc/init.d, use the service(8)
utility, e.g. service ssh start
Since the script you are attempting to invoke has been converted to an
Upstart job, you may also use the start(8) utility, e.g. start ssh
root@ubuntu:~# ps -e |grep ssh
2174 ? 00:00:00 ssh-agent
3579 ? 00:00:00 sshd
root@ubuntu:~# ssh-keygen -t rsa -P ""
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Created directory '/root/.ssh'.
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
bf:d2:3c:20:7b:b0:6d:4f:7f:9d:98:cb:b7:26:c1:67 root@ubuntu
The key's randomart image is:
+--[ RSA 2048]----+
| |
| |
| |
| |
| S . |
| o .. o E |
| * +o *..|
| o =.+o.+.+.|
| o oo..+=..|
+-----------------+
root@ubuntu:~# cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
root@ubuntu:~# ssh localhost
The authenticity of host 'localhost (127.0.0.1)' can't be established.
ECDSA key fingerprint is 0d:1a:18:04:c8:6b:0b:d7:98:e8:f4:a4:f6:e3:2a:8c.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'localhost' (ECDSA) to the list of known hosts.
Welcome to Ubuntu 12.04.1 LTS (GNU/Linux 3.2.0-29-generic-pae i686)
* Documentation: https://help.ubuntu.com/
The programs included with the Ubuntu system are free software;
the exact distribution terms for each program are described in the
individual files in /usr/share/doc/*/copyright.
Ubuntu comes with ABSOLUTELY NO WARRANTY, to the extent permitted by applicable law.
root@ubuntu:~# apt-get install rsync
Reading package lists... Done
Building dependency tree
Reading state information... Done
The following packages will be upgraded:
rsync
1 upgraded, 0 newly installed, 0 to remove and 612 not upgraded.
Need to get 299 kB of archives.
After this operation, 5,120 B of additional disk space will be used.
Get:1 http://us.archive.ubuntu.com/ubuntu/ precise-updates/main rsync i386 3.0.9-1ubuntu1.1 [299 kB]
root@ubuntu:~# mkdir /usr/local/hadoop
root@ubuntu:~# cd /root/Downloads/
root@ubuntu:~/Downloads# ls
hadoop-1.2.1-bin.tar.gz
root@ubuntu:~/Downloads# tar -xzf hadoop-1.2.1-bin.tar.gz
root@ubuntu:~/Downloads# ls
hadoop-1.2.1 hadoop-1.2.1-bin.tar.gz
root@ubuntu:~/Downloads# mv hadoop-1.2.1 /usr/local/hadoop
root@ubuntu:~/Downloads# cd /usr/local/hadoop
root@ubuntu:/usr/local/hadoop# ls
hadoop-1.2.1
root@ubuntu:/usr/local/hadoop# cd hadoop-1.2.1
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1# ls
bin hadoop-ant-1.2.1.jar hadoop-tools-1.2.1.jar NOTICE.txt
build.xml hadoop-client-1.2.1.jar ivy README.txt
c++ hadoop-core-1.2.1.jar ivy.xml sbin
CHANGES.txt hadoop-examples-1.2.1.jar lib share
conf hadoop-minicluster-1.2.1.jar libexec src
contrib hadoop-test-1.2.1.jar LICENSE.txt webapps
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1# cd conf/
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1/conf# ls
capacity-scheduler.xml hadoop-policy.xml slaves
configuration.xsl hdfs-site.xml ssl-client.xml.example
core-site.xml log4j.properties ssl-server.xml.example
fair-scheduler.xml mapred-queue-acls.xml taskcontroller.cfg
hadoop-env.sh mapred-site.xml task-log4j.properties
hadoop-metrics2.properties masters
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1/conf# vi hadoop-env.sh
export JAVA_HOME=/usr/lib/java/jdk1.7.0_07
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1/conf# source hadoop-env.sh
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1/bin# vi ~/.bashrc
export PATH=${JAVA_HOME}/bin:/usr/local/hadoop/hadoop-1.2.1/bin:$PATH
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1/bin# source ~/.bashrc
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1# mkdir input
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1# ls
bin hadoop-client-1.2.1.jar ivy sbin
build.xml hadoop-core-1.2.1.jar ivy.xml share
c++ hadoop-examples-1.2.1.jar lib src
CHANGES.txt hadoop-minicluster-1.2.1.jar libexec webapps
conf hadoop-test-1.2.1.jar LICENSE.txt
contrib hadoop-tools-1.2.1.jar NOTICE.txt
hadoop-ant-1.2.1.jar input README.txt
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1# cp conf/* input
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1# ls input
capacity-scheduler.xml hadoop-policy.xml slaves
configuration.xsl hdfs-site.xml ssl-client.xml.example
core-site.xml log4j.properties ssl-server.xml.example
fair-scheduler.xml mapred-queue-acls.xml taskcontroller.cfg
hadoop-env.sh mapred-site.xml task-log4j.properties
hadoop-metrics2.properties masters
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1/conf# vi core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/hadoop-1.2.1/tmp</value>
</property>
</configuration>
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1/conf# vi hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.name.dir</name>
<value>/usr/local/hadoop/hadoop-1.2.1/hdfs/name</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>/usr/local/hadoop/hadoop-1.2.1/hdfs/data</value>
</property>
</configuration>
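root@ubuntu:/usr/local/hadoop/hadoop-1.2.1/conf# vi mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:9001</value>
</property>
</configuration>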
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1/conf# hadoop namenode -format
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1/bin# ./start-all.sh
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1# hadoop jar hadoop-examples-1.2.1.jar wordcount input output
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1# cat output/*
http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-hadoop1.tgz
http://www.scala-lang.org/files/archive/scala-2.10.4.tgz （Spark 1.0.0 要求 Scala 版本为 2.10.x）
root@ubuntu:~# mkdir/usr/lib/scala
bash: mkdir/usr/lib/scala: No such file or directory
root@ubuntu:~# mkdir /usr/lib/scala
root@ubuntu:~# cd ~/Do
Documents/ Downloads/
root@ubuntu:~# cd ~/Downloads/
root@ubuntu:~/Downloads# ls
hadoop-1.2.1-bin.tar.gz scala-2.10.4.tgz spark-1.0.0-bin-hadoop1.tgz
root@ubuntu:~/Downloads# tar -zxf scala-2.10.4.tgz
root@ubuntu:~/Downloads# ls
hadoop-1.2.1-bin.tar.gz scala-2.10.4.tgz
scala-2.10.4 spark-1.0.0-bin-hadoop1.tgz
root@ubuntu:~/Downloads# mv scala-2.10.4 /usr/lib/scala
root@ubuntu:~/Downloads# cd /usr/lib/scala
root@ubuntu:/usr/lib/scala# ls
scala-2.10.4
root@ubuntu:/usr/lib/scala# vi ~/.bashrc
root@ubuntu:/usr/lib/scala# source ~/.bashrc
root@ubuntu:~/Downloads# tar -zxf spark-1.0.0-bin-hadoop1.tgz
export JAVA_HOME=/usr/lib/java/jdk1.7.0_07
export JRE_HOME=${JAVA_HOME}/jre
export SCALA_HOME=/usr/lib/scala/scala-2.10.4
export CLASS_PATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${SCALA_HOME}/bin:${JAVA_HOME}/bin:/usr/local/hadoop/hadoop-1.2.1/bin:$PATH
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1# cd /usr/local/hadoop/hadoop-1.2.1
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1# hadoop jar hadoop-examples-1.2.1.jar wordcount input output
root@ubuntu:~/Downloads# cd /root/Downloads
root@ubuntu:~/soft# cd spark-1.0.0-bin-hadoop1/
root@ubuntu:~/soft/spark-1.0.0-bin-hadoop1# ls
bin conf examples LICENSE python RELEASE
CHANGES.txt ec2 lib NOTICE README.md sbin
root@ubuntu:~/soft/spark-1.0.0-bin-hadoop1# cd conf/
root@ubuntu:~/soft/spark-1.0.0-bin-hadoop1/conf# ls
fairscheduler.xml.template slaves
log4j.properties.template spark-defaults.conf.template
metrics.properties.template spark-env.sh.template
root@ubuntu:~/soft/spark-1.0.0-bin-hadoop1/conf# vi spark-env.sh
export JAVA_HOME=/usr/lib/java/jdk1.7.0_07
export SCALA_HOME=/usr/lib/scala/scala-2.10.4
export SPARK_MASTER_IP=192.168.141.138
export SPARK_WORKER_MEMORY=2g
export HADOOP_CONF_DIR=/usr/local/hadoop/hadoop-1.2.1/conf
root@ubuntu:~/soft/spark-1.0.0-bin-hadoop1/conf# vi slaves
# A Spark Worker will be started on each of the machines listed below.
localhost
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1# ./bin/start-all.sh
starting namenode, logging to /usr/local/hadoop/hadoop-1.2.1/libexec/../logs/hadoop-root-namenode-ubuntu.out
root@ubuntu:/usr/local/hadoop/hadoop-1.2.1/bin# jps
3459 Jps
3207 JobTracker
2714 NameNode
3122 SecondaryNameNode
2923 DataNode
3411 TaskTracker
root@ubuntu:~/soft/spark-1.0.0-bin-hadoop1/sbin# cd /root/soft/spark-1.0.0-bin-hadoop1/sbin
root@ubuntu:~/soft/spark-1.0.0-bin-hadoop1/sbin# ./start-all.sh
root@ubuntu:~/soft/spark-1.0.0-bin-hadoop1/sbin# jps
3918 Jps
3207 JobTracker
3625 Master
3844 Worker
3411 TaskTracker
root@ubuntu:~/soft/spark-1.0.0-bin-hadoop1# jps
3207 JobTracker
3625 Master
4483 SecondaryNameNode
3844 Worker
4734 Jps
3411 TaskTracker
访问http://192.168.141.138:8080/
root@ubuntu:~/soft/spark-1.0.0-bin-hadoop1/bin# ./spark-shell
http://192.168.141.138:4040/environment/
root@ubuntu:~/soft/spark-1.0.0-bin-hadoop1# hadoop dfs -copyFromLocal README.md ./
http://192.168.141.138:50070/dfshealth.jsp
scala> val file=sc.textFile("hdfs://127.0.0.1:9000/user/root/README.md")
scala> val sparks=file.filter(line=>line.contains("Spark"))
关于spark1与hadoop1如何使用问题的解答就分享到这里了,希望以上内容可以对大家有一定的帮助,如果你还有很多疑惑没有解开,可以关注亿速云行业资讯频道了解更多相关知识。
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。