HelloSpark
pom.xml
Configure the spark-core dependency and the Scala plugin used to compile and package the project:
<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>2.1.1</version>
    </dependency>
</dependencies>
<build>
    <plugins>
        <plugin>
            <groupId>net.alchim31.maven</groupId>
            <artifactId>scala-maven-plugin</artifactId>
            <version>3.4.6</version>
            <executions>
                <execution>
                    <goals>
                        <goal>compile</goal>
                        <goal>testCompile</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
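Because the Scala sources are compiled by scala-maven-plugin, the Scala standard library usually also has to be on the compile classpath. A minimal sketch of that extra dependency; the 2.11.8 patch version is an assumption, chosen only to match the spark-core_2.11 artifact above:

<dependency>
    <groupId>org.scala-lang</groupId>
    <artifactId>scala-library</artifactId>
    <!-- version is an assumption; align it with the Scala version of spark-core_2.11 -->
    <version>2.11.8</version>
</dependency>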
The "hello world" of big data: WordCount
import org.apache.spark.{SparkConf, SparkContext}

object WordCount {
  def main(args: Array[String]): Unit = {
    // Configure the SparkConf. When running locally (not uploading to a VM),
    // setMaster must be set; when the jar is uploaded and run through
    // spark-submit, setMaster must NOT be set here (the master comes from
    // the submit command instead).
    val sparkConf = new SparkConf()
    sparkConf.setAppName("WordCount")
    sparkConf.setMaster("local[2]")
    // Create the context object
    val sc = new SparkContext(sparkConf)
    // The core of WordCount: split each line into words, map every word to
    // (word, 1), sum the counts per word, then collect the result to the driver
    val tuples: Array[(String, Int)] = sc.textFile("c://1.txt")
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .collect()
    tuples.foreach(println)
    // Release the context when done
    sc.stop()
  }
}
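As the comment above notes, setMaster has to be removed before the jar is submitted with spark-submit. One common pattern, sketched here as an assumption rather than anything from the original notes (the object name WordCountSubmit and the use of args(0) for the input path are both hypothetical), is to leave the master unset in code and take the input path from the command line, so the same jar runs locally or on a cluster:

import org.apache.spark.{SparkConf, SparkContext}

object WordCountSubmit {
  def main(args: Array[String]): Unit = {
    // No setMaster here: the master comes from spark-submit's --master flag
    // args(0) is the input path, passed as the first program argument
    val sparkConf = new SparkConf().setAppName("WordCountSubmit")
    val sc = new SparkContext(sparkConf)
    sc.textFile(args(0))
      .flatMap(_.split(" "))
      .map((_, 1))
      .reduceByKey(_ + _)
      .collect()
      .foreach(println)
    sc.stop()
  }
}

The packaged jar would then be launched with something like spark-submit --master <master-url> --class WordCountSubmit <jar> <input-path>, with the master URL, jar name, and input path filled in for the target environment.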