1.使用Spark可以访问多种类型的文件,包括txt、csv、json等。下面依次演示如何访问。
2.本程序使用maven+intellij+spark+windows编写
访问.txt文件
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Reads a plain .txt file with Spark and prints its first lines.
 *
 * Run locally (master = "local"); expects the file path below to exist.
 */
object ReadTxt {
  def main(args: Array[String]): Unit = {
    // setAppName is mandatory — Spark fails at startup without it.
    // Bug fix: the app name was "ReadCSV" (copy-pasted from the CSV example);
    // it now matches this program.
    val conf = new SparkConf().setAppName("ReadTxt").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val input = sc.textFile("C:\\Users\\enmonster\\Desktop\\information.txt")
      // take(10) caps how much data is pulled back to the driver before printing.
      input.take(10).foreach(println)
    } finally {
      // Always release the SparkContext, even if reading/printing throws.
      sc.stop()
    }
  }
}
读取的文件以及执行结果均如下:
name,LittleLawson age,20 gender,male
访问.csv文件
import java.io.StringReader
import com.opencsv.CSVReader
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

// NOTE(review): Person is declared but not used by ReadCSV below — kept for
// compatibility in case other code in this project references it.
case class Person(name: String, age: Int, gender: String)

/**
 * Reads a .csv file with Spark, parsing each line with opencsv's CSVReader,
 * and prints the parsed field values.
 */
object ReadCSV {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("ReadCSV").setMaster("local")
    val sc = new SparkContext(conf)
    try {
      val input = sc.textFile("C:\\Users\\enmonster\\Desktop\\information.csv")
      // Parse each line into an Array[String] of its fields.
      // Bug fix: the original also evaluated line.split(",", -1) here and
      // discarded the result — dead code removed; opencsv does the parsing.
      val result: RDD[Array[String]] = input.map { line =>
        val reader = new CSVReader(new StringReader(line))
        reader.readNext()
      }
      // Printing the array itself shows its default toString (hashCode-like),
      // not the contents — kept to match the original demonstration.
      result.foreach(print)
      // Iterate each array to print the actual field values (lambda form).
      result.foreach(x => x.foreach(print))
      // Equivalent shorthand: result.foreach(_.foreach(println(_)))
    } finally {
      // Always release the SparkContext, even if reading/printing throws.
      sc.stop()
    }
  }
}