Spark如何处理中文字符串
Spark如何处理中文字符串
import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.StreamingContext._
// Streaming word count: reads text lines from a socket, splits each line into
// words, and counts the occurrences of every word within each batch.
val conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount")
// Batches are formed with an interval of Seconds(1).
val ssc = new StreamingContext(conf, Seconds(1))
// ---- lines is the first DStream, fed by the text socket at localhost:9999
val lines = ssc.socketTextStream("localhost", 9999)
// Split every line on a single space to obtain the individual words.
val words = lines.flatMap(_.split(" "))
// Pair each word with an initial count of 1.
val pairs = words.map(word => (word, 1))
// Sum the counts that share the same word key.
val wordCounts = pairs.reduceByKey(_ + _)
// ---- wordCounts is the result DStream after the chain of transformations; it can now be output
wordCounts.foreachRDD(rdd => {
  // e.g. persist the results to a database
})
ssc.start()
ssc.awaitTermination()