Official documentation
http://spark.apache.org/docs/2.3.0/streaming-kafka-0-10-integration.html#creating-a-direct-stream
Example pom.xml dependencies
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_2.11</artifactId>
    <version>2.3.0</version>
    <!-- <scope>provided</scope> -->
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
    <version>2.3.0</version>
</dependency>
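For an sbt build, the same coordinates would look roughly like the sketch below. The %% operator appends the project's Scala suffix, so scalaVersion must be a 2.11.x release to match the _2.11 artifacts above; the version shown is an assumption.

// build.sbt (sketch)
scalaVersion := "2.11.8" // must match the _2.11 suffix of the Spark artifacts

libraryDependencies ++= Seq(
  // add % "provided" to spark-streaming when packaging for spark-submit
  "org.apache.spark" %% "spark-streaming" % "2.3.0",
  "org.apache.spark" %% "spark-streaming-kafka-0-10" % "2.3.0"
)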
Example code

package SpartStreamingaiqiyi

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark._
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming._
import org.apache.spark.streaming.kafka010._
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent

object test {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("aiqiyi")
      .master("local[*]") // local[*] supplies enough cores for batch processing
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .getOrCreate()
    val sc = spark.sparkContext

    val checkpointDir = "F:\\IdeaWorkspace\\aiqiyi\\ck"
    // 5-second batch interval
    val ssc: StreamingContext = new StreamingContext(sc, Seconds(5))
    ssc.checkpoint(checkpointDir)

    val topics = Array("aiqiyi")
    val kafkaParams = Map[String, Object](
      "bootstrap.servers" -> "dip005:9092,dip006:9092,dip007:9092",
      "key.deserializer" -> classOf[StringDeserializer],
      "value.deserializer" -> classOf[StringDeserializer],
      "group.id" -> "use_a_separate_group_id_for_each_stream",
      "auto.offset.reset" -> "latest",
      // offsets are not committed automatically; see the note below
      "enable.auto.commit" -> (false: java.lang.Boolean)
    )

    // Direct stream: executors consume their Kafka partitions directly, with no
    // receiver. PreferConsistent distributes partitions evenly across executors.
    val stream = KafkaUtils.createDirectStream[String, String](
      ssc,
      PreferConsistent,
      Subscribe[String, String](topics, kafkaParams)
    )

    // Take the message value from each ConsumerRecord and print a sample per batch.
    val resultDStream = stream.map(x => x.value())
    resultDStream.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
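Because enable.auto.commit is false, this example never stores offsets anywhere, so after a restart the consumer simply falls back to auto.offset.reset. To store offsets in Kafka itself, the linked documentation's pattern looks roughly like the sketch below; it reuses stream and the kafka010 wildcard import from the example above and would replace the map/print sink.

// Sketch, following the offset-commit pattern from the linked docs.
stream.foreachRDD { rdd =>
  // Offset ranges are only available on the RDDs produced by createDirectStream.
  val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

  rdd.map(_.value()).foreach(println) // process the batch here

  // Commit asynchronously once the batch's output work is done.
  stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
}

commitAsync is at-least-once: if the job fails between the output action and the commit, the next run reprocesses that batch, so the output side should tolerate replays.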