Flink 从集合中获取数据，下面直接看代码：
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import java.util.Arrays;
/**
 * Word-count demo that uses an in-memory collection as the stream source.
 *
 * <p>Each input line is split on single spaces, every token is emitted as a
 * {@code (word, 1)} pair, the pairs are keyed by the word, and the summed
 * count per word is printed.
 */
public class StreamSourceFromCollection {

    /**
     * Builds the pipeline and runs the job.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if building or executing the job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Read the data from a collection.
        DataStreamSource<String> lines = env.fromCollection(Arrays.asList(
                "hello flink",
                "hello java",
                "hello world",
                "test",
                "source",
                "collection"));

        // Tokenize, key by the word (field f0), and sum the count (field index 1).
        SingleOutputStreamOperator<Tuple2<String, Integer>> wordCounts = lines
                .flatMap(new WordSplitter())
                .keyBy(pair -> pair.f0)
                .sum(1);

        wordCounts.print();
        env.execute();
    }

    /**
     * Splits a line on single spaces and emits one {@code (word, 1)} pair per token.
     *
     * <p>Kept as a named class (rather than a lambda) so the generic
     * {@code Tuple2<String, Integer>} output type stays visible in the class signature.
     */
    private static final class WordSplitter
            implements FlatMapFunction<String, Tuple2<String, Integer>> {

        @Override
        public void flatMap(String line, Collector<Tuple2<String, Integer>> out) throws Exception {
            for (String word : line.split(" ")) {
                out.collect(new Tuple2<>(word, 1));
            }
        }
    }
}
输出结果（`>` 前面的数字是打印该条记录的并行子任务编号，每次运行时编号和输出顺序可能不同）：
2> (collection,1)
5> (hello,1)
3> (java,1)
5> (hello,2)
11> (source,1)
5> (hello,3)
9> (world,1)
9> (test,1)
13> (flink,1)