Flink将计算结果输出到文件,代码如下:
package org.itzhimei.sink;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;
import java.util.Arrays;
/**
 * Demo job: breaks a few in-memory sentences into words and writes every
 * word as a {@code (word, 1)} tuple to a text file using {@code writeAsText}.
 */
public class Sink_1_WriteAsText {

    /** Emits one {@code (word, 1)} pair for each space-separated token of the incoming line. */
    private static final class WordToPair implements FlatMapFunction<String, Tuple2<String, Integer>> {
        @Override
        public void flatMap(String line, Collector<Tuple2<String, Integer>> collector) throws Exception {
            for (String token : line.split(" ")) {
                collector.collect(new Tuple2<>(token, 1));
            }
        }
    }

    /**
     * Builds the pipeline (collection source, word splitting, text-file sink) and runs it.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the job execution fails
     */
    public static void main(String[] args) throws Exception {
        // Target file of the text sink.
        String outputPath = "D:\\______flink______\\output\\writeword.txt";

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // If the parallelism is not set to 1 here, the file output is written
        // concurrently according to the CPU of the local machine: the result ends up
        // as a folder named after the file, with the records spread over several files.
        env.setParallelism(1);

        DataStreamSource<String> sentences = env.fromCollection(Arrays.asList(
                "hello flink",
                "hello java",
                "hello world",
                "test",
                "source",
                "collection"));

        SingleOutputStreamOperator<Tuple2<String, Integer>> wordPairs =
                sentences.flatMap(new WordToPair());

        wordPairs.writeAsText(outputPath);
        env.execute();
    }
}
/* 输出:
(hello,1)
(flink,1)
(hello,1)
(java,1)
(hello,1)
(world,1)
(test,1)
(source,1)
(collection,1)
*/
需要注意的是,输出到文件时,需要将运行环境的并行度设置为1,即env.setParallelism(1)。如果不设置为1,Flink会按照本机的CPU核数并行输出,最终会生成一个以该文件名命名的文件夹,计算结果被分散写入其中的多个文件。