本文共 3242 字,大约阅读时间需要 10 分钟。
package cn.edu.xmu.dm.mpdemo.ioformat;import java.io.IOException;import java.net.URI;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IOUtils;import org.apache.hadoop.io.IntWritable;import org.apache.hadoop.io.SequenceFile;import org.apache.hadoop.io.SequenceFile.CompressionType;import org.apache.hadoop.io.Text;/** * desc: SequenceFileWriter * SequenceFileWriteDemo
* * @author chenwq (irwenqiang@gmail.com) * @version 1.0 2012/05/19 */public class SequenceFileWriteDemo { private static final String[] DATA = { "One, two, buckle my shoe", "Three, four, shut the door", "Five, six, pick up sticks", "Seven, eight, lay them straight", "Nine, ten, a big fat hen" }; public static void main(String[] args) throws IOException { String uri = args[0]; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); Path path = new Path(uri); IntWritable key = new IntWritable(); Text value = new Text(); SequenceFile.Writer writer = null; try { /** * fs: outputstream * conf: configuration object * key: the key' type * value: the value's type */ writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());// writer = SequenceFile.createWriter(fs, conf, path, key.getClass(),// value.getClass(), CompressionType.BLOCK); for (int i = 0; i < 100; i++) { key.set(100 - i); value.set(DATA[i % DATA.length]); System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value); writer.append(key, value); } } finally { IOUtils.closeStream(writer); } }}
package cn.edu.xmu.dm.mpdemo.ioformat;import java.io.IOException;import java.net.URI;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.FileSystem;import org.apache.hadoop.fs.Path;import org.apache.hadoop.io.IOUtils;import org.apache.hadoop.io.SequenceFile;import org.apache.hadoop.io.Writable;import org.apache.hadoop.util.ReflectionUtils;/** * desc: SequenceFileReader * SequenceFileReadDemo
* * @author chenwq (irwenqiang@gmail.com) * @version 1.0 2012/05/19 */public class SequenceFileReadDemo { public static void main(String[] args) throws IOException { String uri = args[0]; Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(uri), conf); Path path = new Path(uri); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, path, conf); Writable key = (Writable) ReflectionUtils.newInstance( reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance( reader.getValueClass(), conf); long position = reader.getPosition(); while (reader.next(key, value)) { String syncSeen = reader.syncSeen() ? "*" : ""; System.out.printf("[%s%s]\t%s\t%s\n", position, syncSeen, key, value); position = reader.getPosition(); // beginning of next record } } finally { IOUtils.closeStream(reader); } }}
使用Block压缩后的大小对比:
root@ubuntu:~# hadoop fs -ls mpdemo/
Found 2 items
-rw-r--r--   3 root supergroup       4788 2012-05-19 00:11 /user/root/mpdemo/seqinput
-rw-r--r--   3 root supergroup        484 2012-05-19 00:17 /user/root/mpdemo/seqinputblock
转载地址:http://wtwob.baihongyu.com/