1. install lzo on all nodes
sudo apt-get install liblzo2-dev
2. build hadoop-lzo
git clone https://github.com/kevinweil/hadoop-lzo.git
cd hadoop-lzo
ant compile-native tar
3. copy jar and libraries into cluster on all nodes
cp build/hadoop-lzo-*/hadoop-lzo-*.jar /usr/lib/hadoop-0.20/lib/
cp build/native/Linux-amd64-64/lib/libgplcompression.* /usr/lib/hadoop-0.20/lib/native/Linux-amd64-64/
4. add to core-site.xml
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.BZip2Codec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
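(step 4, continued) add to mapred-site.xml -- note: the map output codec below is only used when mapred.compress.map.output is set to true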
<property>
<name>mapred.child.env</name>
<value>JAVA_LIBRARY_PATH=/usr/lib/hadoop-0.20/lib/native</value>
</property>
<property>
<name>mapred.map.output.compression.codec</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
5. stop and restart tasktrackers and jobtracker
6. test with a Pig script (uses the elephant-bird LZO JSON loader/storer)
register elephant-bird/core/target/elephant-bird-core-3.0.5-SNAPSHOT.jar
register elephant-bird/pig/target/elephant-bird-pig-3.0.5-SNAPSHOT.jar
register google-collections-1.0-rc1.jar
register json-simple-1.1.1.jar
raw_data = LOAD 'test.json.lzo' USING com.twitter.elephantbird.pig.load.LzoJsonLoader() as (json: map[]);
a = limit raw_data 10;
store a into 'lzo' USING com.twitter.elephantbird.pig.store.LzoJsonStorage();