Senin, 12 Oktober 2015
Compressi ShannonFano Source Code Using Java
package lp2maray.com;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Vector;
public class ShannonFano{
public static void main(String[] args){
InputStream is, isDecompress;
OutputStream os, osDecompress;
String filename = "file3MB.txt";
Path currentRelativePath = Paths.get("");
String s = currentRelativePath.toAbsolutePath().toString();
System.out.println("Current relative path is: " + s);
String fileCom = s+"/doc/file3MB.txt";
String fileDec = s+"/doc/dec.zsf";
//file3MB.txt.zsf
new ShannonFanoCompress(fileCom,null,true); //compress
}
public ShannonFanoCompress(String filename,String destination,boolean verbose){
if(verbose)
System.out.println("compression started, source : "+filename);
HashMap<String,Integer> occurenceTable=new HashMap<String,Integer>();
int total=0;
try{
if(verbose)
System.out.println("counting occurences...");
BufferedReader in=new BufferedReader(new FileReader(filename));
String s=null,c=null;
char tab[]=new char[1];
int i,lineCount=0;
while((s=in.readLine())!=null){
for(i=0;i<s.length();i++)
{tab[0]=s.charAt(i);
c=new String(tab);
if(occurenceTable.containsKey(c))
occurenceTable.put(c,new Integer(occurenceTable.get(c).intValue()+1));
else
occurenceTable.put(c,new Integer(1));
}
total+=s.length()+1;
//count each newline too
lineCount++;
}//while
in.close();
//put ("newline",...) in occurenceTable
occurenceTable.put("newline",new Integer(lineCount));
if(verbose){
System.out.println(lineCount+" line(s) in the file : "+filename);
System.out.println("occurence table");
for(Map.Entry<String,Integer> m:occurenceTable.entrySet())
System.out.println(m.getKey()+" "+m.getValue());
}
}
catch(IOException ioe){ioe.printStackTrace();
return;
}
if(verbose)
System.out.println("computing frequency table...");
LinkedHashMap<String,Float> tmpfrequencyTable=new LinkedHashMap<String,Float>();
for(String tmp:occurenceTable.keySet())
tmpfrequencyTable.put(tmp,new Float(((float)occurenceTable.get(tmp).intValue())/total));
Vector<Float> l=new Vector<Float>();
l.addAll(tmpfrequencyTable.values());
Collections.sort(l,Collections.reverseOrder());
LinkedHashMap<String,Float> frequencyTable=new LinkedHashMap<String,Float>();
for(Float value:l)
for(String key:tmpfrequencyTable.keySet())
if(tmpfrequencyTable.get(key).equals(value))
{frequencyTable.put(key,value);
tmpfrequencyTable.remove(key);
break;
}
if(verbose)
{System.out.println("frequency table");
for(Map.Entry<String,Float> m:frequencyTable.entrySet())
System.out.println(m.getKey()+" "+m.getValue());
}
if(verbose)
System.out.println("computing code table...");
LinkedHashMap<String,StringBuffer> codeTable=new LinkedHashMap<String,StringBuffer>();
for(String tmp:frequencyTable.keySet())
codeTable.put(tmp,new StringBuffer(""));
updateTables(frequencyTable,codeTable);
if(verbose)
{System.out.println("code table");
for(Map.Entry<String,StringBuffer> m:codeTable.entrySet())
System.out.println(m.getKey()+" "+m.getValue());
}
File f;
if(destination==null)
f=new File(filename+".zsf");
else
f=new File(destination);
if(verbose)
System.out.println("destination file : "+f.getName());
try {
if(verbose)
System.out.println("computing bit set from source file by using code table (main compression step)...");
BufferedReader in=new BufferedReader(new FileReader(filename));
String s=null,c=null;
char tab[]=new char[1];
int i=0,j=0,k=0;
BitSet content=new BitSet();
String value=null,newlineValue=new String(codeTable.get("newline"));
while((s=in.readLine())!=null)
{for(i=0;i<s.length();i++)
{tab[0]=s.charAt(i);
c=new String(tab);
value=new String(codeTable.get(c));
for(j=0;j<value.length();j++,k++)
if(value.charAt(j)=='1')
content.set(k,true);
}
for(j=0;j<newlineValue.length();j++,k++)
if(newlineValue.charAt(j)=='1')
content.set(k,true);
}
content.set(k,true);
in.close();
if(f.createNewFile() && verbose)
System.out.println("file "+f.getName()+" created");
if(verbose)
System.out.println("writing code table...");
PrintWriter out=new PrintWriter(new BufferedWriter(new FileWriter(f)));
out.println(codeTable.size());
for(Map.Entry<String,StringBuffer> m:codeTable.entrySet())
out.println(m.getKey()+" "+m.getValue());
byte[] byteContent=convertBitSetInBytes(content);
if(verbose)
System.out.println("writing size of compressed content : "+byteContent.length+" bytes");
out.println(byteContent.length);
out.close();
if(verbose)
System.out.println("writing compressed content...");
BufferedOutputStream out2=new BufferedOutputStream(new FileOutputStream(f,true));
out2.write(byteContent);
out2.close();
if(verbose)
System.out.println("compression successful");
}
catch(IOException ioe)
{ioe.printStackTrace();
return;
}
}
private static void updateTables(LinkedHashMap<String,Float> frequencyTablePart,
LinkedHashMap<String,StringBuffer> codeTablePart){
float leftSum=0,rightSum=0;
int limit=0;
for(Float val:frequencyTablePart.values())
rightSum+=val.floatValue();
for(Float x:frequencyTablePart.values())
{leftSum+=x;
rightSum-=x;
limit++;
if(leftSum>=rightSum)
break;
}
int j=0;
for(Map.Entry<String,StringBuffer> m:codeTablePart.entrySet())
{if(j<limit)
codeTablePart.put(m.getKey(),m.getValue().append("0"));
else
codeTablePart.put(m.getKey(),m.getValue().append("1"));
j++;
}
LinkedHashMap<String,Float> nextFrequencyTablePart;
LinkedHashMap<String,StringBuffer> nextCodeTablePart;
Iterator<String> frequencyKeySetIterator;
Iterator<Float> frequencyValuesIterator;
Iterator<String> codeKeySetIterator;
Iterator<StringBuffer> codeValuesIterator;
if(limit>1)
{frequencyKeySetIterator=frequencyTablePart.keySet().iterator();
frequencyValuesIterator=frequencyTablePart.values().iterator();
codeKeySetIterator=codeTablePart.keySet().iterator();
codeValuesIterator=codeTablePart.values().iterator();
nextFrequencyTablePart=new LinkedHashMap<String,Float>();
for(int i=0;i<limit;i++)
nextFrequencyTablePart.put(frequencyKeySetIterator.next(),frequencyValuesIterator.next());
nextCodeTablePart=new LinkedHashMap<String,StringBuffer>();
for(int i=0;i<limit;i++)
nextCodeTablePart.put(codeKeySetIterator.next(),codeValuesIterator.next());
updateTables(nextFrequencyTablePart,nextCodeTablePart);
}
if(limit<codeTablePart.size()-1)
{frequencyKeySetIterator=frequencyTablePart.keySet().iterator();
frequencyValuesIterator=frequencyTablePart.values().iterator();
codeKeySetIterator=codeTablePart.keySet().iterator();
codeValuesIterator=codeTablePart.values().iterator();
for(int i=0;i<limit;i++)
{frequencyKeySetIterator.next();
frequencyValuesIterator.next();
codeKeySetIterator.next();
codeValuesIterator.next();
}
nextFrequencyTablePart=new LinkedHashMap<String,Float>();
while(frequencyKeySetIterator.hasNext())
nextFrequencyTablePart.put(frequencyKeySetIterator.next(),frequencyValuesIterator.next());
nextCodeTablePart=new LinkedHashMap<String,StringBuffer>();
while(codeKeySetIterator.hasNext())
nextCodeTablePart.put(codeKeySetIterator.next(),codeValuesIterator.next());
updateTables(nextFrequencyTablePart,nextCodeTablePart);
}
}
public static byte[] convertBitSetInBytes(BitSet bitSet){
byte[] byteContent=new byte[(int)Math.ceil(bitSet.length()/8.0f)];
int i;
for(i=0;i<byteContent.length;i++)
byteContent[i]=(byte)0x0000;
for(i=0;i<bitSet.size();i++)
if(bitSet.get(i))
byteContent[i/8]|=1<<(7-(i%8));
return(byteContent);
}
public static BitSet convertBytesInBitSet(byte[] array){
BitSet bitset=new BitSet();
int j;
byte[] mask={(byte)0x01,(byte)0x02,(byte)0x04,(byte)0x08,(byte)0x10,(byte)0x20,(byte)0x40,(byte)0x80};
for(int i=0;i<array.length;i++)
for(j=0;j<8;j++)
if((array[i] & mask[j])!=0)
bitset.set(i*8+(7-j),true);
return(bitset);
}
public static String toBitString(BitSet bitset,int length){
String s=new String("");
for(int i=0;i<length;i++)
if(bitset.get(i))
s+="1";
else
s+="0";
return(s);
}
}
HasilCompressi:
Langganan:
Posting Komentar (Atom)
Tidak ada komentar:
Posting Komentar