LZW算法压缩和解压缩

2019年11月6日 147次阅读

两个处理字典的辅助函数：indexOfTables 在字典中查找与给定内容相同的条目并返回其编码（找不到时返回 -1）；findFromTables 根据编码从字典中取出并展开它所代表的原始字节序列。

	// NOTE(review): ROOT is not referenced by any code visible in this chunk. Presumably it names
	// the highest code that stands for a single byte (the helpers compare against the literals
	// 255 and 256 instead of using it) -- confirm, then either use it there or remove it.
	static final int ROOT = 255;
	
	/**
	 * Searches the dictionary for an entry whose contents equal {@code data}.
	 *
	 * <p>Slots are examined in ascending order and the search stops at the first
	 * {@code null} slot, so only the filled leading part of the table is considered.
	 *
	 * @param tables dictionary; slot {@code i} holds the symbols of entry {@code i}
	 * @param data   the symbol sequence to look for
	 * @return index of the first matching entry, or {@code -1} if there is none
	 */
	private static int indexOfTables(int[][] tables, int[] data) {
		int slot = 0;
		while (slot < tables.length && tables[slot] != null) {
			int[] candidate = tables[slot];
			if (candidate.length == data.length) {
				// Advance past every matching symbol; reaching the end means a full match.
				int matched = 0;
				while (matched < candidate.length && candidate[matched] == data[matched]) {
					matched++;
				}
				if (matched == candidate.length) {
					return slot;
				}
			}
			slot++;
		}
		return -1;
	}
	
	/**
	 * Expands a code into the sequence of literal symbols it stands for.
	 *
	 * <p>Codes below 256 are literals and expand to themselves. Every other code must refer to
	 * a two-element dictionary entry {@code {prefix, suffix}}: its expansion is the expansion of
	 * {@code prefix} followed by {@code suffix}. The suffix is copied as-is, i.e. it is assumed
	 * to already be a literal.
	 *
	 * <p>The chain of prefixes is walked twice (once to size the result, once to fill it from
	 * the back), so the cost is linear in the length of the expansion and no intermediate
	 * collection is needed.
	 *
	 * @param tables   dictionary; entries at index 256 and above are {@code {prefix, suffix}} pairs
	 * @param position the code to expand
	 * @return the expanded symbols, first symbol at index 0
	 * @throws IllegalArgumentException if the chain reaches an entry that is missing or is not
	 *         a two-element pair
	 */
	private static int[] findFromTables(int[][] tables, int position) {
		if (position < 256) {
			return new int[] { position };
		}

		// Pass 1: every pair on the chain contributes one suffix; the final literal adds one more.
		int length = 1;
		int code = position;
		while (code > 255) {
			int[] entry = tables[code];
			if (entry == null || entry.length != 2) {
				throw new IllegalArgumentException(
						"LZW dictionary entry " + code + " is not a {prefix, suffix} pair");
			}
			length++;
			code = entry[0];
		}

		// Pass 2: the chain yields symbols last-to-first, so fill the result from the back.
		int[] r = new int[length];
		code = position;
		for (int i = length - 1; i > 0; i--) {
			r[i] = tables[code][1];
			code = tables[code][0];
		}
		r[0] = code;
		return r;
	}

LZW压缩编码：

	/**
	 * Compresses {@code source} with LZW and returns the emitted codes.
	 *
	 * <p>Codes 0-255 stand for the unsigned value of a single byte. Codes from 256 upwards are
	 * assigned, in order, to each new (prefix code, next byte) pair that is encountered. At most
	 * 4096 codes exist (the size of the table {@code unlzw} allocates); once they are all
	 * assigned the dictionary is frozen and encoding continues with the existing entries.
	 *
	 * @param source the bytes to compress
	 * @return the code sequence; empty if {@code source} is empty. Every code is in
	 *         {@code [0, 4096)}.
	 */
	public static int[] lzw(byte[] source) {
		if (source.length == 0) {
			return new int[0];
		}

		final int maxCodes = 1 << 12;

		// Maps (prefix code << 8 | next byte) to the code assigned to that pair.
		// Prefix codes are below 4096 and bytes below 256, so the packed key is unique.
		java.util.Map<Integer, Integer> dictionary = new java.util.HashMap<>();
		int nextCode = 256;

		// Each emitted code consumes at least one input byte, so this is always large enough.
		int[] codes = new int[source.length];
		int count = 0;

		// Mask with 0xFF so bytes 0x80-0xFF become 128-255 rather than negative values.
		int prefix = source[0] & 0xFF;

		for (int i = 1; i < source.length; i++) {
			int c = source[i] & 0xFF;
			Integer key = (prefix << 8) | c;
			Integer known = dictionary.get(key);

			if (known != null) {
				// prefix + c is already in the dictionary: keep extending the match.
				prefix = known;
			}
			else {
				codes[count++] = prefix;
				if (nextCode < maxCodes) {
					dictionary.put(key, nextCode++);
				}
				prefix = c;
			}
		}
		codes[count++] = prefix;

		int[] r = new int[count];
		System.arraycopy(codes, 0, r, 0, count);
		return r;
	}

LZW解压：

	/**
	 * Decompresses a code sequence produced by {@code lzw} back into bytes.
	 *
	 * <p>The dictionary is rebuilt while decoding: after each code, the pair
	 * {@code {previous code, first symbol of the current expansion}} is stored in the next free
	 * slot. The table has 4096 slots; once it is full no further entries are added.
	 *
	 * <p>Each decoded symbol is written as its low 8 bits, so a negative first code decodes to
	 * the byte with the same low 8 bits.
	 *
	 * @param source the codes to decode
	 * @return the decoded bytes; empty if {@code source} is empty
	 * @throws IllegalArgumentException if the first code is not a single-byte code, or a later
	 *         code refers to a dictionary slot that cannot exist yet at that point
	 */
	public static byte[] unlzw(int[] source) {
		if (source.length == 0) {
			return new byte[0];
		}
		if (source[0] > 255) {
			throw new IllegalArgumentException(
					"invalid LZW stream: first code " + source[0] + " is not a single byte");
		}

		int[][] tables = new int[1 << 12][];
		for (int i = 0; i < 256; i++) {
			tables[i] = new int[] { i };
		}
		int storeindex = 256;

		java.io.ByteArrayOutputStream output = new java.io.ByteArrayOutputStream();
		int prefix = source[0];
		output.write(prefix);

		for (int i = 1; i < source.length; i++) {
			int c = source[i];

			// The only code that may be undefined is the one about to be created (== storeindex).
			if (c > storeindex || c >= tables.length) {
				throw new IllegalArgumentException(
						"invalid LZW stream: code " + c + " at index " + i + " is not defined");
			}

			int[] data;
			if (c == storeindex) {
				// The code refers to the entry being defined right now: its expansion is the
				// previous expansion followed by that expansion's own first symbol.
				int[] previous = findFromTables(tables, prefix);
				data = new int[previous.length + 1];
				System.arraycopy(previous, 0, data, 0, previous.length);
				data[previous.length] = previous[0];
			}
			else {
				data = findFromTables(tables, c);
			}

			// Stop adding entries once the table is full.
			if (storeindex < tables.length) {
				tables[storeindex++] = new int[] { prefix, data[0] };
			}

			prefix = c;

			for (int value : data) {
				output.write(value); // writes the low 8 bits
			}
		}

		return output.toByteArray();
	}

测试程序：

	/**
	 * Round-trip check: compressing and then decompressing each sample must give back
	 * exactly the original bytes.
	 */
	@org.junit.Test
	public void test() {
		String[] samples = {
			"eeeee",
			"eeeeeeeeeee",
			"ababbabbbabbbb",
			"aabbccaaabbbcccabcaaaabbbbcccc",
		};

		for (String sample : samples) {
			byte[] expected = sample.getBytes();
			byte[] restored = LZW.unlzw(LZW.lzw(sample.getBytes()));
			org.junit.Assert.assertArrayEquals(expected, restored);
		}
	}