001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, 013 * software distributed under the License is distributed on an 014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 * KIND, either express or implied. See the License for the 016 * specific language governing permissions and limitations 017 * under the License. 018 */ 019package org.apache.commons.compress.compressors.bzip2; 020 021import java.io.IOException; 022import java.io.OutputStream; 023import java.util.Arrays; 024 025import org.apache.commons.compress.compressors.CompressorOutputStream; 026import org.apache.commons.io.IOUtils; 027 028/** 029 * An output stream that compresses into the BZip2 format into another stream. 030 * 031 * <p> 032 * The compression requires large amounts of memory. Thus you should call the {@link #close() close()} method as soon as possible, to force 033 * {@code BZip2CompressorOutputStream} to release the allocated memory. 034 * </p> 035 * 036 * <p> 037 * You can shrink the amount of allocated memory and maybe raise the compression speed by choosing a lower blocksize, which in turn may cause a lower 038 * compression ratio. You can avoid unnecessary memory allocation by avoiding using a blocksize which is bigger than the size of the input. 039 * </p> 040 * 041 * <p> 042 * You can compute the memory usage for compressing by the following formula: 043 * </p> 044 * 045 * <pre> 046 * <code>400k + (9 * blocksize)</code>. 047 * </pre> 048 * 049 * <p> 050 * To get the memory required for decompression by {@link BZip2CompressorInputStream} use 051 * </p> 052 * 053 * <pre> 054 * <code>65k + (5 * blocksize)</code>. 055 * </pre> 056 * 057 * <table style="width:100%" border="1"> 058 * <caption>Memory usage by blocksize</caption> 059 * <tr> 060 * <th colspan="3">Memory usage by blocksize</th> 061 * </tr> 062 * <tr> 063 * <th style="text-align: right">Blocksize</th> 064 * <th style="text-align: right">Compression<br> 065 * memory usage</th> 066 * <th style="text-align: right">Decompression<br> 067 * memory usage</th> 068 * </tr> 069 * <tr> 070 * <td style="text-align: right">100k</td> 071 * <td style="text-align: right">1300k</td> 072 * <td style="text-align: right">565k</td> 073 * </tr> 074 * <tr> 075 * <td style="text-align: right">200k</td> 076 * <td style="text-align: right">2200k</td> 077 * <td style="text-align: right">1065k</td> 078 * </tr> 079 * <tr> 080 * <td style="text-align: right">300k</td> 081 * <td style="text-align: right">3100k</td> 082 * <td style="text-align: right">1565k</td> 083 * </tr> 084 * <tr> 085 * <td style="text-align: right">400k</td> 086 * <td style="text-align: right">4000k</td> 087 * <td style="text-align: right">2065k</td> 088 * </tr> 089 * <tr> 090 * <td style="text-align: right">500k</td> 091 * <td style="text-align: right">4900k</td> 092 * <td style="text-align: right">2565k</td> 093 * </tr> 094 * <tr> 095 * <td style="text-align: right">600k</td> 096 * <td style="text-align: right">5800k</td> 097 * <td style="text-align: right">3065k</td> 098 * </tr> 099 * <tr> 100 * <td style="text-align: right">700k</td> 101 * <td style="text-align: right">6700k</td> 102 * <td style="text-align: right">3565k</td> 103 * </tr> 104 * <tr> 105 * <td style="text-align: right">800k</td> 106 * <td style="text-align: right">7600k</td> 107 * <td style="text-align: right">4065k</td> 108 * </tr> 109 * <tr> 110 * <td style="text-align: right">900k</td> 111 * <td style="text-align: right">8500k</td> 112 * <td style="text-align: right">4565k</td> 113 * </tr> 114 * </table> 115 * 116 * <p> 117 * For decompression {@code BZip2CompressorInputStream} allocates less memory if the bzipped input is smaller than one block. 118 * </p> 119 * 120 * <p> 121 * Instances of this class are not threadsafe. 122 * </p> 123 * 124 * <p> 125 * TODO: Update to BZip2 1.0.1 126 * </p> 127 * 128 * @NotThreadSafe 129 */ 130public class BZip2CompressorOutputStream extends CompressorOutputStream<OutputStream> implements BZip2Constants { 131 132 static final class Data { 133 134 // with blockSize 900k 135 /* maps unsigned byte => "does it occur in block" */ 136 final boolean[] inUse = new boolean[256]; // 256 byte 137 final byte[] unseqToSeq = new byte[256]; // 256 byte 138 final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte 139 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte 140 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte 141 142 final byte[] generateMTFValues_yy = new byte[256]; // 256 byte 143 final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 144 // byte 145 final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 146 // byte 147 final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte 148 final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte 149 final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 150 // byte 151 final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte 152 final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte 153 154 final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte 155 final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 156 final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte 157 158 // ------------ 159 // 333408 byte 160 161 /* 162 * holds the RLEd block of original data starting at index 1. After sorting the last byte added to the buffer is at index 0. 163 */ 164 final byte[] block; // 900021 byte 165 /* 166 * maps index in Burrows-Wheeler transformed block => index of byte in original block 167 */ 168 final int[] fmap; // 3600000 byte 169 final char[] sfmap; // 3600000 byte 170 // ------------ 171 // 8433529 byte 172 // ============ 173 174 /** 175 * Index of original line in Burrows-Wheeler table. 176 * 177 * <p> 178 * This is the index in fmap that points to the last byte of the original data. 179 * </p> 180 */ 181 int origPtr; 182 183 Data(final int blockSize100k) { 184 final int n = blockSize100k * BASEBLOCKSIZE; 185 this.block = new byte[n + 1 + NUM_OVERSHOOT_BYTES]; 186 this.fmap = new int[n]; 187 this.sfmap = new char[2 * n]; 188 } 189 190 } 191 192 /** 193 * The minimum supported blocksize {@code == 1}. 194 */ 195 public static final int MIN_BLOCKSIZE = 1; 196 197 /** 198 * The maximum supported blocksize {@code == 9}. 199 */ 200 public static final int MAX_BLOCKSIZE = 9; 201 private static final int GREATER_ICOST = 15; 202 203 private static final int LESSER_ICOST = 0; 204 205 /** 206 * Chooses a blocksize based on the given length of the data to compress. 207 * 208 * @return The blocksize, between {@link #MIN_BLOCKSIZE} and {@link #MAX_BLOCKSIZE} both inclusive. For a negative {@code inputLength} this method returns 209 * {@code MAX_BLOCKSIZE} always. 210 * 211 * @param inputLength The length of the data which will be compressed by {@code BZip2CompressorOutputStream}. 212 */ 213 public static int chooseBlockSize(final long inputLength) { 214 return inputLength > 0 ? (int) Math.min(inputLength / 132000 + 1, 9) : MAX_BLOCKSIZE; 215 } 216 217 private static void hbAssignCodes(final int[] code, final byte[] length, final int minLen, final int maxLen, final int alphaSize) { 218 int vec = 0; 219 for (int n = minLen; n <= maxLen; n++) { 220 for (int i = 0; i < alphaSize; i++) { 221 if ((length[i] & 0xff) == n) { 222 code[i] = vec; 223 vec++; 224 } 225 } 226 vec <<= 1; 227 } 228 } 229 230 private static void hbMakeCodeLengths(final byte[] len, final int[] freq, final Data dat, final int alphaSize, final int maxLen) { 231 /* 232 * Nodes and heap entries run from 1. Entry 0 for both the heap and nodes is a sentinel. 233 */ 234 final int[] heap = dat.heap; 235 final int[] weight = dat.weight; 236 final int[] parent = dat.parent; 237 238 for (int i = alphaSize; --i >= 0;) { 239 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 240 } 241 242 for (boolean tooLong = true; tooLong;) { 243 tooLong = false; 244 245 int nNodes = alphaSize; 246 int nHeap = 0; 247 heap[0] = 0; 248 weight[0] = 0; 249 parent[0] = -2; 250 251 for (int i = 1; i <= alphaSize; i++) { 252 parent[i] = -1; 253 nHeap++; 254 heap[nHeap] = i; 255 256 int zz = nHeap; 257 final int tmp = heap[zz]; 258 while (weight[tmp] < weight[heap[zz >> 1]]) { 259 heap[zz] = heap[zz >> 1]; 260 zz >>= 1; 261 } 262 heap[zz] = tmp; 263 } 264 265 while (nHeap > 1) { 266 final int n1 = heap[1]; 267 heap[1] = heap[nHeap]; 268 nHeap--; 269 270 int yy = 0; 271 int zz = 1; 272 int tmp = heap[1]; 273 274 while (true) { 275 yy = zz << 1; 276 277 if (yy > nHeap) { 278 break; 279 } 280 281 if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) { 282 yy++; 283 } 284 285 if (weight[tmp] < weight[heap[yy]]) { 286 break; 287 } 288 289 heap[zz] = heap[yy]; 290 zz = yy; 291 } 292 293 heap[zz] = tmp; 294 295 final int n2 = heap[1]; 296 heap[1] = heap[nHeap]; 297 nHeap--; 298 299 yy = 0; 300 zz = 1; 301 tmp = heap[1]; 302 303 while (true) { 304 yy = zz << 1; 305 306 if (yy > nHeap) { 307 break; 308 } 309 310 if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]]) { 311 yy++; 312 } 313 314 if (weight[tmp] < weight[heap[yy]]) { 315 break; 316 } 317 318 heap[zz] = heap[yy]; 319 zz = yy; 320 } 321 322 heap[zz] = tmp; 323 nNodes++; 324 parent[n1] = parent[n2] = nNodes; 325 326 final int weight_n1 = weight[n1]; 327 final int weight_n2 = weight[n2]; 328 weight[nNodes] = (weight_n1 & 0xffffff00) + (weight_n2 & 0xffffff00) | 1 + Math.max(weight_n1 & 0x000000ff, weight_n2 & 0x000000ff); 329 330 parent[nNodes] = -1; 331 nHeap++; 332 heap[nHeap] = nNodes; 333 334 tmp = 0; 335 zz = nHeap; 336 tmp = heap[zz]; 337 final int weight_tmp = weight[tmp]; 338 while (weight_tmp < weight[heap[zz >> 1]]) { 339 heap[zz] = heap[zz >> 1]; 340 zz >>= 1; 341 } 342 heap[zz] = tmp; 343 344 } 345 346 for (int i = 1; i <= alphaSize; i++) { 347 int j = 0; 348 int k = i; 349 350 for (int parent_k; (parent_k = parent[k]) >= 0;) { 351 k = parent_k; 352 j++; 353 } 354 355 len[i - 1] = (byte) j; 356 if (j > maxLen) { 357 tooLong = true; 358 } 359 } 360 361 if (tooLong) { 362 for (int i = 1; i < alphaSize; i++) { 363 int j = weight[i] >> 8; 364 j = 1 + (j >> 1); 365 weight[i] = j << 8; 366 } 367 } 368 } 369 } 370 371 /** 372 * Index of the last char in the block, so the block size == last + 1. 373 */ 374 private int last; 375 /** 376 * Always: in the range 0 .. 9. The current block size is 100000 * this number. 377 */ 378 private final int blockSize100k; 379 380 private int bsBuff; 381 382 private int bsLive; 383 384 private final CRC crc = new CRC(); 385 private int nInUse; 386 387 private int nMTF; 388 private int currentChar = -1; 389 private int runLength; 390 391 private int combinedCRC; 392 393 private final int allowableBlockSize; 394 /** 395 * All memory intensive stuff. 396 */ 397 private Data data; 398 399 private BlockSort blockSorter; 400 401 private volatile boolean closed; 402 403 /** 404 * Constructs a new {@code BZip2CompressorOutputStream} with a blocksize of 900k. 405 * 406 * @param out the destination stream. 407 * 408 * @throws IOException if an I/O error occurs in the specified stream. 409 * @throws NullPointerException if {@code out == null}. 410 */ 411 public BZip2CompressorOutputStream(final OutputStream out) throws IOException { 412 this(out, MAX_BLOCKSIZE); 413 } 414 415 /** 416 * Constructs a new {@code BZip2CompressorOutputStream} with specified blocksize. 417 * 418 * @param out the destination stream. 419 * @param blockSize the blockSize as 100k units. 420 * 421 * @throws IOException if an I/O error occurs in the specified stream. 422 * @throws IllegalArgumentException if {@code (blockSize < 1) || (blockSize > 9)}. 423 * @throws NullPointerException if {@code out == null}. 424 * 425 * @see #MIN_BLOCKSIZE 426 * @see #MAX_BLOCKSIZE 427 */ 428 public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException { 429 super(out); 430 if (blockSize < 1) { 431 throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1"); 432 } 433 if (blockSize > 9) { 434 throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9"); 435 } 436 437 this.blockSize100k = blockSize; 438 439 /* 20 is just a paranoia constant */ 440 this.allowableBlockSize = this.blockSize100k * BASEBLOCKSIZE - 20; 441 init(); 442 } 443 444 private void blockSort() { 445 blockSorter.blockSort(data, last); 446 } 447 448 private void bsFinishedWithStream() throws IOException { 449 while (this.bsLive > 0) { 450 final int ch = this.bsBuff >> 24; 451 this.out.write(ch); // write 8-bit 452 this.bsBuff <<= 8; 453 this.bsLive -= 8; 454 } 455 } 456 457 private void bsPutInt(final int u) throws IOException { 458 bsW(8, u >> 24 & 0xff); 459 bsW(8, u >> 16 & 0xff); 460 bsW(8, u >> 8 & 0xff); 461 bsW(8, u & 0xff); 462 } 463 464 private void bsPutUByte(final int c) throws IOException { 465 bsW(8, c); 466 } 467 468 private void bsW(final int n, final int v) throws IOException { 469 final OutputStream outShadow = this.out; 470 int bsLiveShadow = this.bsLive; 471 int bsBuffShadow = this.bsBuff; 472 473 while (bsLiveShadow >= 8) { 474 outShadow.write(bsBuffShadow >> 24); // write 8-bit 475 bsBuffShadow <<= 8; 476 bsLiveShadow -= 8; 477 } 478 479 this.bsBuff = bsBuffShadow | v << 32 - bsLiveShadow - n; 480 this.bsLive = bsLiveShadow + n; 481 } 482 483 @Override 484 public void close() throws IOException { 485 if (!closed) { 486 try { 487 finish(); 488 } finally { 489 IOUtils.close(out); 490 } 491 } 492 } 493 494 private void endBlock() throws IOException { 495 final int blockCRC = this.crc.getValue(); 496 this.combinedCRC = this.combinedCRC << 1 | this.combinedCRC >>> 31; 497 this.combinedCRC ^= blockCRC; 498 499 // empty block at end of file 500 if (this.last == -1) { 501 return; 502 } 503 504 /* sort the block and establish posn of original string */ 505 blockSort(); 506 507 /* 508 * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 :-). A 32 bit value does not really give a strong enough guarantee that the 509 * value will not appear by chance in the compressed data stream. Worst-case probability of this event, for a 900k block, is about 2.0e-3 for 32 bits, 510 * 1.0e-5 for 40 bits and 4.0e-8 for 48 bits. For a compressed file of size 100Gb -- about 100000 blocks -- only a 48-bit marker will do. NB: normal 511 * compression/ decompression doesn't rely on these statistical properties. They are only important when trying to recover blocks from damaged files. 512 */ 513 bsPutUByte(0x31); 514 bsPutUByte(0x41); 515 bsPutUByte(0x59); 516 bsPutUByte(0x26); 517 bsPutUByte(0x53); 518 bsPutUByte(0x59); 519 520 /* Now the block's CRC, so it is in a known place. */ 521 bsPutInt(blockCRC); 522 523 /* Now a single bit indicating no randomization. */ 524 bsW(1, 0); 525 526 /* Finally, block's contents proper. */ 527 moveToFrontCodeAndSend(); 528 } 529 530 private void endCompression() throws IOException { 531 /* 532 * Now another magic 48-bit number, 0x177245385090, to indicate the end of the last block. (sqrt(pi), if you want to know. I did want to use e, but it 533 * contains too much repetition -- 27 18 28 18 28 46 -- for me to feel statistically comfortable. Call me paranoid.) 534 */ 535 bsPutUByte(0x17); 536 bsPutUByte(0x72); 537 bsPutUByte(0x45); 538 bsPutUByte(0x38); 539 bsPutUByte(0x50); 540 bsPutUByte(0x90); 541 542 bsPutInt(this.combinedCRC); 543 bsFinishedWithStream(); 544 } 545 546 public void finish() throws IOException { 547 if (!closed) { 548 closed = true; 549 try { 550 if (this.runLength > 0) { 551 writeRun(); 552 } 553 this.currentChar = -1; 554 endBlock(); 555 endCompression(); 556 } finally { 557 this.out = null; 558 this.blockSorter = null; 559 this.data = null; 560 } 561 } 562 } 563 564 @Override 565 public void flush() throws IOException { 566 final OutputStream outShadow = this.out; 567 if (outShadow != null) { 568 outShadow.flush(); 569 } 570 } 571 572 /* 573 * Performs Move-To-Front on the Burrows-Wheeler transformed buffer, storing the MTFed data in data.sfmap in RUNA/RUNB run-length-encoded form. 574 * 575 * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> 576 */ 577 private void generateMTFValues() { 578 final int lastShadow = this.last; 579 final Data dataShadow = this.data; 580 final boolean[] inUse = dataShadow.inUse; 581 final byte[] block = dataShadow.block; 582 final int[] fmap = dataShadow.fmap; 583 final char[] sfmap = dataShadow.sfmap; 584 final int[] mtfFreq = dataShadow.mtfFreq; 585 final byte[] unseqToSeq = dataShadow.unseqToSeq; 586 final byte[] yy = dataShadow.generateMTFValues_yy; 587 588 // make maps 589 int nInUseShadow = 0; 590 for (int i = 0; i < 256; i++) { 591 if (inUse[i]) { 592 unseqToSeq[i] = (byte) nInUseShadow; 593 nInUseShadow++; 594 } 595 } 596 this.nInUse = nInUseShadow; 597 598 final int eob = nInUseShadow + 1; 599 600 Arrays.fill(mtfFreq, 0, eob + 1, 0); 601 602 for (int i = nInUseShadow; --i >= 0;) { 603 yy[i] = (byte) i; 604 } 605 606 int wr = 0; 607 int zPend = 0; 608 609 for (int i = 0; i <= lastShadow; i++) { 610 final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; 611 byte tmp = yy[0]; 612 int j = 0; 613 614 while (ll_i != tmp) { 615 j++; 616 final byte tmp2 = tmp; 617 tmp = yy[j]; 618 yy[j] = tmp2; 619 } 620 yy[0] = tmp; 621 622 if (j == 0) { 623 zPend++; 624 } else { 625 if (zPend > 0) { 626 zPend--; 627 while (true) { 628 if ((zPend & 1) == 0) { 629 sfmap[wr] = RUNA; 630 wr++; 631 mtfFreq[RUNA]++; 632 } else { 633 sfmap[wr] = RUNB; 634 wr++; 635 mtfFreq[RUNB]++; 636 } 637 638 if (zPend < 2) { 639 break; 640 } 641 zPend = zPend - 2 >> 1; 642 } 643 zPend = 0; 644 } 645 sfmap[wr] = (char) (j + 1); 646 wr++; 647 mtfFreq[j + 1]++; 648 } 649 } 650 651 if (zPend > 0) { 652 zPend--; 653 while (true) { 654 if ((zPend & 1) == 0) { 655 sfmap[wr] = RUNA; 656 wr++; 657 mtfFreq[RUNA]++; 658 } else { 659 sfmap[wr] = RUNB; 660 wr++; 661 mtfFreq[RUNB]++; 662 } 663 664 if (zPend < 2) { 665 break; 666 } 667 zPend = zPend - 2 >> 1; 668 } 669 } 670 671 sfmap[wr] = (char) eob; 672 mtfFreq[eob]++; 673 this.nMTF = wr + 1; 674 } 675 676 /** 677 * Returns the blocksize parameter specified at construction time. 678 * 679 * @return the blocksize parameter specified at construction time 680 */ 681 public final int getBlockSize() { 682 return this.blockSize100k; 683 } 684 685 /** 686 * Writes magic bytes like BZ on the first position of the stream and bytes indicating the file-format, which is huffmanized, followed by a digit indicating 687 * blockSize100k. 688 * 689 * @throws IOException if the magic bytes could not been written 690 */ 691 private void init() throws IOException { 692 bsPutUByte('B'); 693 bsPutUByte('Z'); 694 695 this.data = new Data(this.blockSize100k); 696 this.blockSorter = new BlockSort(this.data); 697 698 // huffmanized magic bytes 699 bsPutUByte('h'); 700 bsPutUByte('0' + this.blockSize100k); 701 702 this.combinedCRC = 0; 703 initBlock(); 704 } 705 706 private void initBlock() { 707 // blockNo++; 708 this.crc.reset(); 709 this.last = -1; 710 // ch = 0; 711 712 final boolean[] inUse = this.data.inUse; 713 for (int i = 256; --i >= 0;) { 714 inUse[i] = false; 715 } 716 717 } 718 719 private void moveToFrontCodeAndSend() throws IOException { 720 bsW(24, this.data.origPtr); 721 generateMTFValues(); 722 sendMTFValues(); 723 } 724 725 private void sendMTFValues() throws IOException { 726 final byte[][] len = this.data.sendMTFValues_len; 727 final int alphaSize = this.nInUse + 2; 728 729 for (int t = N_GROUPS; --t >= 0;) { 730 final byte[] len_t = len[t]; 731 for (int v = alphaSize; --v >= 0;) { 732 len_t[v] = GREATER_ICOST; 733 } 734 } 735 736 /* Decide how many coding tables to use */ 737 // assert (this.nMTF > 0) : this.nMTF; 738 final int nGroups = this.nMTF < 200 ? 2 : this.nMTF < 600 ? 3 : this.nMTF < 1200 ? 4 : this.nMTF < 2400 ? 5 : 6; 739 740 /* Generate an initial set of coding tables */ 741 sendMTFValues0(nGroups, alphaSize); 742 743 /* 744 * Iterate up to N_ITERS times to improve the tables. 745 */ 746 final int nSelectors = sendMTFValues1(nGroups, alphaSize); 747 748 /* Compute MTF values for the selectors. */ 749 sendMTFValues2(nGroups, nSelectors); 750 751 /* Assign actual codes for the tables. */ 752 sendMTFValues3(nGroups, alphaSize); 753 754 /* Transmit the mapping table. */ 755 sendMTFValues4(); 756 757 /* Now the selectors. */ 758 sendMTFValues5(nGroups, nSelectors); 759 760 /* Now the coding tables. */ 761 sendMTFValues6(nGroups, alphaSize); 762 763 /* And finally, the block data proper */ 764 sendMTFValues7(); 765 } 766 767 private void sendMTFValues0(final int nGroups, final int alphaSize) { 768 final byte[][] len = this.data.sendMTFValues_len; 769 final int[] mtfFreq = this.data.mtfFreq; 770 771 int remF = this.nMTF; 772 int gs = 0; 773 774 for (int nPart = nGroups; nPart > 0; nPart--) { 775 final int tFreq = remF / nPart; 776 int ge = gs - 1; 777 int aFreq = 0; 778 779 for (final int a = alphaSize - 1; aFreq < tFreq && ge < a;) { 780 aFreq += mtfFreq[++ge]; 781 } 782 783 if (ge > gs && nPart != nGroups && nPart != 1 && (nGroups - nPart & 1) != 0) { 784 aFreq -= mtfFreq[ge--]; 785 } 786 787 final byte[] len_np = len[nPart - 1]; 788 for (int v = alphaSize; --v >= 0;) { 789 if (v >= gs && v <= ge) { 790 len_np[v] = LESSER_ICOST; 791 } else { 792 len_np[v] = GREATER_ICOST; 793 } 794 } 795 796 gs = ge + 1; 797 remF -= aFreq; 798 } 799 } 800 801 private int sendMTFValues1(final int nGroups, final int alphaSize) { 802 final Data dataShadow = this.data; 803 final int[][] rfreq = dataShadow.sendMTFValues_rfreq; 804 final int[] fave = dataShadow.sendMTFValues_fave; 805 final short[] cost = dataShadow.sendMTFValues_cost; 806 final char[] sfmap = dataShadow.sfmap; 807 final byte[] selector = dataShadow.selector; 808 final byte[][] len = dataShadow.sendMTFValues_len; 809 final byte[] len_0 = len[0]; 810 final byte[] len_1 = len[1]; 811 final byte[] len_2 = len[2]; 812 final byte[] len_3 = len[3]; 813 final byte[] len_4 = len[4]; 814 final byte[] len_5 = len[5]; 815 final int nMTFShadow = this.nMTF; 816 817 int nSelectors = 0; 818 819 for (int iter = 0; iter < N_ITERS; iter++) { 820 for (int t = nGroups; --t >= 0;) { 821 fave[t] = 0; 822 final int[] rfreqt = rfreq[t]; 823 for (int i = alphaSize; --i >= 0;) { 824 rfreqt[i] = 0; 825 } 826 } 827 828 nSelectors = 0; 829 830 for (int gs = 0; gs < this.nMTF;) { 831 // Set group start & end marks. 832 833 // Calculate the cost of this group as coded by each of the 834 // coding tables. 835 836 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 837 838 final byte mask = (byte) 0xff; 839 if (nGroups == N_GROUPS) { 840 // unrolled version of the else-block 841 842 short cost0 = 0; 843 short cost1 = 0; 844 short cost2 = 0; 845 short cost3 = 0; 846 short cost4 = 0; 847 short cost5 = 0; 848 849 for (int i = gs; i <= ge; i++) { 850 final int icv = sfmap[i]; 851 cost0 += (short) (len_0[icv] & mask); 852 cost1 += (short) (len_1[icv] & mask); 853 cost2 += (short) (len_2[icv] & mask); 854 cost3 += (short) (len_3[icv] & mask); 855 cost4 += (short) (len_4[icv] & mask); 856 cost5 += (short) (len_5[icv] & mask); 857 } 858 859 cost[0] = cost0; 860 cost[1] = cost1; 861 cost[2] = cost2; 862 cost[3] = cost3; 863 cost[4] = cost4; 864 cost[5] = cost5; 865 866 } else { 867 for (int t = nGroups; --t >= 0;) { 868 cost[t] = 0; 869 } 870 871 for (int i = gs; i <= ge; i++) { 872 final int icv = sfmap[i]; 873 for (int t = nGroups; --t >= 0;) { 874 cost[t] += (short) (len[t][icv] & mask); 875 } 876 } 877 } 878 879 /* 880 * Find the coding table which is best for this group, and record its identity in the selector table. 881 */ 882 int bt = -1; 883 for (int t = nGroups, bc = 999999999; --t >= 0;) { 884 final int cost_t = cost[t]; 885 if (cost_t < bc) { 886 bc = cost_t; 887 bt = t; 888 } 889 } 890 891 fave[bt]++; 892 selector[nSelectors] = (byte) bt; 893 nSelectors++; 894 895 /* 896 * Increment the symbol frequencies for the selected table. 897 */ 898 final int[] rfreq_bt = rfreq[bt]; 899 for (int i = gs; i <= ge; i++) { 900 rfreq_bt[sfmap[i]]++; 901 } 902 903 gs = ge + 1; 904 } 905 906 /* 907 * Recompute the tables based on the accumulated frequencies. 908 */ 909 for (int t = 0; t < nGroups; t++) { 910 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); 911 } 912 } 913 914 return nSelectors; 915 } 916 917 private void sendMTFValues2(final int nGroups, final int nSelectors) { 918 // assert (nGroups < 8) : nGroups; 919 920 final Data dataShadow = this.data; 921 final byte[] pos = dataShadow.sendMTFValues2_pos; 922 923 for (int i = nGroups; --i >= 0;) { 924 pos[i] = (byte) i; 925 } 926 927 for (int i = 0; i < nSelectors; i++) { 928 final byte ll_i = dataShadow.selector[i]; 929 byte tmp = pos[0]; 930 int j = 0; 931 932 while (ll_i != tmp) { 933 j++; 934 final byte tmp2 = tmp; 935 tmp = pos[j]; 936 pos[j] = tmp2; 937 } 938 939 pos[0] = tmp; 940 dataShadow.selectorMtf[i] = (byte) j; 941 } 942 } 943 944 private void sendMTFValues3(final int nGroups, final int alphaSize) { 945 final int[][] code = this.data.sendMTFValues_code; 946 final byte[][] len = this.data.sendMTFValues_len; 947 948 for (int t = 0; t < nGroups; t++) { 949 int minLen = 32; 950 int maxLen = 0; 951 final byte[] len_t = len[t]; 952 for (int i = alphaSize; --i >= 0;) { 953 final int l = len_t[i] & 0xff; 954 if (l > maxLen) { 955 maxLen = l; 956 } 957 if (l < minLen) { 958 minLen = l; 959 } 960 } 961 962 // assert (maxLen <= 20) : maxLen; 963 // assert (minLen >= 1) : minLen; 964 965 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); 966 } 967 } 968 969 private void sendMTFValues4() throws IOException { 970 final boolean[] inUse = this.data.inUse; 971 final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; 972 973 for (int i = 16; --i >= 0;) { 974 inUse16[i] = false; 975 final int i16 = i * 16; 976 for (int j = 16; --j >= 0;) { 977 if (inUse[i16 + j]) { 978 inUse16[i] = true; 979 break; 980 } 981 } 982 } 983 984 for (int i = 0; i < 16; i++) { 985 bsW(1, inUse16[i] ? 1 : 0); 986 } 987 988 final OutputStream outShadow = this.out; 989 int bsLiveShadow = this.bsLive; 990 int bsBuffShadow = this.bsBuff; 991 992 for (int i = 0; i < 16; i++) { 993 if (inUse16[i]) { 994 final int i16 = i * 16; 995 for (int j = 0; j < 16; j++) { 996 // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); 997 while (bsLiveShadow >= 8) { 998 outShadow.write(bsBuffShadow >> 24); // write 8-bit 999 bsBuffShadow <<= 8; 1000 bsLiveShadow -= 8; 1001 } 1002 if (inUse[i16 + j]) { 1003 bsBuffShadow |= 1 << 32 - bsLiveShadow - 1; 1004 } 1005 bsLiveShadow++; 1006 } 1007 } 1008 } 1009 1010 this.bsBuff = bsBuffShadow; 1011 this.bsLive = bsLiveShadow; 1012 } 1013 1014 private void sendMTFValues5(final int nGroups, final int nSelectors) throws IOException { 1015 bsW(3, nGroups); 1016 bsW(15, nSelectors); 1017 1018 final OutputStream outShadow = this.out; 1019 final byte[] selectorMtf = this.data.selectorMtf; 1020 1021 int bsLiveShadow = this.bsLive; 1022 int bsBuffShadow = this.bsBuff; 1023 1024 for (int i = 0; i < nSelectors; i++) { 1025 for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { 1026 // inlined: bsW(1, 1); 1027 while (bsLiveShadow >= 8) { 1028 outShadow.write(bsBuffShadow >> 24); 1029 bsBuffShadow <<= 8; 1030 bsLiveShadow -= 8; 1031 } 1032 bsBuffShadow |= 1 << 32 - bsLiveShadow - 1; 1033 bsLiveShadow++; 1034 } 1035 1036 // inlined: bsW(1, 0); 1037 while (bsLiveShadow >= 8) { 1038 outShadow.write(bsBuffShadow >> 24); 1039 bsBuffShadow <<= 8; 1040 bsLiveShadow -= 8; 1041 } 1042 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1043 bsLiveShadow++; 1044 } 1045 1046 this.bsBuff = bsBuffShadow; 1047 this.bsLive = bsLiveShadow; 1048 } 1049 1050 private void sendMTFValues6(final int nGroups, final int alphaSize) throws IOException { 1051 final byte[][] len = this.data.sendMTFValues_len; 1052 final OutputStream outShadow = this.out; 1053 1054 int bsLiveShadow = this.bsLive; 1055 int bsBuffShadow = this.bsBuff; 1056 1057 for (int t = 0; t < nGroups; t++) { 1058 final byte[] len_t = len[t]; 1059 int curr = len_t[0] & 0xff; 1060 1061 // inlined: bsW(5, curr); 1062 while (bsLiveShadow >= 8) { 1063 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1064 bsBuffShadow <<= 8; 1065 bsLiveShadow -= 8; 1066 } 1067 bsBuffShadow |= curr << 32 - bsLiveShadow - 5; 1068 bsLiveShadow += 5; 1069 1070 for (int i = 0; i < alphaSize; i++) { 1071 final int lti = len_t[i] & 0xff; 1072 while (curr < lti) { 1073 // inlined: bsW(2, 2); 1074 while (bsLiveShadow >= 8) { 1075 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1076 bsBuffShadow <<= 8; 1077 bsLiveShadow -= 8; 1078 } 1079 bsBuffShadow |= 2 << 32 - bsLiveShadow - 2; 1080 bsLiveShadow += 2; 1081 1082 curr++; /* 10 */ 1083 } 1084 1085 while (curr > lti) { 1086 // inlined: bsW(2, 3); 1087 while (bsLiveShadow >= 8) { 1088 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1089 bsBuffShadow <<= 8; 1090 bsLiveShadow -= 8; 1091 } 1092 bsBuffShadow |= 3 << 32 - bsLiveShadow - 2; 1093 bsLiveShadow += 2; 1094 1095 curr--; /* 11 */ 1096 } 1097 1098 // inlined: bsW(1, 0); 1099 while (bsLiveShadow >= 8) { 1100 outShadow.write(bsBuffShadow >> 24); // write 8-bit 1101 bsBuffShadow <<= 8; 1102 bsLiveShadow -= 8; 1103 } 1104 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); 1105 bsLiveShadow++; 1106 } 1107 } 1108 1109 this.bsBuff = bsBuffShadow; 1110 this.bsLive = bsLiveShadow; 1111 } 1112 1113 private void sendMTFValues7() throws IOException { 1114 final Data dataShadow = this.data; 1115 final byte[][] len = dataShadow.sendMTFValues_len; 1116 final int[][] code = dataShadow.sendMTFValues_code; 1117 final OutputStream outShadow = this.out; 1118 final byte[] selector = dataShadow.selector; 1119 final char[] sfmap = dataShadow.sfmap; 1120 final int nMTFShadow = this.nMTF; 1121 1122 int selCtr = 0; 1123 1124 int bsLiveShadow = this.bsLive; 1125 int bsBuffShadow = this.bsBuff; 1126 1127 for (int gs = 0; gs < nMTFShadow;) { 1128 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); 1129 final int selector_selCtr = selector[selCtr] & 0xff; 1130 final int[] code_selCtr = code[selector_selCtr]; 1131 final byte[] len_selCtr = len[selector_selCtr]; 1132 1133 while (gs <= ge) { 1134 final int sfmap_i = sfmap[gs]; 1135 1136 // 1137 // inlined: bsW(len_selCtr[sfmap_i] & 0xff, 1138 // code_selCtr[sfmap_i]); 1139 // 1140 while (bsLiveShadow >= 8) { 1141 outShadow.write(bsBuffShadow >> 24); 1142 bsBuffShadow <<= 8; 1143 bsLiveShadow -= 8; 1144 } 1145 final int n = len_selCtr[sfmap_i] & 0xFF; 1146 bsBuffShadow |= code_selCtr[sfmap_i] << 32 - bsLiveShadow - n; 1147 bsLiveShadow += n; 1148 1149 gs++; 1150 } 1151 1152 gs = ge + 1; 1153 selCtr++; 1154 } 1155 1156 this.bsBuff = bsBuffShadow; 1157 this.bsLive = bsLiveShadow; 1158 } 1159 1160 @Override 1161 public void write(final byte[] buf, int offs, final int len) throws IOException { 1162 if (offs < 0) { 1163 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); 1164 } 1165 if (len < 0) { 1166 throw new IndexOutOfBoundsException("len(" + len + ") < 0."); 1167 } 1168 if (offs + len > buf.length) { 1169 throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" + len + ") > buf.length(" + buf.length + ")."); 1170 } 1171 if (closed) { 1172 throw new IOException("Stream closed"); 1173 } 1174 1175 for (final int hi = offs + len; offs < hi;) { 1176 write0(buf[offs++]); 1177 } 1178 } 1179 1180 @Override 1181 public void write(final int b) throws IOException { 1182 if (closed) { 1183 throw new IOException("Closed"); 1184 } 1185 write0(b); 1186 } 1187 1188 /** 1189 * Keeps track of the last bytes written and implicitly performs run-length encoding as the first step of the bzip2 algorithm. 1190 */ 1191 private void write0(int b) throws IOException { 1192 if (this.currentChar != -1) { 1193 b &= 0xff; 1194 if (this.currentChar == b) { 1195 if (++this.runLength > 254) { 1196 writeRun(); 1197 this.currentChar = -1; 1198 this.runLength = 0; 1199 } 1200 // else nothing to do 1201 } else { 1202 writeRun(); 1203 this.runLength = 1; 1204 this.currentChar = b; 1205 } 1206 } else { 1207 this.currentChar = b & 0xff; 1208 this.runLength++; 1209 } 1210 } 1211 1212 /** 1213 * Writes the current byte to the buffer, run-length encoding it if it has been repeated at least four times (the first step RLEs sequences of four 1214 * identical bytes). 1215 * 1216 * <p> 1217 * Flushes the current block before writing data if it is full. 1218 * </p> 1219 * 1220 * <p> 1221 * "write to the buffer" means adding to data.buffer starting two steps "after" this.last - initially starting at index 1 (not 0) - and updating this.last 1222 * to point to the last index written minus 1. 1223 * </p> 1224 */ 1225 private void writeRun() throws IOException { 1226 final int lastShadow = this.last; 1227 1228 if (lastShadow < this.allowableBlockSize) { 1229 final int currentCharShadow = this.currentChar; 1230 final Data dataShadow = this.data; 1231 dataShadow.inUse[currentCharShadow] = true; 1232 final byte ch = (byte) currentCharShadow; 1233 1234 int runLengthShadow = this.runLength; 1235 this.crc.update(currentCharShadow, runLengthShadow); 1236 1237 switch (runLengthShadow) { 1238 case 1: 1239 dataShadow.block[lastShadow + 2] = ch; 1240 this.last = lastShadow + 1; 1241 break; 1242 1243 case 2: 1244 dataShadow.block[lastShadow + 2] = ch; 1245 dataShadow.block[lastShadow + 3] = ch; 1246 this.last = lastShadow + 2; 1247 break; 1248 1249 case 3: { 1250 final byte[] block = dataShadow.block; 1251 block[lastShadow + 2] = ch; 1252 block[lastShadow + 3] = ch; 1253 block[lastShadow + 4] = ch; 1254 this.last = lastShadow + 3; 1255 } 1256 break; 1257 1258 default: { 1259 runLengthShadow -= 4; 1260 dataShadow.inUse[runLengthShadow] = true; 1261 final byte[] block = dataShadow.block; 1262 block[lastShadow + 2] = ch; 1263 block[lastShadow + 3] = ch; 1264 block[lastShadow + 4] = ch; 1265 block[lastShadow + 5] = ch; 1266 block[lastShadow + 6] = (byte) runLengthShadow; 1267 this.last = lastShadow + 5; 1268 } 1269 break; 1270 1271 } 1272 } else { 1273 endBlock(); 1274 initBlock(); 1275 writeRun(); 1276 } 1277 } 1278 1279}