PHP: Bzip2 Functions - Manual (original) (raw)

Table of Contents

Found A Problem?

salsi at icosaedro dot it

9 years ago

<?php
/*
 * Reading a BZIP2 file can be tricky, and I have never seen a complete example
 * of code that accounts for every possible failure that may happen when
 * accessing a file in general, and when decoding compressed data in this
 * specific case.
 * The example that follows is my attempt to address this gap.
 * Some things worth noting are:
 * - Encoding/decoding errors must be detected with bzerrno().
 * - bzopen() may fail returning FALSE if the file cannot be created or read,
 *   but it also succeeds if the file is not properly encoded.
 * - bzread() may fail returning FALSE if it fails reading from the source, but
 *   it returns the empty string on end of file and on encoding error.
 * - bzread() may still return corrupted data with no error whatsoever until the
 *   BZIP2 algorithm encounters the first hash code, so the data retrieved
 *   cannot be trusted until the very end of the file has been reached.
 */

// Safety first: report every error, warning and notice.
error_reporting(-1);

/**
 * Reads a BZIP2 compressed file and displays its content on stdout, with
 * full error detection.
 *
 * Every failure is reported on stdout as a line starting with "ERROR:".
 * Chunks printed before such a line may already be corrupted, because the
 * decoder can only detect corruption once it reaches a block checksum.
 *
 * Failure messages come from error_get_last() rather than from the
 * $php_errormsg variable / "track_errors" setting, which were deprecated in
 * PHP 7.2 and removed in PHP 8.0.
 *
 * @param string $fn Filename.
 * @return void
 */
function displaysBZIP2File($fn)
{
    // Message of the most recent PHP error; errors silenced with "@" are
    // still recorded by error_get_last(). Note that it may be a message left
    // by an earlier operation if the failing call raised no error itself.
    $lastError = function () {
        $err = error_get_last();
        return $err === null ? "unknown error" : $err["message"];
    };

    echo "Reading $fn:\n";
    $bz = @bzopen($fn, "r");
    if( $bz === FALSE ){
        echo "ERROR: bzopen() failed: ", $lastError(), "\n";
        return;
    }
    $errno = bzerrno($bz);
    if( $errno !== 0 ){
        // May detect "DATA_ERROR_MAGIC" (not a BZIP2 file), or "DATA_ERROR"
        // (BZIP2 decoding error) and maybe other BZIP2 errors too.
        echo "ERROR: bzopen(): BZIP2 decoding failed: ", bzerrstr($bz), "\n";
        @bzclose($bz);
        return;
    }
    while(! feof($bz) ) {
        $s = bzread($bz, 100);
        if( $s === FALSE ){
            echo "ERROR: bzread() failed: ", $lastError(), "\n";
            @bzclose($bz);
            return;
        }
        // bzread() returns the empty string both on end of file and on a
        // decoding error, so the BZIP2 error code must be checked every time.
        $errno = bzerrno($bz);
        if( $errno !== 0 ){
            // May detect "DATA_ERROR" (BZIP2 decoding error) and maybe other
            // BZIP2 errors too.
            echo "ERROR: bzread(): BZIP2 decoding failed: ", bzerrstr($bz), "\n";
            @bzclose($bz);
            return;
        }
        echo "read: ", var_export($s, true), "\n";
    }
    if( ! bzclose($bz) ){
        echo "ERROR: bzclose() failed: ", $lastError(), "\n";
    }
}

// Target file (created in the current working directory and overwritten by
// each test):
$fn = "test.bz2";

// Test 1: writes and reads back a good BZIP2 file:
file_put_contents($fn, bzcompress("Content of the file."));
displaysBZIP2File($fn); // works ok.

// Test 2: invalid content, not a BZIP2 file:
file_put_contents($fn, "This ia plain text file, no compression at all!");
displaysBZIP2File($fn); // ERROR: bzread(): BZIP2 decoding failed: DATA_ERROR_MAGIC

// Test 3: creates a corrupted BZIP2 file:
$plain = str_repeat("Quite random string. ", 1000);
$compressed = bzcompress($plain);
$compressed_corrupted = $compressed;
// Overwrite the byte in the middle of the compressed stream:
$compressed_corrupted[(int)(strlen($compressed)/2)] = 'X';
file_put_contents($fn, $compressed_corrupted);
displaysBZIP2File($fn);
// Only after some Kbytes of garbage, it tells:
// ERROR: bzread(): BZIP2 decoding failed: DATA_ERROR

// Safe coding against headache, ever.
?>

ec10 at gmx dot net

21 years ago

<?php
/**
 * Compresses a file with the bzip2 extension.
 *
 * The input is copied in binary-safe 4096-byte chunks, so any kind of file
 * (not only line-oriented text) can be compressed.
 *
 * @param string $in  Path of the existing, readable file to compress.
 * @param string $out Path of the compressed file to create or overwrite.
 * @return bool true on success; false if $in is not readable, $out is not
 *              writable, a file cannot be opened, or a read/write fails.
 */
function bzip2 ($in, $out)
{
    if (!file_exists ($in) || !is_readable ($in)) {
        return false;
    }
    // A new output file needs a writable directory; an existing one must be
    // writable itself.
    if ((!file_exists ($out) && !is_writable (dirname ($out)))
        || (file_exists ($out) && !is_writable ($out))) {
        return false;
    }

    $in_file = fopen ($in, "rb");
    if ($in_file === false) {
        return false;
    }
    // bzopen() supports only the modes "r" and "w"; "wb" is rejected.
    $out_file = bzopen ($out, "w");
    if ($out_file === false) {
        fclose ($in_file);
        return false;
    }

    $ok = true;
    while (!feof ($in_file)) {
        // fread() rather than fgets(): the data is binary, not lines.
        $buffer = fread ($in_file, 4096);
        if ($buffer === false) {
            $ok = false;
            break;
        }
        if ($buffer === "") {
            continue; // nothing read: feof() ends the loop
        }
        if (bzwrite ($out_file, $buffer) === false) {
            $ok = false;
            break;
        }
    }

    fclose ($in_file);
    bzclose ($out_file);

    return $ok;
}

/**
 * Uncompresses a file with the bzip2 extension.
 *
 * @param string $in  Path of the existing, readable BZIP2 file.
 * @param string $out Path of the uncompressed file to create or overwrite.
 * @return bool true on success; false if $in is not readable, $out is not
 *              writable, a file cannot be opened, a read/write fails, or the
 *              BZIP2 decoder reports an error. On failure $out may hold
 *              partial data.
 */
function bunzip2 ($in, $out)
{
    if (!file_exists ($in) || !is_readable ($in)) {
        return false;
    }
    // A new output file needs a writable directory; an existing one must be
    // writable itself.
    if ((!file_exists ($out) && !is_writable (dirname ($out)))
        || (file_exists ($out) && !is_writable ($out))) {
        return false;
    }

    // bzopen() supports only the modes "r" and "w"; "rb" is rejected.
    $in_file = bzopen ($in, "r");
    if ($in_file === false) {
        return false;
    }
    $out_file = fopen ($out, "wb");
    if ($out_file === false) {
        bzclose ($in_file);
        return false;
    }

    $ok = true;
    // Compare against false and "" explicitly: a plain truthiness test would
    // stop early on a chunk such as "0" and silently truncate the output.
    while (($buffer = bzread ($in_file, 4096)) !== false && $buffer !== "") {
        if (fwrite ($out_file, $buffer) === false) {
            $ok = false;
            break;
        }
    }
    // bzread() returns "" both at end of file and on a decoding error, so the
    // BZIP2 error code decides which one happened.
    if ($buffer === false || bzerrno ($in_file) !== 0) {
        $ok = false;
    }

    bzclose ($in_file);
    fclose ($out_file);

    return $ok;
}
?>