Source code for crx_unpack

# *-* coding: utf-8 *-*
"""
The purpose of this module is to mimic how Google Chrome unpacks CRX files
as closely as possible. Involved in this is the need to remove the CRX headers
(see the structure details of CRXs on the :doc:`Home <index>` page), separate
the underlying ZIP file, extract the contents of the ZIP file, among other
things.

For end users, the only function you should need to call is :py:func:`unpack`,
which will handle each of the steps mentioned above.
"""

import codecs
import logging
import os
import re
from os import path
from shutil import rmtree
from struct import Struct
from subprocess import check_call, CalledProcessError
from tempfile import NamedTemporaryFile
from zipfile import ZipFile, BadZipFile

try:
    from PIL import Image
except ImportError:
    FORBID_IMG_CONVERT = True

    # Just in case PIL isn't available *and* someone calls convert_imgs() directly...
    class Image:
        @staticmethod
        def open():
            # To understand why this raises this exception, see the convert_imgs() function
            raise OSError
else:
    FORBID_IMG_CONVERT = False

__all__ = ['unpack', 'BadCrxHeader', 'BadZipFile']

with open(path.abspath(path.join(path.dirname(__file__), 'VERSION'))) as _v:
    __version__ = _v.read().strip()

HEADER_FMT = Struct('<4s3I')
CONVERT_IMAGES = False
ERR_TYPE = (None,
            BadZipFile,
            MemoryError,
            IndexError,
            IsADirectoryError,
            NotADirectoryError)
DIR_MODE = 700
FILE_MODE = 644
JUST_EXIT = False


[docs]class BadCrxHeader(Exception): """Raised when a CRX's header length or values aren't valid."""
[docs]def unpack(crx_file, ext_dir=None, *, overwrite_if_exists=False, img_tallies=None, test_contents=True, passwd=None, skip_img_formats=None, unpack_in_subprocess=False, convert_in_subprocess=True, do_convert=CONVERT_IMAGES, zip_dir=None): """Unpack the CRX and extract it in the directory at ext_dir. Return the absolute, normalized path to the extraction directory (useful if it wasn't given as a parameter). As part of the unpacking process, this function will create a duplicate of the CRX but with the headers removed. This is technically a temporary file and will not persist past a reboot of the machine. However, because this ZIP file may be of interest to users, it is not deleted after the unpacking process is complete. To discover the path to this file, you'll need to either (1) set the ``zip_dir`` parameter yourself, or (2) set the logging level to ``DEBUG``. :param str crx_file: Path to the CRX file. :param str ext_dir: Directory where to extract the contents. :param str zip_dir: Directory where to store the ZIP file after removing the Chrome headers. Defaults to ``ext_dir/../``. :param bool overwrite_if_exists: When extracting to a directory that already exists, unpack will normally fail. Setting this to True will delete the contents of the destination directory before unzipping. :param dict img_tallies: A dictionary for storing the number of each type of image file converted during the unpacking process. :param bool test_contents: When unpacking the CRX, use the zipfile module's test feature to test the validity of the embedded zip file before extraction. :param str passwd: Optional password to use when extracting the CRX. If the CRX was obtained from Google's `Chrome Web Store <https://chrome.google.com/webstore>`_, you should *not* need this. If you provide a password here, it will be passed on to the :py:func:`extract_zip` function. :param skip_img_formats: The image formats to skip when attempting to convert them to PNG. This will typically include the strings ICO, PNG, and WEBP. :type skip_img_formats: list or tuple :param bool unpack_in_subprocess: Flag indicating if the job of unpacking the CRX should be done in a subprocess rather than calling the function directly. Usually this shouldn't need to be set as it will only hinder performance. :param bool convert_in_subprocess: Flag indicating if the job of converting the images in the CRX should be done in a subprocess rather than calling the function directly. Usually this SHOULD be set, since converting images can sometimes cause a segmentation fault, which kills the whole process. :param bool do_convert: Flag indicating whether images should be converted during the unpacking process (intended to mimic Chrome's unpacking process more closely). :return: Directory where the archive was extracted. :rtype: str """ if img_tallies is None: # This means that the calling function won't have access to these numbers, but for consistency's sake we'll # store them anyway. img_tallies = {} if skip_img_formats is None: skip_img_formats = [] # Make sure the file exists, get basic info about it crx_file = path.abspath(crx_file) crx_dir, base = path.split(crx_file) crx_size = path.getsize(crx_file) # Raises an error for us if the file doesn't exist if not re.search('\.crx$', base): raise OSError('File has unsupported extension, expected ".crx"') # Make sure directory exists for extracting the CRX if ext_dir is None: ext_dir = path.join(crx_dir, base.rsplit('.', 1)[0]) ext_dir = path.abspath(ext_dir) if path.isdir(ext_dir): if overwrite_if_exists: # Delete the entire directory and its contents. Ignore errors because the files will likely # be overwritten upon unzip anyway. rmtree(ext_dir, ignore_errors=True) else: err = FileExistsError() err.errno = '' err.strerror = 'Cannot unpack CRX to directory that already exists' err.filename = ext_dir raise err # Make sure directory exists for storing the zip file if zip_dir is None: zip_dir = path.join(ext_dir, '..') zip_dir = path.abspath(zip_dir) zip_path, signature, pub_key = [None] * 3 with open(crx_file, 'rb') as fin: header_vals = fin.read(4 * 4) # 4 values, each 4 bytes (32 bits) long if len(header_vals) < 16: raise BadCrxHeader('Invalid header length') magic, version, pup_key_len, sig_len = HEADER_FMT.unpack(header_vals) if magic != b'Cr24': raise BadCrxHeader('Invalid magic number: %s' % codecs.encode(magic, 'hex').decode('utf-8')) if version != 2: raise BadCrxHeader('Invalid version number: %d' % version) # Read in the public key and signature pub_key = fin.read(pup_key_len) signature = fin.read(sig_len) # TODO: Add verification methods for the public key and the signature # verify_pub_key(pub_key) # verify_signature(signature) # Detach zip file with NamedTemporaryFile('wb', dir=zip_dir, suffix='.zip', delete=False) as fout: zip_path = fout.name logging.debug('Created a named temp file at: {}'.format(zip_path)) # Read the rest of the file and save it as a .zip fout.write(fin.read()) if None in (zip_path, signature, pub_key): raise IOError('Could not separate zip file from the CRX.') path.getsize(zip_path) # Raises an error for us if the file doesn't exist # Extract the zip file if unpack_in_subprocess: prog = ['python3', __file__] if not test_contents: prog.append('-t') prog += ['xo', zip_path, ext_dir] if passwd is not None: prog.append(passwd) try: check_call(prog) except CalledProcessError as err: if 0 < err.returncode < len(ERR_TYPE): e = ERR_TYPE[err.returncode]() logging.warning('Got error of type "%s" while unpacking file at: %s' % (e.__class__.__name__, ext_dir)) # Re-raise the original exception raise ERR_TYPE[err.returncode] logging.warning('Got error of unknown type. Return code was: %d' % err.returncode) raise else: extract_zip(zip_path, ext_dir, pwd=passwd, test_contents=test_contents) if do_convert and not FORBID_IMG_CONVERT: if convert_in_subprocess: # Get the logger info so the subprocess can recreate it log_obj = logging.getLogger() fmt = log_obj.handlers[0].formatter._fmt log_file = log_obj.handlers[0].baseFilename level = log_obj.level # Create the subprocess prog = ['python3', __file__, 'convert', ext_dir, '--log-file=%s' % log_file, '--log-level=%s' % level, '--log-fmt=%s' % fmt] for f in skip_img_formats: prog += ['-s', f] try: check_call(prog) except CalledProcessError: logging.warning('Image conversion subprocess failed while unpacking %s' % crx_file) with open('failed_conversions.txt', 'a') as fout: fout.write(crx_file + '\n') else: convert_imgs(ext_dir, img_tallies=img_tallies, skip_other=skip_img_formats) set_mode(ext_dir) # Set mode after converting in case PIL changes things return ext_dir
[docs]def extract_zip(zip_file, extract_dir, pwd=None, test_contents=True, reraise_errors=not JUST_EXIT): """Simple wrapper around the Python zipfile.ZipFile class. Typically, it is not necessary to call this function directly from anywhere other than the :py:func:`unpack` function. :param str zip_file: Path to the zip file to be extracted. :param str extract_dir: Directory where the contents will be extracted. :param str pwd: Password for the zip file. :param bool test_contents: Whether to use the library's testzip() function on the archive before extracting. Tests if the CRC and header of each file in the archive are valid. :param bool reraise_errors: Set to False when the ``unpack`` script is run with the `xo` (extract only) command, in which case the function will return a non-zero value when an error occurs. The default, False, indicates that any errors that come up should just be re-raised. :rtype: None """ try: zip_obj = ZipFile(zip_file) if test_contents and zip_obj.testzip() is not None: # A file's CRC and/or header was invalid raise BadZipFile zip_obj.extractall(extract_dir, pwd=pwd) except ERR_TYPE[1:] as err: if reraise_errors: raise for i in range(1, len(ERR_TYPE)): if isinstance(err, ERR_TYPE[i]): exit(i)
def set_mode(base_dir, file_mode=FILE_MODE, dir_mode=DIR_MODE): """Set file and dir permissions for everything under base_dir. :param str base_dir: Top directory where to start working on changing the file and dir modes. :param int file_mode: The permissions number to give all files in octal. The default is what Chrome OS uses on files. :param int dir_mode: The permissions number to give all dirs in octal. The default is what Chrome OS uses on dirs. :rtype: None """ # Verify we're running in POSIX system first. No need to do this if we're in Windows. if os.name != 'posix': return # These are the file and dir permissions to set. File: 644 Dir: 700 file_mode = _mode_from_num(file_mode) dir_mode = _mode_from_num(dir_mode) for root, dirs, files in os.walk(base_dir): for name in files: os.chmod(path.join(root, name), file_mode) for name in dirs: os.chmod(path.join(root, name), dir_mode) def _mode_from_num(num): """Return the ORed stat objects representing the octal number num. :param int num: Permissions number in octal, e.g. 644. :return: The equivalent of bitwise ORing the permission constants in the stat library. :rtype: int """ assert num > 100 # The user should at least be able to read the file... usr = int(num / 100) grp = int(num / 10) - usr * 10 oth = num % 10 return usr << 6 | grp << 3 | oth def convert_imgs(base_dir, skip_gifs=True, skip_other=None, img_tallies=None): """Convert all images under base_dir to PNG format. Just like Chrome, the file extension remains unchanged. Also, GIFs are skipped to preserve their animations if skip_gifs is True. :param str base_dir: The directory to walk through. :param bool skip_gifs: When True, GIFs won't be converted to preserve their animations. :param skip_other: The image formats to skip when attempting to convert them to PNG. This will typically include the strings ICO, PNG, and WEBP. :type skip_other: list|tuple :param dict img_tallies: A dictionary for storing the number of each type of image file converted during the unpacking process. :rtype: None """ if skip_other is None: skip_other = [] if img_tallies is None: # This means that the calling function won't have access to these numbers, but for consistency's sake we'll # store them anyway. img_tallies = {} for root, dirs, files in os.walk(base_dir): for name in files: fname = path.join(root, name) # Check that it the file has a non-zero size if not path.getsize(fname): continue try: img = Image.open(fname) except OSError: # Means the file isn't an image pass except: logging.warning('Got unhandled exception during image conversion of file: %s' % fname, exc_info=1) else: # Increase the tally for this image type f = img.format if f not in img_tallies.keys(): img_tallies[f] = 0 img_tallies[f] += 1 # Don't attempt to convert certain types of images if skip_gifs and f == 'GIF': continue if f in skip_other: continue try: # The save will fail in certain cases if the image isn't converted to RGBA mode, which is a # normal RGB mode but with transparency. The palette of 'WEB' is a guess, but seemed a better # option than the other one available for that function. img.convert(mode='RGBA', palette='WEB').save(fname, format='PNG') except OSError: # Means the file isn't an image or has no length pass except: logging.warning('Got unhandled exception while SAVING a converted image: %s' % fname, exc_info=1) def verify_pub_key(pub_key): raise NotImplementedError def verify_signature(sig): raise NotImplementedError