Source code for engineer.engine

# coding=utf-8
import argparse
import filecmp
import gzip
import logging
import os
import sys
import time
from codecs import open

import times
from feedgenerator import Rss201rev2Feed, Atom1Feed
from path import path

from engineer.exceptions import ThemeNotFoundException
from engineer.filters import naturaltime
from engineer.log import get_console_handler, bootstrap
from engineer.plugins import CommandPlugin, load_plugins
from engineer.util import relpath, compress, has_files, diff_dir
from engineer import version

try:
    import cPickle as pickle
except ImportError:
    import pickle

__author__ = 'Tyler Butler <tyler@tylerbutler.com>'


#noinspection PyUnusedLocal
def clean(args=None):
    from engineer.conf import settings

    logger = logging.getLogger('engineer.engine.clean')

    # Expand the ignore list to be full paths
    ignore_list = [path(settings.OUTPUT_DIR / i).normpath() for i in settings.OUTPUT_DIR_IGNORE]
    ignore_dirs = [p for p in ignore_list if p.isdir()]
    ignore_files = []
    for the_dir in ignore_dirs:
        ignore_files.extend([f.normpath() for f in the_dir.walkfiles()])
    ignore_files.extend([p.normpath() for p in ignore_list if p.isfile()])

    # Delete all FILES that are not ignored
    if settings.OUTPUT_DIR.exists():
        for p in settings.OUTPUT_DIR.walkfiles():
            if p in ignore_files:
                continue
            else:
                p.remove()

    # Delete all directories with no files. All non-ignored files were already deleted so every directory
    # except those that were ignored will be empty.
    for dirpath, dirnames, filenames in os.walk(settings.OUTPUT_DIR.normpath()):
        dirpath = path(dirpath)
        if dirpath != settings.OUTPUT_DIR:
            if not has_files(dirpath):
                # no files under this entire path, so can call rmtree
                # noinspection PyArgumentList
                dirpath.rmtree()
                del dirnames[:]
            elif dirpath in ignore_list:
                # we don't need to descend into the subdirs if this dir is in the ignore list
                del dirnames[:]

    try:
        settings.OUTPUT_CACHE_DIR.rmtree()
        settings.CACHE_DIR.rmtree()
        settings.ENGINEER.JINJA_CACHE_DIR.rmtree()
    except OSError as we:
        if hasattr(we, 'winerror') and we.winerror not in (2, 3):
            logger.exception(we.message)
        else:
            logger.warning("Couldn't find output directory: %s" % we.filename)

    logger.console('Cleaned output directory: %s' % settings.OUTPUT_DIR)


#noinspection PyShadowingBuiltins
[docs]def build(args=None):
    """Builds an Engineer site using the settings specified in *args*."""
    from engineer.conf import settings
    from engineer.loaders import LocalLoader
    from engineer.log import get_file_handler
    from engineer.models import PostCollection, TemplatePage
    from engineer.themes import ThemeManager
    from engineer.util import mirror_folder, ensure_exists, slugify

    if args and args.clean:
        clean()

    settings.create_required_directories()

    logger = logging.getLogger('engineer.engine.build')
    logger.parent.addHandler(get_file_handler(settings.LOG_FILE))

    logger.debug("Starting build using configuration file %s." % settings.SETTINGS_FILE)

    build_stats = {
        'time_run': times.now(),
        'counts': {
            'template_pages': 0,
            'new_posts': 0,
            'cached_posts': 0,
            'rollups': 0,
            'tag_pages': 0,
        },
        'files': {},
    }

    # Remove the output cache (not the post cache or the Jinja cache)
    # since we're rebuilding the site
    settings.OUTPUT_CACHE_DIR.rmtree(ignore_errors=True)

    theme = ThemeManager.current_theme()
    engineer_lib = (settings.OUTPUT_STATIC_DIR / 'engineer/lib/').abspath()
    ensure_exists(engineer_lib)
    # Copy Foundation files if used
    if theme.use_foundation:
        s = settings.ENGINEER.LIB_DIR / settings.ENGINEER.FOUNDATION_CSS
        t = ensure_exists(engineer_lib / settings.ENGINEER.FOUNDATION_CSS)
        mirror_folder(s, t)
        logger.debug("Copied Foundation library files.")

    # Copy LESS js file if needed
    if theme.use_lesscss and not settings.PREPROCESS_LESS:
        s = settings.ENGINEER.LIB_DIR / settings.ENGINEER.LESS_JS
        s.copy(engineer_lib)
        logger.debug("Copied LESS CSS files.")

    # Copy jQuery files if needed
    if theme.use_jquery:
        s = settings.ENGINEER.LIB_DIR / settings.ENGINEER.JQUERY
        s.copy(engineer_lib)
        logger.debug("Copied jQuery files.")

    # Copy modernizr files if needed
    if theme.use_modernizr:
        s = settings.ENGINEER.LIB_DIR / settings.ENGINEER.MODERNIZR
        s.copy(engineer_lib)
        logger.debug("Copied Modernizr files.")

    # Copy normalize.css if needed
    if theme.use_normalize_css:
        s = settings.ENGINEER.LIB_DIR / settings.ENGINEER.NORMALIZE_CSS
        s.copy(engineer_lib)
        logger.debug("Copied normalize.css.")

    # Copy 'raw' content to output cache - first pass
    # This first pass ensures that any static content - JS/LESS/CSS - that
    # is needed by site-specific pages (like template pages) is available
    # during the build
    if settings.CONTENT_DIR.exists():
        mirror_folder(settings.CONTENT_DIR,
                      settings.OUTPUT_CACHE_DIR,
                      delete_orphans=False)

    # Copy theme static content to output dir
    theme_output_dir = settings.OUTPUT_STATIC_DIR / 'theme'
    logger.debug("Copying theme static files to output cache.")
    theme.copy_content(theme_output_dir)
    logger.debug("Copied static files for theme to %s." % relpath(theme_output_dir))

    # Copy any theme additional content to output dir if needed
    if theme.content_mappings:
        logger.debug("Copying additional theme content to output cache.")
        theme.copy_related_content(theme_output_dir)
        logger.debug("Copied additional files for theme to %s." % relpath(theme_output_dir))

    # Load markdown input posts
    logger.info("Loading posts...")
    new_posts, cached_posts = LocalLoader.load_all(input=settings.POST_DIR)
    all_posts = PostCollection(new_posts + cached_posts)

    to_publish = PostCollection(all_posts.published)
    if settings.PUBLISH_DRAFTS:
        to_publish.extend(all_posts.drafts)
    if settings.PUBLISH_PENDING:
        to_publish.extend(all_posts.pending)
    if settings.PUBLISH_REVIEW:
        to_publish.extend(all_posts.review)

    if not settings.PUBLISH_PENDING and len(all_posts.pending) > 0:
        logger.warning("This site contains the following pending posts:")
        for post in all_posts.pending:
            logger.warning("\t'%s' - publish time: %s, %s." % (post.title,
                                                               naturaltime(post.timestamp),
                                                               post.timestamp_local))
        logger.warning("These posts won't be published until you build the site again after their publish time.")

    all_posts = PostCollection(
        sorted(to_publish, reverse=True, key=lambda p: p.timestamp))

    # Generate template pages
    if settings.TEMPLATE_PAGE_DIR.exists():
        logger.info("Generating template pages from %s." % settings.TEMPLATE_PAGE_DIR)
        template_pages = []
        for template in settings.TEMPLATE_PAGE_DIR.walkfiles('*.html'):
            # We create all the TemplatePage objects first so we have all of the URLs to them in the template
            # environment. Without this step, template pages might have broken links if they link to a page that is
            # loaded after them, since the URL to the not-yet-loaded page will be missing.
            template_pages.append(TemplatePage(template))
        for page in template_pages:
            rendered_page = page.render_html(all_posts)
            ensure_exists(page.output_path)
            with open(page.output_path / page.output_file_name, mode='wb',
                      encoding='UTF-8') as the_file:
                the_file.write(rendered_page)
                logger.info("Output template page %s." % relpath(the_file.name))
                build_stats['counts']['template_pages'] += 1
        logger.info("Generated %s template pages." % build_stats['counts']['template_pages'])

    # Generate individual post pages
    for post in all_posts:
        rendered_post = post.render_html(all_posts)
        ensure_exists(post.output_path)
        with open(post.output_path, mode='wb',
                  encoding='UTF-8') as the_file:
            the_file.write(rendered_post)
            if post in new_posts:
                logger.console("Output new or modified post '%s'." % post.title)
                build_stats['counts']['new_posts'] += 1
            elif post in cached_posts:
                build_stats['counts']['cached_posts'] += 1

    # Generate rollup pages
    num_posts = len(all_posts)
    num_slices = (
        num_posts / settings.ROLLUP_PAGE_SIZE) if num_posts % settings.ROLLUP_PAGE_SIZE == 0 \
        else (num_posts / settings.ROLLUP_PAGE_SIZE) + 1

    slice_num = 0
    for posts in all_posts.paginate():
        slice_num += 1
        has_next = slice_num < num_slices
        has_previous = 1 < slice_num <= num_slices
        rendered_page = posts.render_listpage_html(slice_num, has_next,
                                                   has_previous)
        ensure_exists(posts.output_path(slice_num))
        with open(posts.output_path(slice_num), mode='wb',
                  encoding='UTF-8') as the_file:
            the_file.write(rendered_page)
            logger.debug("Output rollup page %s." % relpath(the_file.name))
            build_stats['counts']['rollups'] += 1

        # Copy first rollup page to root of site - it's the homepage.
        if slice_num == 1:
            path.copyfile(posts.output_path(slice_num),
                          settings.OUTPUT_CACHE_DIR / 'index.html')
            logger.debug(
                "Output '%s'." % (settings.OUTPUT_CACHE_DIR / 'index.html'))

    # Generate archive page
    if num_posts > 0:
        archive_output_path = settings.OUTPUT_CACHE_DIR / 'archives/index.html'
        ensure_exists(archive_output_path)

        rendered_archive = all_posts.render_archive_html(all_posts)

        with open(archive_output_path, mode='wb', encoding='UTF-8') as the_file:
            the_file.write(rendered_archive)
            logger.debug("Output %s." % relpath(the_file.name))

    # Generate tag pages
    if num_posts > 0:
        tags_output_path = settings.OUTPUT_CACHE_DIR / 'tag'
        for tag in all_posts.all_tags:
            rendered_tag_page = all_posts.render_tag_html(tag, all_posts)
            tag_path = ensure_exists(
                tags_output_path / slugify(tag) / 'index.html')
            with open(tag_path, mode='wb', encoding='UTF-8') as the_file:
                the_file.write(rendered_tag_page)
                build_stats['counts']['tag_pages'] += 1
                logger.debug("Output %s." % relpath(the_file.name))

    # Generate feeds
    rss_feed_output_path = ensure_exists(settings.OUTPUT_CACHE_DIR / 'feeds/rss.xml')
    atom_feed_output_path = ensure_exists(settings.OUTPUT_CACHE_DIR / 'feeds/atom.xml')
    rss_feed = Rss201rev2Feed(
        title=settings.FEED_TITLE,
        link=settings.SITE_URL,
        description=settings.FEED_DESCRIPTION,
        feed_url=settings.FEED_URL
    )

    atom_feed = Atom1Feed(
        title=settings.FEED_TITLE,
        link=settings.SITE_URL,
        description=settings.FEED_DESCRIPTION,
        feed_url=settings.FEED_URL
    )

    for feed in (rss_feed, atom_feed):
        for post in all_posts[:settings.FEED_ITEM_LIMIT]:
            title = settings.JINJA_ENV.get_template('core/feeds/title.jinja2').render(post=post)
            link = settings.JINJA_ENV.get_template('core/feeds/link.jinja2').render(post=post)
            content = settings.JINJA_ENV.get_template('core/feeds/content.jinja2').render(post=post)
            feed.add_item(
                title=title,
                link=link,
                description=content,
                pubdate=post.timestamp,
                unique_id=post.absolute_url)

    with open(rss_feed_output_path, mode='wb') as the_file:
        rss_feed.write(the_file, 'UTF-8')
        logger.debug("Output %s." % relpath(the_file.name))

    with open(atom_feed_output_path, mode='wb') as the_file:
        atom_feed.write(the_file, 'UTF-8')
        logger.debug("Output %s." % relpath(the_file.name))

    # Generate sitemap
    sitemap_file_name = 'sitemap.xml.gz'
    sitemap_output_path = ensure_exists(settings.OUTPUT_CACHE_DIR / sitemap_file_name)
    sitemap_content = settings.JINJA_ENV.get_or_select_template(['sitemap.xml',
                                                                 'theme/sitemap.xml',
                                                                 'core/sitemap.xml']).render(post_list=all_posts)
    with gzip.open(sitemap_output_path, mode='wb') as the_file:
        the_file.write(sitemap_content)
        logger.debug("Output %s." % relpath(the_file.name))

    # Copy 'raw' content to output cache - second/final pass
    if settings.CONTENT_DIR.exists():
        mirror_folder(settings.CONTENT_DIR,
                      settings.OUTPUT_CACHE_DIR,
                      delete_orphans=False)

    # Compress all files marked for compression
    for the_file, compression_type in settings.COMPRESS_FILE_LIST:
        if the_file not in settings.COMPRESSION_CACHE:
            with open(the_file, mode='rb') as input:
                output = compress(input.read(), compression_type)
                logger.debug("Compressed %s." % relpath(the_file))
            settings.COMPRESSION_CACHE[the_file] = output
        else:
            logger.debug("Found pre-compressed file in cache: %s." % relpath(the_file))
            output = settings.COMPRESSION_CACHE[the_file]
        with open(the_file, mode='wb') as f:
            f.write(output)

    # Remove LESS files if LESS preprocessing is being done
    if settings.PREPROCESS_LESS:
        logger.debug("Deleting LESS files since PREPROCESS_LESS is True.")
        for f in settings.OUTPUT_STATIC_DIR.walkfiles(pattern="*.less"):
            logger.debug("Deleting file: %s." % relpath(f))
            f.remove_p()

    # Check if anything has changed other than the sitemap
    have_changes = False
    compare = filecmp.dircmp(settings.OUTPUT_CACHE_DIR,
                             settings.OUTPUT_DIR,
                             ignore=settings.OUTPUT_DIR_IGNORE)

    # The algorithm below takes advantage of the fact that once we've determined that there is more than one file
    # that's different, or if the first item returned by the generator is not the sitemap, then we can break out of
    # the generator loop early. This is also advantageous because it doesn't require us to completely exhaust the
    # generator. In the case of a fresh site build, for example, the generator will return a lot more data. So the
    # other approach here of expanding the generator into a list with a list comprehension would be inefficient
    # in many cases. This approach performs equally well in all cases at the cost of some unusual-looking code.
    diff_file_count = 0
    if not has_files(settings.OUTPUT_DIR):
        have_changes = True
    else:
        for file_path in diff_dir(compare):
            diff_file_count += 1
            if file_path != sitemap_output_path:
                have_changes = True
                break
            if diff_file_count > 1:
                have_changes = True
                break

    if not have_changes:
        logger.console('')
        logger.console("No site changes to publish.")
    else:
        logger.debug("Synchronizing output directory with output cache.")
        build_stats['files'] = mirror_folder(settings.OUTPUT_CACHE_DIR,
                                             settings.OUTPUT_DIR,
                                             ignore_list=settings.OUTPUT_DIR_IGNORE)
        from pprint import pformat

        logger.debug("Folder mirroring report: %s" % pformat(build_stats['files']))
        logger.console('')
        logger.console("Site: '%s' output to %s." % (settings.SITE_TITLE, settings.OUTPUT_DIR))
        logger.console("Posts: %s (%s new or updated)" % (
            (build_stats['counts']['new_posts'] + build_stats['counts']['cached_posts']),
            build_stats['counts']['new_posts']))
        logger.console("Post rollup pages: %s (%s posts per page)" % (
            build_stats['counts']['rollups'], settings.ROLLUP_PAGE_SIZE))
        logger.console("Template pages: %s" % build_stats['counts']['template_pages'])
        logger.console("Tag pages: %s" % build_stats['counts']['tag_pages'])
        logger.console("%s new items, %s modified items, and %s deleted items." % (
            len(build_stats['files']['new']),
            len(build_stats['files']['overwritten']),
            len(build_stats['files']['deleted'])))

    logger.console('')
    logger.console("Full build log at %s." % settings.LOG_FILE)
    logger.console('')

    with open(settings.BUILD_STATS_FILE, mode='wb') as the_file:
        pickle.dump(build_stats, the_file)
    settings.CACHE.close()
    return build_stats


def serve(args):
    import bottle
    from engineer.conf import settings
    from engineer import emma

    logger = logging.getLogger(__name__)

    if not settings.OUTPUT_DIR.exists():
        logger.warning(
            "Output directory doesn't exist - did you forget to run 'engineer build'?")
        exit()

    debug_server = bottle.Bottle()
    debug_server.mount('/_emma', emma.Emma().app)

    #noinspection PyUnresolvedReferences,PyUnusedLocal
    @debug_server.route('/')
    @debug_server.route('/<filepath:path>')
    def serve_static(filepath='index.html'):
        if settings.HOME_URL != '/':
            # if HOME_URL is not root, we need to adjust the paths
            if filepath.startswith(settings.HOME_URL[1:]):
                filepath = filepath[len(settings.HOME_URL) - 1:]
            else:
                return bottle.HTTPResponse(status=404)
        response = bottle.static_file(filepath, root=settings.OUTPUT_DIR)
        if type(response) is bottle.HTTPError:
            return bottle.static_file(path(filepath) / 'index.html',
                                      root=settings.OUTPUT_DIR)
        else:
            return response

    bottle.debug(True)
    bottle.run(app=debug_server, host='0.0.0.0', port=args.port, reloader=True)


def start_emma(args):
    from engineer import emma

    logger = logging.getLogger('engineer.engine.start_emma')

    em = emma.EmmaStandalone()
    try:
        if args.prefix:
            em.emma_instance.prefix = args.prefix
        if args.generate:
            em.emma_instance.generate_secret()
            logger.console(
                "New Emma URL: %s" % em.emma_instance.get_secret_path(True))
        elif args.url:
            logger.console(
                "Current Emma URL: %s" % em.emma_instance.get_secret_path(True))
        elif args.run:
            em.run(port=args.port)
    except emma.NoSecretException:
        logger.warning(
            "You haven't created a secret for Emma yet. Try 'engineer emma --generate' first.")
    exit()


def init(args):
    from engineer import __file__ as package_file

    logger = logging.getLogger('engineer.engine.init')

    sample_site_path = path(package_file).dirname() / ('sample_site/%s' % args.mode)
    target = path.getcwd()
    if target.listdir() and not args.force:
        logger.warning("Target folder %s is not empty." % target)
        exit()
    elif args.force:
        logger.info("Deleting folder contents.")
        try:
            for item in target.dirs():
                item.rmtree()
            for item in target.files():
                item.remove()
        except Exception as e:
            logger.error("Couldn't delete folder contents - aborting.")
            logger.exception(e)
            exit()

    from engineer.util import mirror_folder, ensure_exists

    if args.sample:
        mirror_folder(sample_site_path, target)
    else:
        ensure_exists(target / 'posts')
        ensure_exists(target / 'content')
        ensure_exists(target / 'templates')
        mirror_folder(sample_site_path, target, recurse=False)
    logger.console("Initialization complete.")
    exit()


def get_argparser():
    # Common parameters
    common_parser = argparse.ArgumentParser(add_help=False)
    common_parser.add_argument('-v', '--verbose',
                               dest='verbose',
                               action='count',
                               help="Display verbose output.")
    common_parser.add_argument('-s', '--config', '--settings',
                               dest='config_file',
                               help="Specify a configuration file to use.")

    desc = "Engineer static site builder. [v%s, %s %s]" % (version, version.date, time.strftime('%X', version.time))
    main_parser = argparse.ArgumentParser(description=desc)
    subparsers = main_parser.add_subparsers(title="subcommands",
                                            dest='parser_name')

    parser_build = subparsers.add_parser('build',
                                         help="Build the site.",
                                         parents=[common_parser])
    parser_build.add_argument('-c', '--clean',
                              dest='clean',
                              action='store_true',
                              help="Clean the output directory and clear all the caches before building.")
    parser_build.set_defaults(func=build)

    parser_clean = subparsers.add_parser('clean',
                                         help="Clean the output directory and clear all caches.",
                                         parents=[common_parser])
    parser_clean.set_defaults(func=clean)

    parser_serve = subparsers.add_parser('serve',
                                         help="Start the development server.",
                                         parents=[common_parser])
    parser_serve.add_argument('-p', '--port',
                              type=int,
                              default=8000,
                              dest='port',
                              help="The port the development server should listen on.")
    parser_serve.set_defaults(func=serve)

    parser_emma = subparsers.add_parser('emma',
                                        help="Start Emma, the built-in management server.",
                                        parents=[common_parser])
    parser_emma.add_argument('-p', '--port',
                             type=int,
                             default=8080,
                             dest='port',
                             help="The port Emma should listen on.")
    parser_emma.add_argument('--prefix',
                             type=str,
                             dest='prefix',
                             help="The prefix path the Emma site will be rooted at.")
    emma_options = parser_emma.add_mutually_exclusive_group(required=True)
    emma_options.add_argument('-r', '--run',
                              dest='run',
                              action='store_true',
                              help="Run Emma.")
    emma_options.add_argument('-g', '--generate',
                              dest='generate',
                              action='store_true',
                              help="Generate a new secret location for Emma.")
    emma_options.add_argument('-u', '--url',
                              dest='url',
                              action='store_true',
                              help="Get Emma's current URL.")
    parser_emma.set_defaults(func=start_emma)
    parser_init = subparsers.add_parser('init',
                                        help="Initialize the current directory as an engineer site.",
                                        parents=[common_parser])
    parser_init.add_argument('-m', '--mode',
                             dest='mode',
                             default='default',
                             choices=['azure'],
                             help="Initialize site with folder structures designed for deployment to a service such "
                                  "as Azure.")
    parser_init.add_argument('--sample',
                             dest='sample',
                             action='store_true',
                             help="Include sample content.")
    parser_init.add_argument('--force', '-f',
                             dest='force',
                             action='store_true',
                             help="Delete target folder contents. Use with caution!")
    parser_init.set_defaults(func=init)

    #noinspection PyUnresolvedReferences
    for cmd_plugin in CommandPlugin.plugins:
        if cmd_plugin.active():
            cmd_plugin.add_command(subparsers, main_parser, common_parser)

    return main_parser


def cmdline(args=sys.argv):
    # bootstrap logging
    bootstrap()

    # Load all plugins
    load_plugins()

    args = get_argparser().parse_args(args[1:])
    skip_settings = ('init',)

    logger = logging.getLogger('engineer')
    if args.verbose >= 2:
        logger.removeHandler(get_console_handler(logging.WARNING))
        logger.addHandler(get_console_handler(logging.DEBUG))
    elif args.verbose == 1:
        logger.removeHandler(get_console_handler(logging.WARNING))
        logger.addHandler(get_console_handler(logging.INFO))
    else:
        pass  # WARNING level is added by default in bootstrap method

    if args.parser_name in skip_settings:
        pass
    else:  # try loading settings
        try:
            from engineer.conf import settings

            if args.config_file is None:
                default_settings_file = path.getcwd() / 'config.yaml'
                logger.info("No '--settings' parameter specified, defaulting to %s." % default_settings_file)
                settings.reload(default_settings_file)
            else:
                settings.reload(settings_file=args.config_file)
        except Exception as e:
            logger.error(e.message)
            exit()

    args.func(args)
    exit()