# -*- coding: utf-8 -*-
# Elisa - Home multimedia server
# Copyright (C) 2006-2008 Fluendo Embedded S.L. (www.fluendo.com).
# All rights reserved.
#
# This file is available under one of two license agreements.
#
# This file is licensed under the GPL version 3.
# See "LICENSE.GPL" in the root of this distribution including a special
# exception to use Elisa with Fluendo's plugins.
#
# The GPL part of Elisa is also available under a commercial licensing
# agreement from Fluendo.
# See "LICENSE.Elisa" in the root directory of this distribution package
# for details on that license.

"""
Metadata crawler
"""

__maintainer__ = 'Philippe Normand <philippe@fluendo.com>'
__maintainer1__ = 'Alessandro Decina <alessandro@fluendo.com>'

from elisa.core import common
from elisa.base_components.service_provider import ServiceProvider
from elisa.core.media_uri import MediaUri
from elisa.extern import enum
from elisa.core.utils import classinit

import os
import time
from twisted.internet import reactor, defer, task

plugin_registry = common.application.plugin_registry
MediaLocationMessage = plugin_registry.get_component_class('base:media_location_message')
MediaDB = plugin_registry.get_component_class('media_db:db')

class MediaScannerItem(object):
    """
    One unit of work for the scanner queue.

    An item ties the uri currently being scanned (and the metadata already
    known about it) to the source it belongs to, and carries the deferred
    that is fired when the scan of that uri is over.
    """

    def __init__(self, source_uri, source_id,
            current_uri=None, current_metadata=None,
            requested_media_types=None):
        """
        @param source_uri:            location of the source the item
                                      belongs to
        @param source_id:             identifier of the source
        @param current_uri:           uri to scan; defaults to source_uri
        @param current_metadata:      metadata already known about
                                      current_uri; defaults to a new empty
                                      dictionary
        @param requested_media_types: media types to keep, or None
        """
        # fired when the scan of current_uri is over
        self.defer = defer.Deferred()

        self.source_uri = source_uri
        self.source_id = source_id

        # an item without an explicit uri stands for the source itself
        if current_uri is not None:
            self.current_uri = current_uri
        else:
            self.current_uri = source_uri

        # every item gets its own dictionary unless the caller provides one
        if current_metadata is not None:
            self.current_metadata = current_metadata
        else:
            self.current_metadata = {}

        self.requested_media_types = requested_media_types

    def __str__(self):
        fields = (self.source_uri, self.current_uri, self.current_metadata)
        return '%s %s %s' % fields

class MediaScanner(ServiceProvider):
    """
    Scan media sources extracting metadata and populating the
    L{elisa.core.media_db.MediaDB}.

    The MediaScanner crawls sources looking for resources from which to get
    metadata. Static source locations can be specified in the configuration
    file. Sources can also be added, removed and updated at runtime with
    L{elisa.core.media_scanner.MediaScanner.add_source},
    L{elisa.core.media_scanner.MediaScanner.remove_source} and
    L{elisa.core.media_scanner.MediaScanner.update_source}.

    Sources are periodically rescanned to keep the
    L{elisa.core.media_db.MediaDB} up to date. Monitorable sources are
    automatically watched for metadata changes so they need not be rescanned.
    FIXME: monitoring is not implemented currently.
    """

    # Allows property fget/fset/fdel/doc overriding
    __metaclass__ = classinit.ClassInitMeta
    __classinit__ = classinit.build_properties
    
    # Default values of the configuration options. The ones read by
    # initialize() are:
    #  - enabled: when false, start() and stop() do nothing
    #  - scan_interval: delay, in seconds, between two processing steps
    #  - commit_interval: delay, in seconds, before pending database changes
    #    are committed
    #  - generate_thumbnails: when true, a 'thumbnail' entry is requested
    #    along with the other metadata of each media file
    #  - *_location_updates: locations rescanned every five minutes, hour,
    #    day and week respectively
    #  - ignored_locations: locations for which updates are never scheduled
    # db_backend, database, username, password and hostname are not read in
    # this class; presumably they are consumed by the media database
    # component -- TODO confirm.
    default_config = {'enabled': True,
                      'db_backend': 'sqlite',
                      'database': 'elisa.db',
                      'username': '',
                      'password': '',
                      'hostname': '',
                      'scan_interval': 0.01,
                      'commit_interval': 5,
                      'generate_thumbnails': False,
                      'fivemin_location_updates': [],
                      'hourly_location_updates': [],
                      'daily_location_updates': [],
                      'weekly_location_updates': [],
                      'ignored_locations': []
                      }

    
    # lower bounds applied to the configured commit_interval and
    # scan_interval in initialize()
    min_commit_interval = 1
    min_scan_interval = 0.01

    def initialize(self):
        """
        Build the scanner state from the component configuration and
        register for media location messages on the application bus.
        """
        # NOTE(review): this calls the parent's __init__() from initialize()
        # rather than the parent's initialize() -- confirm it is intended.
        super(MediaScanner, self).__init__()

        self.debug("Creating")

        # queue of pending items and processing state
        self._queue = []
        self._delayed_start = 0
        self._media_manager = common.application.media_manager
        self._metadata_manager = common.application.metadata_manager

        self._started = False
        self._running = False

        config = self.config
        self._enabled = config.as_bool('enabled')

        # locations to rescan periodically, per period
        self._fivemin_locations = config['fivemin_location_updates']
        self._hourly_locations = config['hourly_location_updates']
        self._daily_locations = config['daily_location_updates']
        self._weekly_locations = config['weekly_location_updates']
        self._ignored_locations = config['ignored_locations']

        # period lengths in seconds, keyed like _interval_locations
        minute = 60
        hour = 60 * minute
        day = 24 * hour
        self._update_intervals = dict(fivemin=5 * minute,
                hour=hour,
                day=day,
                week=7 * day)
        self._interval_locations = {'fivemin': self._fivemin_locations,
                              'hour': self._hourly_locations,
                              'day': self._daily_locations,
                              'week': self._weekly_locations}

        # both intervals are clamped to the class-level minimums
        configured_scan_interval = config.as_float('scan_interval')
        self._scan_interval = max(configured_scan_interval,
                self.min_scan_interval)

        configured_commit_interval = config.as_int('commit_interval')
        self._commit_interval = max(configured_commit_interval,
                self.min_commit_interval)

        self._generate_thumbnails = config.as_bool('generate_thumbnails')

        # start time of the running source scans, keyed by source uri
        self._source_scan_start = {}
        # pending periodic update calls, keyed by interval label
        self._delayed_calls = {}
        # pending call to _process_next_real, if any
        self._call_later_next = None
        # pending call to _checkpoint, if any
        self._commit_call = None
        self._commit = False

        self.bus = common.application.bus
        self.bus.register(self._bus_media_location_message_cb,
                MediaLocationMessage)

    def _set_option(self, name, value):
        """
        Store value under the option called name in the component
        configuration.
        """
        self.config[name] = value

    def _save_config(self):
        """
        Copy the periodic update location lists back into the component
        configuration.
        """
        self.debug("Saving config")

        location_options = (
            ('fivemin_location_updates', self._fivemin_locations),
            ('hourly_location_updates', self._hourly_locations),
            ('daily_location_updates', self._daily_locations),
            ('weekly_location_updates', self._weekly_locations),
        )
        for option, locations in location_options:
            self.config[option] = locations

    def start(self, delayed_start=5):
        """
        Start the scanner: schedule the periodic updates and begin
        processing the queue. Does nothing when the scanner is disabled in
        the configuration.

        @param delayed_start:   number of seconds to wait before starting
        @type delayed_start:    int
        """
        if self._enabled:
            self.debug("Starting")
            self._started = True
            self._delayed_start = delayed_start
            self._schedule_periodic_updates()
            self._ensure_running()

    def stop(self):
        """
        Stop the scanner.

        Cancels the pending periodic updates, the pending processing step
        and the pending commit, then commits the database changes and copies
        the periodic update locations back into the configuration. Does
        nothing when the scanner is disabled in the configuration.
        """

        if not self._enabled:
            return

        self.info("Stopping")
        self._started = False

        # only pending calls can be cancelled
        for call in self._delayed_calls.itervalues():
            if call.active():
                call.cancel()
        self._delayed_calls = {}

        if self._call_later_next is not None:
            if self._call_later_next.active():
                self._call_later_next.cancel()
            self._call_later_next = None
            # Cancelling the pending step breaks the processing chain: clear
            # the flag, otherwise _ensure_running() would consider the
            # scanner still running and a later start() would never resume
            # the queue.
            self._running = False

        if self._commit_call is not None:
            if self._commit_call.active():
                self._commit_call.cancel()
            self._commit_call = None

        self._checkpoint()
        self._save_config()

    def _bus_media_location_message_cb(self, message, sender):
        """
        Bus callback: add or remove the source located at the mount point
        carried by a media location message, depending on its action.
        """
        location = MediaUri(message.mount_point)

        registry = common.application.plugin_registry
        message_class = registry.get_component_class('base:media_location_message')
        actions = message_class.ActionType

        action = message.action
        if action == actions.LOCATION_ADDED:
            self.add_source(location, message.media_types)
        elif action == actions.LOCATION_REMOVED:
            self.remove_source(location)

    def add_source(self, source_uri, media_types=None):
        """
        Register a source in the media database and schedule its scan.

        @param source_uri:  The location of the source
        @type source_uri:   L{elisa.core.media_uri.MediaUri}
        @param media_types: a sequence of media types to scan on this source eg:
                            ('audio', 'image') or None
        @type media_types:  sequence
        @returns:           defer called when the source scan is complete
        @rtype:             L{twisted.internet.defer.Deferred}
        """
        MediaDB.get_connection().add_source(source_uri)

        scan_done = self._schedule_source_update(source_uri,
                requested_media_types=media_types)
        return scan_done

    def add_media(self, source_uri, uri, media_types=None):
        """
        Schedule the scan of a single uri belonging to a source.

        @param source_uri:  The location of the source the media belongs to
        @type source_uri:   L{elisa.core.media_uri.MediaUri}
        @param uri:         The location of the media to scan
        @type uri:          L{elisa.core.media_uri.MediaUri}
        @param media_types: a sequence of media types to keep eg:
                            ('audio', 'image') or None
        @type media_types:  sequence
        @returns:           defer called when the scan is complete
        @rtype:             L{twisted.internet.defer.Deferred}
        """
        # media_types used to be an undefined name here, which made every
        # call raise a NameError; it is now an optional parameter.
        return self._schedule_source_update(source_uri, uri, media_types)

    def remove_source(self, source_uri):
        """
        Hide a source in the media database.

        @param source_uri: The location of the source
        @type source_uri:  L{elisa.core.media_uri.MediaUri}
        """
        # TODO: cancel an eventual update of the source
        connection = MediaDB.get_connection()
        connection.hide_source(source_uri)

    def remove_media(self, source_uri, uri):
        """
        Not implemented yet: currently does nothing and returns None.
        """
        # FIXME: writeme
        pass

    def update_source(self, source_uri):
        """
        Schedule a new scan of the source located at given uri.

        @param source_uri: The location of the source
        @type source_uri:  L{elisa.core.media_uri.MediaUri}
        @returns:          defer called when the source update is complete
        @rtype:            L{twisted.internet.defer.Deferred}
        """
        return self._schedule_source_update(source_uri)
        
    def _schedule_source_update(self, source_uri, current_uri=None,
            requested_media_types=None):
        """
        Enqueue the scan of a source, or of a single uri of a source.

        Nothing is enqueued when the source is in the ignored locations,
        when the media manager reports it as not scannable or when a scan of
        the source is already running; an already fired deferred carrying a
        short explanation is returned instead.

        @param source_uri:            The location of the source
        @type source_uri:             L{elisa.core.media_uri.MediaUri}
        @param current_uri:           The uri to scan, or None to scan the
                                      source itself
        @param requested_media_types: a sequence of media types to keep, or
                                      None
        @returns:                     defer called when the scan is complete
        @rtype:                       L{twisted.internet.defer.Deferred}
        """
        if unicode(source_uri) in self._ignored_locations:
            self.info('Ignoring update of %r', source_uri)
            # FIXME: define an exception to return here
            return defer.succeed('ignored')

        if not self._media_manager.is_scannable(source_uri):
            self.info('%r is not scannable', source_uri)
            # FIXME: define an exception to return here
            return defer.succeed('not scannable')

        if source_uri in self._source_scan_start:
            self.info('Source %r is already being updated', source_uri)
            return defer.succeed('updating already')

        # The start timestamp is only recorded (by _process_next_real) for
        # the item standing for the source itself, so only that item may
        # consume it.
        scans_whole_source = current_uri is None or current_uri == source_uri

        def source_update_done(children_update_info):
            t0 = None
            if scans_whole_source:
                t0 = self._source_scan_start.pop(source_uri, None)

            # no timestamp, nothing to report (eg: single media update)
            if t0 is not None:
                delta = time.time() - t0
                db = MediaDB.get_connection()
                count = db.get_files_count_for_source_uri(source_uri)
                if count:
                    speed = "(%s s/file)"  % (delta / count,)
                else:
                    speed = ""
                msg = 'Parse of %s took %s seconds %s' % (source_uri, delta,
                                                          speed)
                self.info(msg)

            return children_update_info

        def source_update_failure(reason):
            # Forget the start timestamp, otherwise the source would be
            # reported as "already being updated" forever and never be
            # scanned again.
            if scans_whole_source:
                self._source_scan_start.pop(source_uri, None)

            # FIXME: this is called when scanning an item in the source fails.
            # We just log and skip the error for the moment, but this should be
            # properly handled by upper layers
            self.warning('scanning of an item of %s failed: %s',
                    source_uri, reason)

            # eat the error...

        self.info("Scheduling update of %r", source_uri)
        db = MediaDB.get_connection()
        source_info = db.get_source_for_uri(source_uri)
        item = MediaScannerItem(source_uri, source_info.id,
                current_uri, None, requested_media_types)
        item.defer.addCallback(source_update_done)
        item.defer.addErrback(source_update_failure)
        self._enqueue(item)

        if self._started:
            self._ensure_running()

        return item.defer

    def _ensure_running(self):
        """
        Start processing the queue unless it is being processed already.
        """
        if self._running:
            return

        # start now if we aren't already scanning
        self._running = True
        self._process_next(delayed=True)

    def _enqueue(self, item):
        """
        Put an item at the front of the queue, so that it is the next one
        returned by _dequeue().
        """
        self.log("enqueing %s", item)
        queue = self._queue
        queue.insert(0, item)
        self.log("%r items in queue", len(queue))
                 
    def _dequeue(self):
        """
        Remove and return the item at the front of the queue, or None when
        the queue is empty.
        """
        if not self._queue:
            return None
        return self._queue.pop(0)

    def _process_next(self, seconds=None, delayed=False):
        """
        Schedule the next processing step.

        @param seconds: delay before the step, defaults to the scan interval
        @param delayed: passed along to _process_next_real
        """
        # only one processing step may be pending at any time
        assert self._call_later_next is None

        # do the real work in delayed calls so that we a) avoid recursion and b)
        # let other reactor sources do their work while we scan
        if seconds is not None:
            delay = seconds
        else:
            delay = self._scan_interval

        self._call_later_next = reactor.callLater(delay,
                self._process_next_real, delayed)

    def _checkpoint(self):
        """
        Ask the database backend to save the pending changes.
        """
        self.debug('committing')
        backend = MediaDB.get_connection()._backend
        backend.save_changes()

    def _process_next_real(self, delayed=False):
        self._call_later_next = None
        if not self._started:
            self.debug('stopping _process_next, scanner stopped')
            return

        if delayed and self._delayed_start:
            self.debug("Delayed start in %s seconds", self._delayed_start)
            self._delayed_start -= 1
            self._process_next(1, delayed)
            return

        if self._commit_call is None or not self._commit_call.active():
            self._commit_call = \
                    reactor.callLater(self._commit_interval, self._checkpoint)

        item = self._dequeue()
        if not item:
            if self._commit_call is not None and self._commit_call.active():
                self._commit_call.cancel()
                self._commit_call = None

            self._checkpoint()
            self.debug('scanner queue empty')
            self._running = False
            return

        if item.source_uri == item.current_uri:
            # record a timestamp so we can report roughly how long it took to
            # scan this source
            self._source_scan_start[item.source_uri] = time.time()

        try:
            self._process(item)
        except Exception, exc:
            self.warning("Source scanning failed: %s" % exc)
            raise
    
    def _schedule_periodic_updates(self):
        """
        Schedule one delayed call to _periodic_update per update interval
        and remember it, keyed by the interval label.
        """
        # schedule some events
        for label, interval in self._update_intervals.items():
            self._delayed_calls[label] = reactor.callLater(interval,
                    self._periodic_update, label)

    def _periodic_update(self, interval_label):
        """
        Update all the sources registered for an update interval, then
        schedule the next update of that interval.

        The next update is scheduled exactly once, when the updates of all
        the sources are over, and only if the scanner has not been stopped
        in the meantime. Sources whose update fails are removed from the
        locations of the interval. Nothing is scheduled when no source is
        registered for the interval.

        @param interval_label: key of the interval in _update_intervals
        """
        # this call is done, we will readd it later
        self._delayed_calls.pop(interval_label, None)

        interval = self._update_intervals.get(interval_label)
        sources = self._interval_locations.get(interval_label)
        if not sources:
            return

        msg = "Launching scheduled update of the %s: %s" % (interval_label,
                                                            sources)
        self.info(msg)

        def update(source_uri):
            return self.update_source(MediaUri(source_uri))

        def periodic_update_source_failed(failure, source_uri):
            self.warning('scheduled update of %s failed: %s (removing it '
                    'from the %s updates)', source_uri, failure,
                    interval_label)
            locations = self._interval_locations.get(interval_label)
            if locations and source_uri in locations:
                locations.remove(source_uri)

            # swallow the error and continue in periodic_update_done so we
            # readd the call

        def periodic_update_done(results):
            # Readd a single call for the whole interval: adding one per
            # source would multiply the timers at every period.
            if self._started and interval_label not in self._delayed_calls:
                call = reactor.callLater(interval,
                        self._periodic_update, interval_label)
                self._delayed_calls[interval_label] = call

            return results

        source_deferreds = []
        # iterate over a copy, the errback removes entries from the list and
        # may be called before the loop is over
        for source_uri in list(sources):
            # maybeDeferred turns an exception raised while scheduling the
            # update into a failure, so that one broken location does not
            # prevent the update of the other ones
            dfr = defer.maybeDeferred(update, source_uri)
            dfr.addErrback(periodic_update_source_failed, source_uri)
            source_deferreds.append(dfr)

        all_done = defer.DeferredList(source_deferreds)
        all_done.addCallback(periodic_update_done)

    def _process(self, item):
        """
        Process one item taken out of the queue.

        The media manager is asked whether the current uri of the item is a
        directory. For a directory the direct children are enqueued as new
        items (children whose 'fs_mtime' equals the one stored in the
        database are skipped) and the deferred of the item is fired once all
        the enqueued children are done. Anything else is handed to
        _process_media_file().

        Whenever one of the steps fails the item is skipped: the processing
        of the queue is resumed and the failure is reported on the deferred
        of the item.

        @param item: the item to process
        @type item:  L{MediaScannerItem}
        """
        source_uri = item.source_uri
        source_id = item.source_id
        current_uri = item.current_uri
        requested_media_types = item.requested_media_types

        self.debug("Processing source %r uri %r", source_uri, current_uri)

        def skip_item(failure):
            # Give up on this item. Both steps are guarded because this may
            # run after a callback that already performed one of them:
            # _process_next() asserts that no step is pending and a deferred
            # can only be fired once.
            if self._call_later_next is None:
                self._process_next()
            if not item.defer.called:
                item.defer.errback(failure)

        def is_directory_done(is_directory):
            if is_directory:
                self.debug("%r has children", current_uri)
                def get_direct_children_done(children):
                    # FIXME: iterating all the children here is likely to be slow,
                    # what we need is a better interface to just get the *next*
                    # children

                    self.debug('got children for %s: %s', source_uri, children)

                    def iterate_over_children_iter(children, children_deferreds):
                        for child in children:
                            uri, metadata = child

                            try:
                                current_mtime = metadata['fs_mtime']
                            except KeyError:
                                pass
                            else:
                                db = MediaDB.get_connection()
                                info = db.get_media_information(uri,
                                        extended=False)
                                if info and info.fs_mtime is not None and \
                                    info.fs_mtime == current_mtime:
                                    self.debug("skipping %s not modified "
                                            "since last scan", uri)
                                    self._call_later_delay = 0.0
                                    yield None
                                    continue

                            child_item = MediaScannerItem(source_uri, source_id,
                                    uri, metadata, requested_media_types)
                            children_deferreds.append(child_item.defer)
                            self.debug("%r adding %r", source_uri, uri)
                            self._enqueue(child_item)
                            
                            yield None

                    def iterate_over_children_done(iterator, children_deferreds):
                        # item.defer is the deferred of the parent dir, we want to
                        # call it back only after having scanned all the children
                        dfr = defer.DeferredList(children_deferreds, consumeErrors=1)
                        dfr.chainDeferred(item.defer)
                        self._process_next()

                    def iterate_over_children_failure(failure):
                        self.warning('error enqueuing children of %s: %s '
                                '(skipping)', current_uri, failure)
                        skip_item(failure)

                    children_deferreds = []
                    dfr = task.coiterate(iterate_over_children_iter(children,
                            children_deferreds))
                    dfr.addCallback(iterate_over_children_done,
                            children_deferreds)
                    # without this a failure while iterating would stall the
                    # scanner, nothing else schedules the next step
                    dfr.addErrback(iterate_over_children_failure)

                def get_direct_children_failure(failure):
                    self.warning('error getting children for %s: %s (skipping)',
                            source_uri, failure)
                    skip_item(failure)

                children_defer = self._media_manager.get_direct_children(current_uri, [])
                children_defer.addCallback(get_direct_children_done)
                children_defer.addErrback(get_direct_children_failure)
            else:
                self._process_media_file(item)

        def is_directory_failure(failure):
            self.warning('is_directory failed on uri %s: %s (skipping)',
                    current_uri, failure)
            # Actually skip the item: without resuming the queue here the
            # scanner would stall, and the parent of the item would wait for
            # its deferred forever.
            skip_item(failure)

        is_directory_defer = self._media_manager.is_directory(current_uri)
        is_directory_defer.addCallback(is_directory_done)
        is_directory_defer.addErrback(is_directory_failure)

    def _process_media_file(self, item):
        """
        Retrieve the metadata of the media file of an item and store the
        media in the database.

        The metadata manager is asked to fill in the metadata of the item.
        Audio, video and image files are then added to the media database;
        files whose type is not one of the requested media types are
        reported as failures on the deferred of the item. In every case the
        processing of the queue is resumed before the deferred of the item
        is fired.

        @param item: the item to process
        @type item:  L{MediaScannerItem}
        """
        source_id = item.source_id
        uri = item.current_uri
        metadata = item.current_metadata
        requested_media_types = item.requested_media_types

        def get_metadata_done(res_metadata):
            self.debug('finished scan of %s', uri)

            file_type = res_metadata['file_type']

            if requested_media_types and file_type not in requested_media_types:
                self.debug('ignoring %s, media type %s, requested %s' %
                        (uri, file_type, requested_media_types))
                self._process_next()
                # errback() needs an exception: plain strings are deprecated
                # by Twisted and rejected by its later versions, in which
                # case the error would land in get_metadata_failure and
                # schedule the next step a second time.
                item.defer.errback(Exception('not interesting'))
                return res_metadata

            db_metadata = {}
            if file_type == 'audio':
                for key in ('artist', 'album', 'song', 'track'):
                    try:
                        db_metadata[key] = res_metadata[key]
                    except KeyError:
                        pass

                default_image = res_metadata['default_image']
                if default_image:
                    # FIXME: this is a hack to avoid saving covers
                    #        coming from Amazon in db...
                    home = os.path.expanduser('~/.elisa')
                    amazon_cache = os.path.join(home, 'amazon_cache')
                    if not default_image.path.startswith(amazon_cache):
                        db_metadata['cover_uri'] = default_image

            if file_type in ('audio', 'video', 'image'):
                MediaDB.get_connection().add_media(uri, uri.label, source_id, 'file',
                        format=file_type, typ=res_metadata['mime_type'],
                        fs_mtime=res_metadata['fs_mtime'], metadata=db_metadata)
                self._call_later_delay = 0.3
            
            # continue with the next file
            self._process_next()
            item.defer.callback(res_metadata)

            return res_metadata

        def get_metadata_failure(failure):
            self.debug('error scanning %s: %s', uri, failure)
            self._process_next()
            item.defer.errback(failure)
        
        self.debug('starting scan of %s' % uri)

        # entries set to None are the ones the metadata manager is asked to
        # fill in; values that are already known are kept
        metadata['uri'] = uri
        for tag in ('content-type', 'file_type', 'mime_type', 'default_image',
                    'artist', 'album', 'song', 'track'):
            if tag not in metadata:
                metadata[tag] = None

        if self._generate_thumbnails:
            metadata['thumbnail'] = None

        get_metadata_defer = \
                self._metadata_manager.get_metadata(metadata, low_priority=True)
        get_metadata_defer.addCallback(get_metadata_done)
        get_metadata_defer.addErrback(get_metadata_failure)
