# Copyright 2018-2022 Streamlit Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Declares the EventBasedPathWatcher class, which watches given paths in the file system. How these classes work together ------------------------------- - EventBasedPathWatcher : each instance of this is able to watch a single file or directory at a given path so long as there's a browser interested in it. This uses _MultiPathWatcher to watch paths. - _MultiPathWatcher : singleton that watches multiple paths. It does this by holding a watchdog.observer.Observer object, and manages several _FolderEventHandler instances. This creates _FolderEventHandlers as needed, if the required folder is not already being watched. And it also tells existing _FolderEventHandlers which paths it should be watching for. - _FolderEventHandler : event handler for when a folder is modified. You can register paths in that folder that you're interested in. Then this object listens to folder events, sees if registered paths changed, and fires callbacks if so. """ import os import threading from typing import Callable, cast, Dict, Optional from blinker import Signal, ANY from streamlit.util import repr_ from streamlit.watcher import util from watchdog import events from watchdog.observers import Observer from streamlit.logger import get_logger LOGGER = get_logger(__name__) class EventBasedPathWatcher: """Watches a single path on disk using watchdog""" @staticmethod def close_all() -> None: """Close the _MultiPathWatcher singleton.""" path_watcher = _MultiPathWatcher.get_singleton() path_watcher.close() LOGGER.debug("Watcher closed") def __init__( self, path: str, on_changed: Callable[[str], None], *, # keyword-only arguments: glob_pattern: Optional[str] = None, allow_nonexistent: bool = False, ) -> None: """Constructor for EventBasedPathWatchers. Parameters ---------- path : str The path to watch. on_changed : Callable[[str], None] Callback to call when the path changes. glob_pattern : Optional[str] A glob pattern to filter the files in a directory that should be watched. Only relevant when creating an EventBasedPathWatcher on a directory. allow_nonexistent : bool If True, the watcher will not raise an exception if the path does not exist. This can be used to watch for the creation of a file or directory at a given path. """ self._path = os.path.abspath(path) self._on_changed = on_changed path_watcher = _MultiPathWatcher.get_singleton() path_watcher.watch_path( self._path, on_changed, glob_pattern=glob_pattern, allow_nonexistent=allow_nonexistent, ) LOGGER.debug("Watcher created for %s", self._path) def __repr__(self) -> str: return repr_(self) def close(self) -> None: """Stop watching the path corresponding to this EventBasedPathWatcher.""" path_watcher = _MultiPathWatcher.get_singleton() path_watcher.stop_watching_path(self._path, self._on_changed) class _MultiPathWatcher(object): """Watches multiple paths.""" _singleton: Optional["_MultiPathWatcher"] = None @classmethod def get_singleton(cls) -> "_MultiPathWatcher": """Return the singleton _MultiPathWatcher object. Instantiates one if necessary. """ if cls._singleton is None: LOGGER.debug("No singleton. Registering one.") _MultiPathWatcher() return cast("_MultiPathWatcher", _MultiPathWatcher._singleton) # Don't allow constructor to be called more than once. def __new__(cls) -> "_MultiPathWatcher": """Constructor.""" if _MultiPathWatcher._singleton is not None: raise RuntimeError("Use .get_singleton() instead") return super(_MultiPathWatcher, cls).__new__(cls) def __init__(self) -> None: """Constructor.""" _MultiPathWatcher._singleton = self # Map of folder_to_watch -> _FolderEventHandler. self._folder_handlers: Dict[str, _FolderEventHandler] = {} # Used for mutation of _folder_handlers dict self._lock = threading.Lock() # The Observer object from the Watchdog module. Since this class is # only instantiated once, we only have a single Observer in Streamlit, # and it's in charge of watching all paths we're interested in. self._observer = Observer() self._observer.start() # Start observer thread. def __repr__(self) -> str: return repr_(self) def watch_path( self, path: str, callback: Callable[[str], None], *, # keyword-only arguments: glob_pattern: Optional[str] = None, allow_nonexistent: bool = False, ) -> None: """Start watching a path.""" folder_path = os.path.abspath(os.path.dirname(path)) with self._lock: folder_handler = self._folder_handlers.get(folder_path) if folder_handler is None: folder_handler = _FolderEventHandler() self._folder_handlers[folder_path] = folder_handler folder_handler.watch = self._observer.schedule( folder_handler, folder_path, recursive=True ) folder_handler.add_path_change_listener( path, callback, glob_pattern=glob_pattern, allow_nonexistent=allow_nonexistent, ) def stop_watching_path(self, path: str, callback: Callable[[str], None]) -> None: """Stop watching a path.""" folder_path = os.path.abspath(os.path.dirname(path)) with self._lock: folder_handler = self._folder_handlers.get(folder_path) if folder_handler is None: LOGGER.debug( "Cannot stop watching path, because it is already not being " "watched. %s", folder_path, ) return folder_handler.remove_path_change_listener(path, callback) if not folder_handler.is_watching_paths(): # Sometimes watchdog's FileSystemEventHandler does not have # a .watch property. It's unclear why -- may be due to a # race condition. if hasattr(folder_handler, "watch"): self._observer.unschedule(folder_handler.watch) del self._folder_handlers[folder_path] def close(self) -> None: with self._lock: """Close this _MultiPathWatcher object forever.""" if len(self._folder_handlers) != 0: self._folder_handlers = {} LOGGER.debug( "Stopping observer thread even though there is a non-zero " "number of event observers!" ) else: LOGGER.debug("Stopping observer thread") self._observer.stop() self._observer.join(timeout=5) class WatchedPath(object): """Emits notifications when a single path is modified.""" def __init__( self, md5: str, modification_time: float, *, # keyword-only arguments: glob_pattern: Optional[str] = None, allow_nonexistent: bool = False, ): self.md5 = md5 self.modification_time = modification_time self.glob_pattern = glob_pattern self.allow_nonexistent = allow_nonexistent self.on_changed = Signal() def __repr__(self) -> str: return repr_(self) class _FolderEventHandler(events.FileSystemEventHandler): """Listen to folder events. If certain paths change, fire a callback. The super class, FileSystemEventHandler, listens to changes to *folders*, but we need to listen to changes to *both* folders and files. I believe this is a limitation of the Mac FSEvents system API, and the watchdog library takes the lower common denominator. So in this class we watch for folder events and then filter them based on whether or not we care for the path the event is about. """ def __init__(self) -> None: super(_FolderEventHandler, self).__init__() self._watched_paths: Dict[str, WatchedPath] = {} self._lock = threading.Lock() # for watched_paths mutations def __repr__(self) -> str: return repr_(self) def add_path_change_listener( self, path: str, callback: Callable[[str], None], *, # keyword-only arguments: glob_pattern: Optional[str] = None, allow_nonexistent: bool = False, ) -> None: """Add a path to this object's event filter.""" with self._lock: watched_path = self._watched_paths.get(path, None) if watched_path is None: md5 = util.calc_md5_with_blocking_retries( path, glob_pattern=glob_pattern, allow_nonexistent=allow_nonexistent, ) modification_time = util.path_modification_time(path, allow_nonexistent) watched_path = WatchedPath( md5=md5, modification_time=modification_time, glob_pattern=glob_pattern, allow_nonexistent=allow_nonexistent, ) self._watched_paths[path] = watched_path watched_path.on_changed.connect(callback, weak=False) def remove_path_change_listener( self, path: str, callback: Callable[[str], None] ) -> None: """Remove a path from this object's event filter.""" with self._lock: watched_path = self._watched_paths.get(path, None) if watched_path is None: return watched_path.on_changed.disconnect(callback) if not watched_path.on_changed.has_receivers_for(ANY): del self._watched_paths[path] def is_watching_paths(self) -> bool: """Return true if this object has 1+ paths in its event filter.""" return len(self._watched_paths) > 0 def handle_path_change_event(self, event: events.FileSystemEvent) -> None: """Handle when a path (corresponding to a file or dir) is changed. The events that can call this are modification, creation or moved events. """ # Check for both modified and moved files, because many programs write # to a backup file then rename (i.e. move) it. if event.event_type == events.EVENT_TYPE_MODIFIED: changed_path = event.src_path elif event.event_type == events.EVENT_TYPE_MOVED: LOGGER.debug("Move event: src %s; dest %s", event.src_path, event.dest_path) changed_path = event.dest_path # On OSX with VI, on save, the file is deleted, the swap file is # modified and then the original file is created hence why we # capture EVENT_TYPE_CREATED elif event.event_type == events.EVENT_TYPE_CREATED: changed_path = event.src_path else: LOGGER.debug("Don't care about event type %s", event.event_type) return changed_path = os.path.abspath(changed_path) changed_path_info = self._watched_paths.get(changed_path, None) if changed_path_info is None: LOGGER.debug( "Ignoring changed path %s.\nWatched_paths: %s", changed_path, self._watched_paths, ) return modification_time = util.path_modification_time( changed_path, changed_path_info.allow_nonexistent ) if modification_time == changed_path_info.modification_time: LOGGER.debug("File/dir timestamp did not change: %s", changed_path) return changed_path_info.modification_time = modification_time new_md5 = util.calc_md5_with_blocking_retries( changed_path, glob_pattern=changed_path_info.glob_pattern, allow_nonexistent=changed_path_info.allow_nonexistent, ) if new_md5 == changed_path_info.md5: LOGGER.debug("File/dir MD5 did not change: %s", changed_path) return LOGGER.debug("File/dir MD5 changed: %s", changed_path) changed_path_info.md5 = new_md5 changed_path_info.on_changed.send(changed_path) def on_created(self, event: events.FileSystemEvent) -> None: self.handle_path_change_event(event) def on_modified(self, event: events.FileSystemEvent) -> None: self.handle_path_change_event(event) def on_moved(self, event: events.FileSystemEvent) -> None: self.handle_path_change_event(event)