mephisto.abstractions.blueprints.static_html_task.static_html_blueprint

View Source
#!/usr/bin/env python3

# Copyright (c) Meta Platforms and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import (
    StaticBlueprint,
    StaticBlueprintArgs,
    SharedStaticTaskState,
)
from dataclasses import dataclass, field
from omegaconf import MISSING, DictConfig
from mephisto.abstractions.blueprint import Blueprint
from mephisto.abstractions.blueprints.static_html_task.static_html_task_builder import (
    StaticHTMLTaskBuilder,
)
from mephisto.operations.registry import register_mephisto_abstraction

import os
import time
import csv
import types

from typing import ClassVar, List, Type, Any, Dict, Iterable, Optional, TYPE_CHECKING

if TYPE_CHECKING:
    from mephisto.data_model.task_run import TaskRun
    from mephisto.abstractions.blueprint import (
        AgentState,
        TaskRunner,
        TaskBuilder,
        SharedTaskState,
    )
    from mephisto.abstractions.blueprints.abstract.static_task.static_blueprint import (
        SharedStaticTaskState,
    )
    from mephisto.data_model.assignment import Assignment
    from mephisto.data_model.agent import OnboardingAgent
    from mephisto.data_model.worker import Worker
    from mephisto.data_model.unit import Unit

BLUEPRINT_TYPE_STATIC_HTML = "static_task"


@dataclass
class StaticHTMLBlueprintArgs(StaticBlueprintArgs):
    """
    Adds the options required by static HTML tasks to those inherited
    from StaticBlueprintArgs.

    task_source points to the HTML file intended to be deployed for this task.
    preview_source and onboarding_source optionally point to HTML files
    used for the task preview and for onboarding.
    data_csv (not declared in this class) has the data to be deployed
    for this task.
    """

    # Blueprint type these args belong to.
    _blueprint_type: str = BLUEPRINT_TYPE_STATIC_HTML
    # Carries the group-level help text describing what static tasks need.
    _group: str = field(
        default="StaticBlueprint",
        metadata={
            "help": (
                "Tasks launched from static blueprints need a "
                "source html file to display to workers, as well as a csv "
                "containing values that will be inserted into templates in "
                "the html. "
            )
        },
    )
    # Required: path to the HTML file for the task; no default is supplied.
    task_source: str = field(
        default=MISSING,
        metadata={
            "help": "Path to source HTML file for the task being run",
            "required": True,
        },
    )
    # Optional HTML file for the task preview; no default is supplied.
    preview_source: Optional[str] = field(
        default=MISSING,
        metadata={"help": "Optional path to source HTML file to preview the task"},
    )
    # Optional HTML file for onboarding; no default is supplied.
    onboarding_source: Optional[str] = field(
        default=MISSING,
        metadata={"help": "Optional path to source HTML file to onboarding the task"},
    )


@register_mephisto_abstraction()
class StaticHTMLBlueprint(StaticBlueprint):
    """Blueprint for static tasks whose worker-facing page is a source HTML file"""

    TaskBuilderClass = StaticHTMLTaskBuilder
    ArgsClass = StaticHTMLBlueprintArgs
    BLUEPRINT_TYPE = BLUEPRINT_TYPE_STATIC_HTML

    def __init__(
        self,
        task_run: "TaskRun",
        args: "DictConfig",
        shared_state: "SharedTaskState",
    ):
        """
        Resolve and verify the task HTML file (and the onboarding HTML file
        when one is configured), then record the task file's base name on
        every initialization data entry.

        Raises FileNotFoundError when a configured HTML file does not exist.
        """
        assert isinstance(
            shared_state, SharedStaticTaskState
        ), "Cannot initialize with a non-static state"
        super().__init__(task_run, args, shared_state)

        blueprint_args = args.blueprint

        # Main task page: must exist on disk after ~ expansion
        self.html_file = os.path.expanduser(blueprint_args.task_source)
        if not os.path.exists(self.html_file):
            raise FileNotFoundError(
                f"Specified html file {self.html_file} was not found from {os.getcwd()}"
            )

        # Onboarding page: only checked when one was actually configured
        onboarding_source = blueprint_args.get("onboarding_source", None)
        if onboarding_source is None:
            self.onboarding_html_file = None
        else:
            self.onboarding_html_file = os.path.expanduser(onboarding_source)
            if not os.path.exists(self.onboarding_html_file):
                raise FileNotFoundError(
                    f"Specified onboarding html file {self.onboarding_html_file} was not found from {os.getcwd()}"
                )

        # Every data entry carries the file name of the task page
        html_name = os.path.basename(self.html_file)
        for data_entry in self._initialization_data_dicts:
            data_entry["html"] = html_name

    @classmethod
    def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
        """
        Ensure that the data can be properly loaded.

        Exactly one data source is checked, in priority order: data_csv,
        data_json, data_jsonl, then shared_state.static_task_data. When an
        onboarding qualification is set, an onboarding source and an
        onboarding validation function are required as well.
        """
        Blueprint.assert_task_args(args, shared_state)
        blue_args = args.blueprint
        assert isinstance(
            shared_state, SharedStaticTaskState
        ), "Cannot assert args on a non-static state"
        if isinstance(shared_state.static_task_data, types.GeneratorType):
            raise AssertionError("You can't launch an HTML static task on a generator")

        # (argument name, label used in the error message), highest priority first
        data_file_args = (
            ("data_csv", "csv"),
            ("data_json", "JSON"),
            ("data_jsonl", "JSON-L"),
        )
        for arg_name, label in data_file_args:
            configured_path = blue_args.get(arg_name, None)
            if configured_path is None:
                continue
            resolved_path = os.path.expanduser(configured_path)
            assert os.path.exists(
                resolved_path
            ), f"Provided {label} file {resolved_path} doesn't exist"
            # Only the first configured file source is validated
            break
        else:
            # No file source configured: fall back to in-memory task data
            if shared_state.static_task_data is None:
                raise AssertionError(
                    "Must provide one of a data csv, json, json-L, or a list of tasks"
                )
            assert (
                len([item for item in shared_state.static_task_data]) > 0
            ), "Length of data dict provided was 0"

        if blue_args.get("onboarding_qualification", None) is None:
            return

        assert blue_args.get("onboarding_source", None) is not None, (
            "Must use onboarding html with an onboarding qualification to "
            "use onboarding."
        )
        assert shared_state.validate_onboarding is not None, (
            "Must use an onboarding validation function to use onboarding "
            "with static tasks."
        )
View Source
class StaticHTMLBlueprintArgs(StaticBlueprintArgs):
    """
    Adds the options required by static HTML tasks to those inherited
    from StaticBlueprintArgs.

    task_source points to the HTML file intended to be deployed for this task.
    preview_source and onboarding_source optionally point to HTML files
    used for the task preview and for onboarding.
    data_csv (not declared in this class) has the data to be deployed
    for this task.
    """

    # Blueprint type these args belong to.
    _blueprint_type: str = BLUEPRINT_TYPE_STATIC_HTML
    # Carries the group-level help text describing what static tasks need.
    _group: str = field(
        default="StaticBlueprint",
        metadata={
            "help": (
                "Tasks launched from static blueprints need a "
                "source html file to display to workers, as well as a csv "
                "containing values that will be inserted into templates in "
                "the html. "
            )
        },
    )
    # Required: path to the HTML file for the task; no default is supplied.
    task_source: str = field(
        default=MISSING,
        metadata={
            "help": "Path to source HTML file for the task being run",
            "required": True,
        },
    )
    # Optional HTML file for the task preview; no default is supplied.
    preview_source: Optional[str] = field(
        default=MISSING,
        metadata={"help": "Optional path to source HTML file to preview the task"},
    )
    # Optional HTML file for onboarding; no default is supplied.
    onboarding_source: Optional[str] = field(
        default=MISSING,
        metadata={"help": "Optional path to source HTML file to onboarding the task"},
    )

Adds required options for StaticBlueprints. task_source points to the file intended to be deployed for this task; data_csv has the data to be deployed for this task.

#   StaticHTMLBlueprintArgs( _blueprint_type: str = 'static_task', block_qualification: str = '???', onboarding_qualification: str = '???', _group: str = 'StaticBlueprint', units_per_assignment: int = 1, extra_source_dir: str = '???', data_json: str = '???', data_jsonl: str = '???', data_csv: str = '???', task_source: str = '???', preview_source: Union[str, NoneType] = '???', onboarding_source: Union[str, NoneType] = '???' )
#   task_source: str = '???'
#   preview_source: Union[str, NoneType] = '???'
#   onboarding_source: Union[str, NoneType] = '???'
View Source
class StaticHTMLBlueprint(StaticBlueprint):
    """Blueprint for static tasks whose worker-facing page is a source HTML file"""

    TaskBuilderClass = StaticHTMLTaskBuilder
    ArgsClass = StaticHTMLBlueprintArgs
    BLUEPRINT_TYPE = BLUEPRINT_TYPE_STATIC_HTML

    def __init__(
        self,
        task_run: "TaskRun",
        args: "DictConfig",
        shared_state: "SharedTaskState",
    ):
        """
        Resolve and verify the task HTML file (and the onboarding HTML file
        when one is configured), then record the task file's base name on
        every initialization data entry.

        Raises FileNotFoundError when a configured HTML file does not exist.
        """
        assert isinstance(
            shared_state, SharedStaticTaskState
        ), "Cannot initialize with a non-static state"
        super().__init__(task_run, args, shared_state)

        blueprint_args = args.blueprint

        # Main task page: must exist on disk after ~ expansion
        self.html_file = os.path.expanduser(blueprint_args.task_source)
        if not os.path.exists(self.html_file):
            raise FileNotFoundError(
                f"Specified html file {self.html_file} was not found from {os.getcwd()}"
            )

        # Onboarding page: only checked when one was actually configured
        onboarding_source = blueprint_args.get("onboarding_source", None)
        if onboarding_source is None:
            self.onboarding_html_file = None
        else:
            self.onboarding_html_file = os.path.expanduser(onboarding_source)
            if not os.path.exists(self.onboarding_html_file):
                raise FileNotFoundError(
                    f"Specified onboarding html file {self.onboarding_html_file} was not found from {os.getcwd()}"
                )

        # Every data entry carries the file name of the task page
        html_name = os.path.basename(self.html_file)
        for data_entry in self._initialization_data_dicts:
            data_entry["html"] = html_name

    @classmethod
    def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
        """
        Ensure that the data can be properly loaded.

        Exactly one data source is checked, in priority order: data_csv,
        data_json, data_jsonl, then shared_state.static_task_data. When an
        onboarding qualification is set, an onboarding source and an
        onboarding validation function are required as well.
        """
        Blueprint.assert_task_args(args, shared_state)
        blue_args = args.blueprint
        assert isinstance(
            shared_state, SharedStaticTaskState
        ), "Cannot assert args on a non-static state"
        if isinstance(shared_state.static_task_data, types.GeneratorType):
            raise AssertionError("You can't launch an HTML static task on a generator")

        # (argument name, label used in the error message), highest priority first
        data_file_args = (
            ("data_csv", "csv"),
            ("data_json", "JSON"),
            ("data_jsonl", "JSON-L"),
        )
        for arg_name, label in data_file_args:
            configured_path = blue_args.get(arg_name, None)
            if configured_path is None:
                continue
            resolved_path = os.path.expanduser(configured_path)
            assert os.path.exists(
                resolved_path
            ), f"Provided {label} file {resolved_path} doesn't exist"
            # Only the first configured file source is validated
            break
        else:
            # No file source configured: fall back to in-memory task data
            if shared_state.static_task_data is None:
                raise AssertionError(
                    "Must provide one of a data csv, json, json-L, or a list of tasks"
                )
            assert (
                len([item for item in shared_state.static_task_data]) > 0
            ), "Length of data dict provided was 0"

        if blue_args.get("onboarding_qualification", None) is None:
            return

        assert blue_args.get("onboarding_source", None) is not None, (
            "Must use onboarding html with an onboarding qualification to "
            "use onboarding."
        )
        assert shared_state.validate_onboarding is not None, (
            "Must use an onboarding validation function to use onboarding "
            "with static tasks."
        )

Blueprint for a task that runs off of a built react javascript bundle

#   StaticHTMLBlueprint( task_run: mephisto.data_model.task_run.TaskRun, args: omegaconf.dictconfig.DictConfig, shared_state: mephisto.abstractions.blueprint.SharedTaskState )
View Source
    def __init__(
        self,
        task_run: "TaskRun",
        args: "DictConfig",
        shared_state: "SharedTaskState",
    ):
        """
        Resolve and verify the task HTML file (and the onboarding HTML file
        when one is configured), then record the task file's base name on
        every initialization data entry.

        Raises FileNotFoundError when a configured HTML file does not exist.
        """
        assert isinstance(
            shared_state, SharedStaticTaskState
        ), "Cannot initialize with a non-static state"
        super().__init__(task_run, args, shared_state)

        blueprint_args = args.blueprint

        # Main task page: must exist on disk after ~ expansion
        self.html_file = os.path.expanduser(blueprint_args.task_source)
        if not os.path.exists(self.html_file):
            raise FileNotFoundError(
                f"Specified html file {self.html_file} was not found from {os.getcwd()}"
            )

        # Onboarding page: only checked when one was actually configured
        onboarding_source = blueprint_args.get("onboarding_source", None)
        if onboarding_source is None:
            self.onboarding_html_file = None
        else:
            self.onboarding_html_file = os.path.expanduser(onboarding_source)
            if not os.path.exists(self.onboarding_html_file):
                raise FileNotFoundError(
                    f"Specified onboarding html file {self.onboarding_html_file} was not found from {os.getcwd()}"
                )

        # Every data entry carries the file name of the task page
        html_name = os.path.basename(self.html_file)
        for data_entry in self._initialization_data_dicts:
            data_entry["html"] = html_name
#   BLUEPRINT_TYPE: str = 'static_task'
#  
@classmethod
def assert_task_args( cls, args: omegaconf.dictconfig.DictConfig, shared_state: mephisto.abstractions.blueprint.SharedTaskState ):
View Source
    @classmethod
    def assert_task_args(cls, args: DictConfig, shared_state: "SharedTaskState"):
        """
        Ensure that the data can be properly loaded.

        Exactly one data source is checked, in priority order: data_csv,
        data_json, data_jsonl, then shared_state.static_task_data. When an
        onboarding qualification is set, an onboarding source and an
        onboarding validation function are required as well.
        """
        Blueprint.assert_task_args(args, shared_state)
        blue_args = args.blueprint
        assert isinstance(
            shared_state, SharedStaticTaskState
        ), "Cannot assert args on a non-static state"
        if isinstance(shared_state.static_task_data, types.GeneratorType):
            raise AssertionError("You can't launch an HTML static task on a generator")

        # (argument name, label used in the error message), highest priority first
        data_file_args = (
            ("data_csv", "csv"),
            ("data_json", "JSON"),
            ("data_jsonl", "JSON-L"),
        )
        for arg_name, label in data_file_args:
            configured_path = blue_args.get(arg_name, None)
            if configured_path is None:
                continue
            resolved_path = os.path.expanduser(configured_path)
            assert os.path.exists(
                resolved_path
            ), f"Provided {label} file {resolved_path} doesn't exist"
            # Only the first configured file source is validated
            break
        else:
            # No file source configured: fall back to in-memory task data
            if shared_state.static_task_data is None:
                raise AssertionError(
                    "Must provide one of a data csv, json, json-L, or a list of tasks"
                )
            assert (
                len([item for item in shared_state.static_task_data]) > 0
            ), "Length of data dict provided was 0"

        if blue_args.get("onboarding_qualification", None) is None:
            return

        assert blue_args.get("onboarding_source", None) is not None, (
            "Must use onboarding html with an onboarding qualification to "
            "use onboarding."
        )
        assert shared_state.validate_onboarding is not None, (
            "Must use an onboarding validation function to use onboarding "
            "with static tasks."
        )

Ensure that the data can be properly loaded

#   class StaticHTMLBlueprint.TaskBuilderClass(mephisto.abstractions._subcomponents.task_builder.TaskBuilder):
View Source
class StaticHTMLTaskBuilder(TaskBuilder):
    """
    Builder for a static task: rebuilds the frontend, then copies the task
    html (plus the preview, optional onboarding and optional extra files)
    and the built bundle into the ``static`` folder of the build directory.
    """

    BUILT_FILE = "done.built"
    BUILT_MESSAGE = "built!"

    def rebuild_core(self):
        """
        Rebuild the frontend for this task.

        Removes any existing FRONTEND_BUILD_DIR, then runs ``npm install``
        and ``npm run dev`` from within FRONTEND_SOURCE_DIR. The caller's
        working directory is restored afterwards, whether or not the build
        succeeded.

        Raises:
            Exception: if either npm command exits with a non-zero status.
        """
        return_dir = os.getcwd()
        os.chdir(FRONTEND_SOURCE_DIR)
        try:
            if os.path.exists(FRONTEND_BUILD_DIR):
                shutil.rmtree(FRONTEND_BUILD_DIR)
            packages_installed = subprocess.call(["npm", "install"])
            if packages_installed != 0:
                raise Exception(
                    "please make sure npm is installed, otherwise view "
                    "the above error for more info."
                )

            webpack_complete = subprocess.call(["npm", "run", "dev"])
            if webpack_complete != 0:
                raise Exception(
                    "Webpack appears to have failed to build your "
                    "frontend. See the above error for more information."
                )
        finally:
            # A failed build must not leave the process stranded in the
            # frontend source directory; later relative paths depend on cwd.
            os.chdir(return_dir)

    def build_in_dir(self, build_dir: str):
        """
        Rebuild the frontend, then copy everything the task needs into
        ``<build_dir>/static`` and write a build-confirmation file.

        Args:
            build_dir: directory receiving the ``static`` resources and the
                BUILT_FILE marker.
        """
        # The frontend is rebuilt on every call; skipping the rebuild when
        # FRONTEND_BUILD_DIR already exists is currently disabled.
        self.rebuild_core()

        # Copy the built core and the given task file to the target path
        use_html_file = os.path.expanduser(self.args.blueprint["task_source"])

        target_resource_dir = os.path.join(build_dir, "static")
        file_name = os.path.basename(use_html_file)
        target_path = os.path.join(target_resource_dir, file_name)
        shutil.copy2(use_html_file, target_path)

        # Copy over the preview file as preview.html, default to the task file if none specified
        preview_file = self.args.blueprint.get("preview_source") or use_html_file
        use_preview_file = os.path.expanduser(preview_file)

        target_path = os.path.join(target_resource_dir, "preview.html")
        shutil.copy2(use_preview_file, target_path)

        # Copy over the onboarding file as onboarding.html if it's specified
        onboarding_html_file = self.args.blueprint.get("onboarding_source", None)
        if onboarding_html_file is not None:
            onboarding_html_file = os.path.expanduser(onboarding_html_file)
            target_path = os.path.join(target_resource_dir, "onboarding.html")
            shutil.copy2(onboarding_html_file, target_path)

        # If any additional task files are required via a source_dir, copy those as well
        extra_dir_path = self.args.blueprint.get("extra_source_dir")
        if extra_dir_path is not None:
            extra_dir_path = os.path.expanduser(extra_dir_path)
            # NOTE(review): copy_tree is imported outside this block,
            # presumably from distutils.dir_util - confirm it is still
            # available on the Python versions this project targets.
            copy_tree(extra_dir_path, target_resource_dir)

        bundle_js_file = os.path.join(FRONTEND_BUILD_DIR, "bundle.js")
        target_path = os.path.join(target_resource_dir, "bundle.js")
        shutil.copy2(bundle_js_file, target_path)

        # Write a built file confirmation
        with open(os.path.join(build_dir, self.BUILT_FILE), "w+") as built_file:
            built_file.write(self.BUILT_MESSAGE)

Builder for a static task, pulls the appropriate html, builds the frontend (if a build doesn't already exist), then puts the file into the server directory

Inherited Members
mephisto.abstractions._subcomponents.task_builder.TaskBuilder
TaskBuilder
mephisto.abstractions.blueprints.static_html_task.static_html_task_builder.StaticHTMLTaskBuilder
BUILT_FILE
BUILT_MESSAGE
rebuild_core
build_in_dir
View Source
class StaticHTMLBlueprintArgs(StaticBlueprintArgs):
    """
    Adds the options required by static HTML tasks to those inherited
    from StaticBlueprintArgs.

    task_source points to the HTML file intended to be deployed for this task.
    preview_source and onboarding_source optionally point to HTML files
    used for the task preview and for onboarding.
    data_csv (not declared in this class) has the data to be deployed
    for this task.
    """

    # Blueprint type these args belong to.
    _blueprint_type: str = BLUEPRINT_TYPE_STATIC_HTML
    # Carries the group-level help text describing what static tasks need.
    _group: str = field(
        default="StaticBlueprint",
        metadata={
            "help": (
                "Tasks launched from static blueprints need a "
                "source html file to display to workers, as well as a csv "
                "containing values that will be inserted into templates in "
                "the html. "
            )
        },
    )
    # Required: path to the HTML file for the task; no default is supplied.
    task_source: str = field(
        default=MISSING,
        metadata={
            "help": "Path to source HTML file for the task being run",
            "required": True,
        },
    )
    # Optional HTML file for the task preview; no default is supplied.
    preview_source: Optional[str] = field(
        default=MISSING,
        metadata={"help": "Optional path to source HTML file to preview the task"},
    )
    # Optional HTML file for onboarding; no default is supplied.
    onboarding_source: Optional[str] = field(
        default=MISSING,
        metadata={"help": "Optional path to source HTML file to onboarding the task"},
    )

Adds required options for StaticBlueprints. task_source points to the file intended to be deployed for this task; data_csv has the data to be deployed for this task.