# Source code for langchain_community.document_loaders.azure_ai_data
from typing import Iterator, Optional
from langchain_community.docstore.document import Document
from langchain_community.document_loaders.base import BaseLoader
from langchain_community.document_loaders.unstructured import UnstructuredFileIOLoader
class AzureAIDataLoader(BaseLoader):
    """Load from Azure AI Data.

    Every remote path selected on the file system behind ``url`` is opened
    and handed to ``UnstructuredFileIOLoader``; the documents it produces are
    yielded one file at a time.
    """

    def __init__(self, url: str, glob: Optional[str] = None) -> None:
        """Initialize with URL to a data asset or storage location.

        Args:
            url: URL to the data asset or storage location.
            glob: Optional glob pattern to select files. Defaults to None.
        """
        # URL to the data asset or storage location.
        self.url = url
        # Optional glob pattern to select files. A falsy value (None or "")
        # makes `lazy_load` fall back to `fs.ls()`.
        self.glob_pattern = glob

    def lazy_load(self) -> Iterator[Document]:
        """A lazy loader for Documents.

        Yields:
            The documents parsed from each selected remote path, in the order
            the file system returned the paths.

        Raises:
            ImportError: If the ``azureml-fsspec`` package is not installed.
        """
        # Imported lazily so the dependency is only required when the loader
        # is actually used.
        try:
            from azureml.fsspec import AzureMachineLearningFileSystem
        except ImportError as exc:
            raise ImportError(
                "Could not import azureml-fsspec package. "
                "Please install it with `pip install azureml-fsspec`."
            ) from exc

        fs = AzureMachineLearningFileSystem(self.url)

        if self.glob_pattern:
            remote_paths_list = fs.glob(self.glob_pattern)
        else:
            # No pattern given: take whatever the file system lists by default.
            remote_paths_list = fs.ls()

        for remote_path in remote_paths_list:
            # The handle stays open while this file's documents are consumed
            # and is closed before the next path is opened.
            with fs.open(remote_path) as f:
                loader = UnstructuredFileIOLoader(file=f)
                yield from loader.load()