Source code for autoextract_poet.page_inputs

from typing import ClassVar, Generic, Optional, TypeVar, Type

import attr

from autoextract_poet.items import (
    Article,
    Item,
    Product, ProductList, ArticleList, Comments, ForumPosts, JobPosting,
    RealEstate, Reviews, Vehicle,
)
from autoextract_poet.util import export


[docs]@export @attr.s(auto_attribs=True) class AutoExtractHtml: """A container for URL and HTML content retrieved from AutoExtract. ``url`` should be an URL of the response (after all redirects), not an URL of the request, if possible. ``html`` should be browser HTML in unicode """ url: str html: str
T = TypeVar("T", bound=Item)
[docs]@attr.s(auto_attribs=True) class AutoExtractData(Generic[T]): """Container for AutoExtract data. Should not be used directly by providers. Use derived classes like AutoExtractArticleData and similar. API responses are wrapped in a JSON array (this is to facilitate query batching) but we're receiving single responses here.. https://docs.zyte.com/automatic-extraction.html#responses """ page_type: ClassVar[str] data: dict @property def item_class(self): return get_item_class(self) def to_item(self) -> Optional[T]: return self.item_class.from_dict(self.data[self.page_type])
[docs]def get_item_class(page_input_cls: Type[AutoExtractData]) -> Type[Item]: """ Return item class for the page input class. >>> get_item_class(AutoExtractArticleData) is Article True >>> get_item_class(AutoExtractProductData) is Product True >>> get_item_class(AutoExtractData) is T True """ return page_input_cls.__orig_bases__[0].__args__[0] # type: ignore[attr-defined]
[docs]@export @attr.s(auto_attribs=True) class AutoExtractArticleData(AutoExtractData[Article]): """Container for AutoExtract Article data. https://docs.zyte.com/automatic-extraction/article.html """ page_type = "article"
[docs]@export @attr.s(auto_attribs=True) class AutoExtractProductData(AutoExtractData[Product]): """Container for AutoExtract Product data. https://docs.zyte.com/automatic-extraction/product.html """ page_type = "product"
[docs]@export @attr.s(auto_attribs=True) class AutoExtractProductListData(AutoExtractData[ProductList]): """Container for AutoExtract Product list data. https://docs.zyte.com/automatic-extraction/product-list.html """ page_type = "productList"
[docs]@export @attr.s(auto_attribs=True) class AutoExtractArticleListData(AutoExtractData[ArticleList]): """Container for AutoExtract Article list data. https://docs.zyte.com/automatic-extraction/article-list.html """ page_type = "articleList"
[docs]@export @attr.s(auto_attribs=True) class AutoExtractCommentsData(AutoExtractData[Comments]): """Container for AutoExtract Comments data. https://docs.zyte.com/automatic-extraction/comment.html """ page_type = "comments"
[docs]@export @attr.s(auto_attribs=True) class AutoExtractForumPostsData(AutoExtractData[ForumPosts]): """Container for AutoExtract Forum Posts data. https://docs.zyte.com/automatic-extraction/forum-post.html """ page_type = "forumPosts"
[docs]@export @attr.s(auto_attribs=True) class AutoExtractJobPostingData(AutoExtractData[JobPosting]): """Container for AutoExtract Job Posting data. https://docs.zyte.com/automatic-extraction/job-posting.html """ page_type = "jobPosting"
[docs]@export @attr.s(auto_attribs=True) class AutoExtractRealEstateData(AutoExtractData[RealEstate]): """Container for AutoExtract Real Estate data. https://docs.zyte.com/automatic-extraction/real-estate.html """ page_type = "realEstate"
[docs]@export @attr.s(auto_attribs=True) class AutoExtractReviewsData(AutoExtractData[Reviews]): """Container for AutoExtract Reviews data. https://docs.zyte.com/automatic-extraction/review.html """ page_type = "reviews"
[docs]@export @attr.s(auto_attribs=True) class AutoExtractVehicleData(AutoExtractData[Vehicle]): """Container for AutoExtract Vehicle data. https://docs.zyte.com/automatic-extraction/vehicle.html """ page_type = "vehicle"