Source code for trojmiastopl.utils

#!/usr/bin/python
# -*- coding: utf-8 -*-

import logging

import requests
from bs4 import BeautifulSoup
from scrapper_helpers.utils import caching, get_random_user_agent, key_sha1

from trojmiastopl import BASE_URL

# Name the logger after the module's dotted import path rather than its file
# path: a name such as ".../utils.py" is split on the dot by the logging
# hierarchy and is never reached by configuration applied to the package logger.
log = logging.getLogger(__name__)

# Endpoint that receives the POSTed search filters (see get_url_for_filters).
SEARCH_URL = "https://ogloszenia.trojmiasto.pl/szukaj/"


def decode_type(filter_value):
    """
    Translate an offer type name into its numeric code.

    The recognised names and their English meaning are listed next to each
    entry of the lookup table below. Names that are not in the table map to 0.

    :param filter_value: One of the available type names
    :type filter_value: str
    :return: Int value for POST variable, 0 for an unknown name
    :rtype: int
    """
    type_codes = dict([
        ("Mieszkanie", 100),  # flat
        ("Pokoj", 395),  # room
        ("Biuro", 400),  # office
        ("Dom", 200),  # house
        ("Blizniak", 230),  # semi-detached house
        ("Kamienica", 250),  # tenement house
        ("Pietrowy", 260),  # storey house
        ("Rekreacyjny", 220),  # leisure house
        ("Szeregowy", 240),  # terraced house
        ("Wolnostojacy", 210),  # detached house
        ("Lokal usługowy", 450),  # service area
    ])
    try:
        return type_codes[filter_value]
    except KeyError:
        # Unknown type name: fall back to the neutral code
        return 0
def decode_category_name(category):
    """
    Translate a category name into its numeric identifier.

    Any name that is not explicitly listed resolves to the default
    identifier 100.

    :param category: Category name
    :type category: str
    :return: Category number
    :rtype: int
    """
    category_ids = dict([
        ("nieruchomosci-sprzedam", 101),
        ("nieruchomosci-mam-do-wynajecia", 104),
    ])
    try:
        return category_ids[category]
    except KeyError:
        # Every other category shares the default identifier
        return 100
def get_url_for_filters(payload):
    """
    Ask the trojmiasto.pl search engine for the url matching the given filters.

    The payload is sent with a POST request to ``SEARCH_URL`` and the url is
    read back from the returned HTML document.

    :param payload: Tuple of tuples containing POST key and argument
    :type payload: tuple
    :return: Url generated by trojmiasto.pl search engine
    :rtype: str
    :raises requests.HTTPError: When the search engine answers with a 4xx/5xx status
    :raises AttributeError: When the expected elements are missing from the returned page
    """
    response = requests.post(SEARCH_URL, payload, headers={'User-Agent': get_random_user_agent()})
    # Surface HTTP errors explicitly instead of trying to parse an error page
    response.raise_for_status()
    html_parser = BeautifulSoup(response.content, "html.parser")
    select = html_parser.find(class_="nice-select-tsi")
    first_option = select.find("option")
    # The url sits on the node two siblings after the first <option>;
    # presumably the sibling in between is a whitespace text node - TODO confirm
    target_option = first_option.next_sibling.next_sibling
    return target_option.attrs["value"]
def get_url(category, region=None, **filters):
    """
    Creates url for given parameters

    Without filters the url is assembled locally from ``BASE_URL``, the
    category and (optionally) the region. With filters a POST payload is built
    and the url is obtained from the search engine through
    :func:`get_url_for_filters`.

    Filter values are handled as follows:

    * a tuple is treated as a ``(lower, upper)`` pair sent under the same key;
      a ``None`` lower bound is sent as ``0`` and a ``None`` upper bound is omitted
    * ``offer_type`` is translated with :func:`decode_type` and sent as
      ``rodzaj_nieruchomosci``
    * ``data_wprow`` is only sent when it is one of ``1d``, ``3d``, ``1w``, ``2w``
    * everything else is sent unchanged

    :param category: Search category
    :param region: Search region
    :param filters: Dictionary with additional filters. See :meth:`trojmiastopl.get_category` for reference
    :type category: str
    :type region: str
    :type filters: dict
    :return: Url for given parameters
    :rtype: str
    :raises requests.HTTPError: When the url could not be obtained from the search engine
    """
    url = "/".join([BASE_URL, category]) + "/"

    if not filters:
        if region is not None:
            url += "s,{0}.html".format(region)
        return url

    payload = [("id_kat", decode_category_name(category))]
    if region is not None:
        payload.append(("s", region))

    for key, value in filters.items():
        if isinstance(value, tuple):
            # Tuples are immutable, so normalise the bounds into locals
            # instead of assigning into the tuple (which raises TypeError).
            lower, upper = value[0], value[1]
            if lower is None:
                lower = 0
            payload.append((key, lower))
            if upper is not None:
                payload.append((key, upper))
            continue
        if key == "offer_type":
            value = decode_type(value)
            key = "rodzaj_nieruchomosci"
        elif key == "data_wprow" and value not in ("1d", "3d", "1w", "2w"):
            # Unsupported date range: drop the filter instead of sending it
            continue
        payload.append((key, value))

    try:
        # The search helper documents its payload as a tuple of tuples
        return get_url_for_filters(tuple(payload))
    except (AttributeError, requests.HTTPError):
        # AttributeError means the response did not contain the expected markup
        raise requests.HTTPError("Could not obtain url for given filters from the search engine")
@caching(key_func=key_sha1)
def get_content_for_url(url):
    """
    Fetch the given url with a GET request and return the response.

    The request is sent with a randomly chosen ``User-Agent`` header. The
    function is wrapped by the ``caching`` decorator; according to the original
    documentation, when the environment variable DEBUG is True the response
    for the url is cached in the /var/temp directory.

    :param url: Website url
    :type url: str
    :return: Response for requested url
    :raises requests.HTTPError: When the server answers with a 4xx/5xx status
    """
    request_headers = {'User-Agent': get_random_user_agent()}
    page = requests.get(url, headers=request_headers)
    page.raise_for_status()
    return page