From c17a948ae091fb41a8bd8c8aacdd8123e4fdbb9a Mon Sep 17 00:00:00 2001 From: Berack96 Date: Tue, 30 Sep 2025 00:34:07 +0200 Subject: [PATCH] Refactor news API integration to use NewsApiWrapper and GnewsWrapper; add tests for Gnews API functionality --- demos/news_api.py | 4 +-- pyproject.toml | 3 +- src/app/news/__init__.py | 5 +-- src/app/news/base.py | 6 ++++ src/app/news/gnews_api.py | 31 +++++++++++++++++ src/app/news/news_api.py | 17 +++++++--- tests/api/test_gnews_api.py | 34 +++++++++++++++++++ tests/api/test_news_api.py | 22 +++++++++++-- tests/conftest.py | 2 -- uv.lock | 66 +++++++++++++++++++++++++++++++++++++ 10 files changed, 175 insertions(+), 15 deletions(-) create mode 100644 src/app/news/gnews_api.py create mode 100644 tests/api/test_gnews_api.py diff --git a/demos/news_api.py b/demos/news_api.py index 0fc4c37..26dab24 100644 --- a/demos/news_api.py +++ b/demos/news_api.py @@ -5,10 +5,10 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../src' ########################################### from dotenv import load_dotenv -from app.news import NewsAPI +from app.news import NewsApiWrapper def main(): - api = NewsAPI() + api = NewsApiWrapper() print("ok") if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 3ef7154..b83de19 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,8 +14,6 @@ dependencies = [ "pytest", # ✅ per gestire variabili d'ambiente (generalmente API keys od opzioni) "dotenv", - # 🟡 per fare scraping di pagine web - #"bs4", # ✅ per fare una UI web semplice con input e output "gradio", @@ -34,6 +32,7 @@ dependencies = [ # ✅ per interagire con API di notizie "newsapi-python", + "gnews", ] [tool.pytest.ini_options] diff --git a/src/app/news/__init__.py b/src/app/news/__init__.py index 9b88ff6..957453d 100644 --- a/src/app/news/__init__.py +++ b/src/app/news/__init__.py @@ -1,3 +1,4 @@ -from .news_api import NewsAPI +from .news_api import NewsApiWrapper +from .gnews_api import GnewsWrapper -__all__ = ["NewsAPI"] \ No newline at end of file +__all__ = ["NewsApiWrapper", "GnewsWrapper"] \ No newline at end of file diff --git a/src/app/news/base.py b/src/app/news/base.py index 0391424..8b3f55d 100644 --- a/src/app/news/base.py +++ b/src/app/news/base.py @@ -6,3 +6,9 @@ class Article(BaseModel): title: str = "" description: str = "" +class NewsWrapper: + def get_top_headlines(self, query: str, total: int = 100) -> list[Article]: + raise NotImplementedError("This method should be overridden by subclasses") + def get_latest_news(self, query: str, total: int = 100) -> list[Article]: + raise NotImplementedError("This method should be overridden by subclasses") + diff --git a/src/app/news/gnews_api.py b/src/app/news/gnews_api.py new file mode 100644 index 0000000..53451c9 --- /dev/null +++ b/src/app/news/gnews_api.py @@ -0,0 +1,31 @@ +from gnews import GNews +from .base import Article, NewsWrapper + +def result_to_article(result: dict) -> Article: + article = Article() + article.source = result.get("source", "") + article.time = result.get("publishedAt", "") + article.title = result.get("title", "") + article.description = result.get("description", "") + return article + +class GnewsWrapper(NewsWrapper): + def get_top_headlines(self, query: str, total: int = 100) -> list[Article]: + gnews = GNews(language='en', max_results=total, period='7d') + results = gnews.get_top_news() + + articles = [] + for result in results: + article = result_to_article(result) + articles.append(article) + return articles + + def get_latest_news(self, query: str, total: int = 100) -> list[Article]: + gnews = GNews(language='en', max_results=total, period='7d') + results = gnews.get_news(query) + + articles = [] + for result in results: + article = result_to_article(result) + articles.append(article) + return articles diff --git a/src/app/news/news_api.py b/src/app/news/news_api.py index ce213cf..9629ecd 100644 --- a/src/app/news/news_api.py +++ b/src/app/news/news_api.py @@ -1,7 +1,6 @@ import os import newsapi -from .base import Article - +from .base import Article, NewsWrapper def result_to_article(result: dict) -> Article: article = Article() @@ -11,7 +10,7 @@ def result_to_article(result: dict) -> Article: article.description = result.get("description", "") return article -class NewsAPI: +class NewsApiWrapper(NewsWrapper): def __init__(self): api_key = os.getenv("NEWS_API_KEY") assert api_key is not None, "NEWS_API_KEY environment variable not set" @@ -21,7 +20,7 @@ class NewsAPI: self.language = "en" # TODO Only English articles for now? self.max_page_size = 100 - def get_top_headlines(self, query:str, total:int=100) -> list[Article]: + def get_top_headlines(self, query: str, total: int = 100) -> list[Article]: page_size = min(self.max_page_size, total) pages = (total // page_size) + (1 if total % page_size > 0 else 0) @@ -32,4 +31,14 @@ class NewsAPI: articles.extend(results) return articles + def get_latest_news(self, query: str, total: int = 100) -> list[Article]: + page_size = min(self.max_page_size, total) + pages = (total // page_size) + (1 if total % page_size > 0 else 0) + + articles = [] + for page in range(1, pages + 1): + everything = self.client.get_everything(q=query, language=self.language, sort_by="publishedAt", page_size=page_size, page=page) + results = [result_to_article(article) for article in everything.get("articles", [])] + articles.extend(results) + return articles diff --git a/tests/api/test_gnews_api.py b/tests/api/test_gnews_api.py new file mode 100644 index 0000000..49c8418 --- /dev/null +++ b/tests/api/test_gnews_api.py @@ -0,0 +1,34 @@ +import pytest +from app.news import GnewsWrapper + + +@pytest.mark.news +@pytest.mark.api +class TestGnewsAPI: + + def test_gnews_api_initialization(self): + gnews_api = GnewsWrapper() + assert gnews_api is not None + + def test_gnews_api_get_latest_news(self): + gnews_api = GnewsWrapper() + articles = gnews_api.get_latest_news(query="crypto", total=2) + assert isinstance(articles, list) + assert len(articles) == 2 + for article in articles: + assert hasattr(article, 'source') + assert hasattr(article, 'time') + assert hasattr(article, 'title') + assert hasattr(article, 'description') + + def test_gnews_api_get_top_headlines(self): + news_api = GnewsWrapper() + articles = news_api.get_top_headlines(query="crypto", total=2) + assert isinstance(articles, list) + assert len(articles) == 2 + for article in articles: + assert hasattr(article, 'source') + assert hasattr(article, 'time') + assert hasattr(article, 'title') + assert hasattr(article, 'description') + diff --git a/tests/api/test_news_api.py b/tests/api/test_news_api.py index 9558882..4778d5b 100644 --- a/tests/api/test_news_api.py +++ b/tests/api/test_news_api.py @@ -1,13 +1,29 @@ -from app.news import NewsAPI +import pytest +from app.news import NewsApiWrapper + +@pytest.mark.news +@pytest.mark.api class TestNewsAPI: def test_news_api_initialization(self): - news_api = NewsAPI() + news_api = NewsApiWrapper() assert news_api.client is not None + def test_news_api_get_latest_news(self): + news_api = NewsApiWrapper() + articles = news_api.get_latest_news(query="crypto", total=2) + assert isinstance(articles, list) + assert len(articles) > 0 # Ensure we got some articles (apparently it doesn't always return the requested number) + for article in articles: + assert hasattr(article, 'source') + assert hasattr(article, 'time') + assert hasattr(article, 'title') + assert hasattr(article, 'description') + + def test_news_api_get_top_headlines(self): - news_api = NewsAPI() + news_api = NewsApiWrapper() articles = news_api.get_top_headlines(query="crypto", total=2) assert isinstance(articles, list) assert len(articles) > 0 # Ensure we got some articles (apparently it doesn't always return the requested number) diff --git a/tests/conftest.py b/tests/conftest.py index f2601b1..21502d1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -30,7 +30,6 @@ def pytest_collection_modifyitems(config, items): """Modifica automaticamente gli item di test aggiungendogli marker basati sul nome""" markers_to_add = { - "api": pytest.mark.api, "coinbase": pytest.mark.api, "cryptocompare": pytest.mark.api, "overview": pytest.mark.slow, @@ -38,7 +37,6 @@ def pytest_collection_modifyitems(config, items): "gemini": pytest.mark.gemini, "ollama_gpt": pytest.mark.ollama_gpt, "ollama_qwen": pytest.mark.ollama_qwen, - "news": pytest.mark.news, } for item in items: diff --git a/uv.lock b/uv.lock index 26356c0..7eb69ba 100644 --- a/uv.lock +++ b/uv.lock @@ -130,6 +130,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" }, ] +[[package]] +name = "beautifulsoup4" +version = "4.14.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/77/e9/df2358efd7659577435e2177bfa69cba6c33216681af51a707193dec162a/beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e", size = 625822, upload-time = "2025-09-29T10:05:42.613Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392, upload-time = "2025-09-29T10:05:43.771Z" }, +] + [[package]] name = "brotli" version = "1.1.0" @@ -310,6 +323,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/87/22/f020c047ae1346613db9322638186468238bcfa8849b4668a22b97faad65/dateparser-1.2.2-py3-none-any.whl", hash = "sha256:5a5d7211a09013499867547023a2a0c91d5a27d15dd4dbcea676ea9fe66f2482", size = 315453, upload-time = "2025-06-26T09:29:21.412Z" }, ] +[[package]] +name = "dnspython" +version = "2.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" }, +] + [[package]] name = "docstring-parser" version = "0.17.0" @@ -344,6 +366,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/e4/c543271a8018874b7f682bf6156863c416e1334b8ed3e51a69495c5d4360/fastapi-0.116.2-py3-none-any.whl", hash = "sha256:c3a7a8fb830b05f7e087d920e0d786ca1fc9892eb4e9a84b227be4c1bc7569db", size = 95670, upload-time = "2025-09-16T18:29:21.329Z" }, ] +[[package]] +name = "feedparser" +version = "6.0.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sgmllib3k" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dc/79/db7edb5e77d6dfbc54d7d9df72828be4318275b2e580549ff45a962f6461/feedparser-6.0.12.tar.gz", hash = "sha256:64f76ce90ae3e8ef5d1ede0f8d3b50ce26bcce71dd8ae5e82b1cd2d4a5f94228", size = 286579, upload-time = "2025-09-10T13:33:59.486Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4e/eb/c96d64137e29ae17d83ad2552470bafe3a7a915e85434d9942077d7fd011/feedparser-6.0.12-py3-none-any.whl", hash = "sha256:6bbff10f5a52662c00a2e3f86a38928c37c48f77b3c511aedcd51de933549324", size = 81480, upload-time = "2025-09-10T13:33:58.022Z" }, +] + [[package]] name = "ffmpy" version = "0.6.1" @@ -421,6 +455,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/01/61/d4b89fec821f72385526e1b9d9a3a0385dda4a72b206d28049e2c7cd39b8/gitpython-3.1.45-py3-none-any.whl", hash = "sha256:8908cb2e02fb3b93b7eb0f2827125cb699869470432cc885f019b8fd0fccff77", size = 208168, upload-time = "2025-07-24T03:45:52.517Z" }, ] +[[package]] +name = "gnews" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "beautifulsoup4" }, + { name = "dnspython" }, + { name = "feedparser" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6c/65/d4b19ebde3edd4d0cb63660fe61e9777de1dd35ea819cb72a5b53002bb97/gnews-0.4.2.tar.gz", hash = "sha256:5016cf5299f42ea072adb295abe5e9f093c5c422da2c12e6661d1dcdbc56d011", size = 24847, upload-time = "2025-07-27T13:46:54.717Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6f/77/00b21cce68b6041e78edf23efbc95eea6a4555cd474594b7360d1b9e4444/gnews-0.4.2-py3-none-any.whl", hash = "sha256:ed1fa603a7edeb3886925e756b114afb1e0c5b7b9f56fe5ebeedeeb730d2a9c4", size = 18142, upload-time = "2025-07-27T13:46:53.848Z" }, +] + [[package]] name = "google-auth" version = "2.40.3" @@ -1164,6 +1213,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/23/8146aad7d88f4fcb3a6218f41a60f6c2d4e3a72de72da1825dc7c8f7877c/semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177", size = 15552, upload-time = "2022-05-26T13:35:21.206Z" }, ] +[[package]] +name = "sgmllib3k" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/9e/bd/3704a8c3e0942d711c1299ebf7b9091930adae6675d7c8f476a7ce48653c/sgmllib3k-1.0.0.tar.gz", hash = "sha256:7868fb1c8bfa764c1ac563d3cf369c381d1325d36124933a726f29fcdaa812e9", size = 5750, upload-time = "2010-08-24T14:33:52.445Z" } + [[package]] name = "shellingham" version = "1.5.4" @@ -1200,6 +1255,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, ] +[[package]] +name = "soupsieve" +version = "2.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472, upload-time = "2025-08-27T15:39:51.78Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679, upload-time = "2025-08-27T15:39:50.179Z" }, +] + [[package]] name = "starlette" version = "0.48.0" @@ -1308,6 +1372,7 @@ dependencies = [ { name = "agno" }, { name = "coinbase-advanced-py" }, { name = "dotenv" }, + { name = "gnews" }, { name = "google-genai" }, { name = "gradio" }, { name = "newsapi-python" }, @@ -1321,6 +1386,7 @@ requires-dist = [ { name = "agno" }, { name = "coinbase-advanced-py" }, { name = "dotenv" }, + { name = "gnews" }, { name = "google-genai" }, { name = "gradio" }, { name = "newsapi-python" },