Add DuckDuckGo and Google News wrappers; refactor CryptoPanic and NewsAPI
- Implemented DuckDuckGoWrapper for news retrieval using DuckDuckGo tools.
- Added GoogleNewsWrapper for accessing the Google News RSS feed.
- Refactored CryptoPanicWrapper to unify the get_top_headlines and get_latest_news methods.
- Updated NewsApiWrapper to simplify top headlines retrieval.
- Added tests for the DuckDuckGo and Google News wrappers.
- Enhanced documentation for CryptoPanicWrapper and NewsApiWrapper.
- Created base module for social media integrations.
This commit is contained in:
@@ -1,18 +1,32 @@
|
||||
from app.utils.wrapper_handler import WrapperHandler
|
||||
from .base import NewsWrapper, Article
|
||||
from .news_api import NewsApiWrapper
|
||||
from .gnews_api import GnewsWrapper
|
||||
from .gnews_api import GoogleNewsWrapper
|
||||
from .cryptopanic_api import CryptoPanicWrapper
|
||||
from .duckduckgo import DuckDuckGoWrapper
|
||||
|
||||
__all__ = ["NewsApiWrapper", "GnewsWrapper", "CryptoPanicWrapper"]
|
||||
__all__ = ["NewsApiWrapper", "GoogleNewsWrapper", "CryptoPanicWrapper", "DuckDuckGoWrapper"]
|
||||
|
||||
|
||||
class NewsAPIs(NewsWrapper):
    """
    A wrapper class that aggregates multiple news API wrappers and tries them in order until one succeeds.
    This class uses the WrapperHandler to manage multiple NewsWrapper instances.
    It includes, and tries, the following news API wrappers in this order:
    - GoogleNewsWrapper
    - DuckDuckGoWrapper
    - NewsApiWrapper
    - CryptoPanicWrapper

    It provides methods to get top headlines and latest news by delegating the calls to the first successful wrapper.
    If all wrappers fail, it raises an exception.
    """

    def __init__(self):
        # The list order is the fallback order documented on the class.
        wrappers = [GoogleNewsWrapper, DuckDuckGoWrapper, NewsApiWrapper, CryptoPanicWrapper]
        self.wrapper_handler: WrapperHandler[NewsWrapper] = WrapperHandler.build_wrappers(wrappers)

    def get_top_headlines(self, total: int = 100) -> list[Article]:
        """
        Get top headlines from the managed wrappers.
        Args:
            total (int): The maximum number of articles to return.
        Returns:
            list[Article]: A list of Article objects.
        """
        return self.wrapper_handler.try_call(lambda w: w.get_top_headlines(total))

    def get_latest_news(self, query: str, total: int = 100) -> list[Article]:
        """
        Get latest news for a query from the managed wrappers.
        Args:
            query (str): The search query.
            total (int): The maximum number of articles to return.
        Returns:
            list[Article]: A list of Article objects.
        """
        return self.wrapper_handler.try_call(lambda w: w.get_latest_news(query, total))
|
||||
|
||||
@@ -7,8 +7,29 @@ class Article(BaseModel):
|
||||
description: str = ""
|
||||
|
||||
class NewsWrapper:
    """
    Base class for news API wrappers.
    All news API wrappers should inherit from this class and implement the methods.
    """

    def get_top_headlines(self, total: int = 100) -> list[Article]:
        """
        Get top headlines, optionally limited by total.
        Args:
            total (int): The maximum number of articles to return.
        Returns:
            list[Article]: A list of Article objects.
        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError("This method should be overridden by subclasses")

    def get_latest_news(self, query: str, total: int = 100) -> list[Article]:
        """
        Get latest news based on a query.
        Args:
            query (str): The search query.
            total (int): The maximum number of articles to return.
        Returns:
            list[Article]: A list of Article objects.
        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError("This method should be overridden by subclasses")
|
||||
|
||||
|
||||
@@ -31,6 +31,13 @@ def get_articles(response: dict) -> list[Article]:
|
||||
return articles
|
||||
|
||||
class CryptoPanicWrapper(NewsWrapper):
|
||||
"""
|
||||
A wrapper for the CryptoPanic API (Documentation: https://cryptopanic.com/developers/api/)
|
||||
Requires an API key set in the environment variable CRYPTOPANIC_API_KEY.
|
||||
It is free to use, but has rate limits and restrictions based on the plan type (the free plan is 'developer' with 100 req/month).
|
||||
Supports different plan types via the CRYPTOPANIC_API_PLAN environment variable (developer, growth, enterprise).
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.api_key = os.getenv("CRYPTOPANIC_API_KEY", "")
|
||||
assert self.api_key, "CRYPTOPANIC_API_KEY environment variable not set"
|
||||
@@ -55,7 +62,10 @@ class CryptoPanicWrapper(NewsWrapper):
|
||||
def set_filter(self, filter: CryptoPanicFilter):
|
||||
self.filter = filter
|
||||
|
||||
def get_top_headlines(self, total: int = 100) -> list[Article]:
    """
    Get top headlines, optionally limited by total.
    Delegates to get_latest_news with an empty query.
    Args:
        total (int): The maximum number of articles to return.
    Returns:
        list[Article]: A list of Article objects.
    """
    return self.get_latest_news("", total)  # same endpoint so just call the other method
|
||||
|
||||
def get_latest_news(self, query: str, total: int = 100) -> list[Article]:
|
||||
params = self.get_base_params()
|
||||
params['currencies'] = query
|
||||
|
||||
@@ -65,6 +75,3 @@ class CryptoPanicWrapper(NewsWrapper):
|
||||
json_response = response.json()
|
||||
articles = get_articles(json_response)
|
||||
return articles[:total]
|
||||
|
||||
def get_latest_news(self, query: str, total: int = 100) -> list[Article]:
|
||||
return self.get_top_headlines(query, total) # same endpoint for both, so just call it
|
||||
|
||||
32
src/app/news/duckduckgo.py
Normal file
32
src/app/news/duckduckgo.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import json
|
||||
from .base import Article, NewsWrapper
|
||||
from agno.tools.duckduckgo import DuckDuckGoTools
|
||||
|
||||
def create_article(result: dict) -> Article:
    """
    Build an Article from a single DuckDuckGo news result.
    Args:
        result (dict): One result entry; the "source", "date", "title" and "body" keys are read.
    Returns:
        Article: The populated article. A missing key leaves the matching field as an empty string.
    """
    # Article attribute -> key of the result dict it is filled from.
    field_sources = (
        ("source", "source"),
        ("time", "date"),
        ("title", "title"),
        ("description", "body"),
    )
    entry = Article()
    for attribute, key in field_sources:
        setattr(entry, attribute, result.get(key, ""))
    return entry
|
||||
|
||||
class DuckDuckGoWrapper(NewsWrapper):
    """
    A wrapper for DuckDuckGo News search using the Tool from agno.tools.duckduckgo.
    It can be rewritten to use direct API calls if needed in the future, but currently is easy to write and use.
    """

    def __init__(self):
        self.tool = DuckDuckGoTools()
        # Search term used for top headlines and whenever an empty query is passed.
        self.query = "crypto"

    def _search(self, query: str, total: int) -> list[Article]:
        """Run one news search and convert every decoded result into an Article."""
        results = self.tool.duckduckgo_news(query, max_results=total)
        # The tool's return value is decoded as JSON; each decoded entry is handed to create_article.
        json_results = json.loads(results)
        return [create_article(result) for result in json_results]

    def get_top_headlines(self, total: int = 100) -> list[Article]:
        """
        Get top headlines by searching for the wrapper's default query.
        Args:
            total (int): The maximum number of results requested from the tool.
        Returns:
            list[Article]: A list of Article objects.
        """
        return self._search(self.query, total)

    def get_latest_news(self, query: str, total: int = 100) -> list[Article]:
        """
        Get latest news based on a query.
        Args:
            query (str): The search query; an empty query falls back to the wrapper's default query.
            total (int): The maximum number of results requested from the tool.
        Returns:
            list[Article]: A list of Article objects.
        """
        return self._search(query or self.query, total)
|
||||
|
||||
@@ -9,8 +9,13 @@ def result_to_article(result: dict) -> Article:
|
||||
article.description = result.get("description", "")
|
||||
return article
|
||||
|
||||
class GnewsWrapper(NewsWrapper):
|
||||
def get_top_headlines(self, query: str, total: int = 100) -> list[Article]:
|
||||
class GoogleNewsWrapper(NewsWrapper):
|
||||
"""
|
||||
A wrapper for the Google News RSS Feed (Documentation: https://github.com/ranahaani/GNews/?tab=readme-ov-file#about-gnews)
|
||||
It does not require an API key and is free to use.
|
||||
"""
|
||||
|
||||
def get_top_headlines(self, total: int = 100) -> list[Article]:
|
||||
gnews = GNews(language='en', max_results=total, period='7d')
|
||||
results = gnews.get_top_news()
|
||||
|
||||
|
||||
@@ -11,6 +11,12 @@ def result_to_article(result: dict) -> Article:
|
||||
return article
|
||||
|
||||
class NewsApiWrapper(NewsWrapper):
|
||||
"""
|
||||
A wrapper for the NewsAPI (Documentation: https://newsapi.org/docs/get-started)
|
||||
Requires an API key set in the environment variable NEWS_API_KEY.
|
||||
It is free to use, but has rate limits and restrictions based on the plan type (the free plan is 'developer' with 100 req/day).
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
api_key = os.getenv("NEWS_API_KEY")
|
||||
assert api_key is not None, "NEWS_API_KEY environment variable not set"
|
||||
@@ -20,13 +26,13 @@ class NewsApiWrapper(NewsWrapper):
|
||||
self.language = "en" # TODO Only English articles for now?
|
||||
self.max_page_size = 100
|
||||
|
||||
def get_top_headlines(self, total: int = 100) -> list[Article]:
    """
    Get top headlines, optionally limited by total.
    Results are fetched page by page, with at most self.max_page_size articles per request.
    Args:
        total (int): The maximum number of articles to return.
    Returns:
        list[Article]: A list of Article objects, never longer than total.
    """
    # Guard first: a non-positive total would make page_size zero (division by zero below)
    # or negative, and there is nothing to fetch anyway.
    if total <= 0:
        return []

    page_size = min(self.max_page_size, total)
    pages = -(-total // page_size)  # ceiling division

    articles: list[Article] = []
    for page in range(1, pages + 1):
        headlines = self.client.get_top_headlines(q="", category=self.category, language=self.language, page_size=page_size, page=page)
        articles.extend(result_to_article(article) for article in headlines.get("articles", []))

    # The last page can overshoot when total is not a multiple of page_size.
    return articles[:total]
|
||||
|
||||
1
src/app/social/__init.py
Normal file
1
src/app/social/__init.py
Normal file
@@ -0,0 +1 @@
|
||||
from .base import SocialWrapper
|
||||
0
src/app/social/base.py
Normal file
0
src/app/social/base.py
Normal file
Reference in New Issue
Block a user