mirror of
https://github.com/MODSetter/SurfSense.git
synced 2025-09-11 06:44:40 +00:00
Fixed all ruff lint and formatting errors
This commit is contained in:
parent
0a03c42cc5
commit
d359a59f6d
85 changed files with 5520 additions and 3870 deletions
|
@ -1,176 +1,182 @@
|
|||
from notion_client import Client
|
||||
|
||||
|
||||
class NotionHistoryConnector:
|
||||
def __init__(self, token):
    """
    Initialize the NotionHistoryConnector with a token.

    Args:
        token (str): Notion integration token, handed to the Notion client
            as its ``auth`` value
    """
    # Single client instance reused by every method of this connector
    self.notion = Client(auth=token)
|
||||
|
||||
|
||||
def get_all_pages(self, start_date=None, end_date=None):
    """
    Fetches all pages shared with your integration and their content.

    Search results are paginated, so every result page is followed through
    ``next_cursor`` until ``has_more`` is false. The search request itself
    only carries the object filter and (when a date bound is given) a sort
    order, so the requested date range is applied here, against each page's
    ``last_edited_time``. Both bounds are inclusive.

    Args:
        start_date (str, optional): ISO 8601 date string (e.g., "2023-01-01T00:00:00Z")
        end_date (str, optional): ISO 8601 date string (e.g., "2023-12-31T23:59:59Z")

    Returns:
        list: List of dictionaries with "page_id", "title" and "content" keys

    Raises:
        ValueError: If a date bound or a page timestamp is not a valid
            ISO 8601 string
    """
    from datetime import datetime, timezone

    def _parse_timestamp(value):
        # A trailing "Z" is only understood by fromisoformat() on newer
        # Python versions, so spell it out as an explicit UTC offset.
        if value.endswith("Z"):
            value = value[:-1] + "+00:00"
        parsed = datetime.fromisoformat(value)
        # Naive values are treated as UTC so they compare with aware ones
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    not_before = _parse_timestamp(start_date) if start_date else None
    not_after = _parse_timestamp(end_date) if end_date else None
    has_date_range = not_before is not None or not_after is not None

    # Filter for pages only (not databases)
    search_params = {"filter": {"value": "page", "property": "object"}}

    # Most recently edited pages first whenever a date range is requested
    if has_date_range:
        search_params["sort"] = {
            "direction": "descending",
            "timestamp": "last_edited_time",
        }

    # First, get a list of all pages the integration has access to
    pages = []
    while True:
        search_results = self.notion.search(**search_params)
        pages.extend(search_results["results"])

        next_cursor = search_results.get("next_cursor")
        # Stop on the last result page, or when there is no cursor to continue with
        if not search_results.get("has_more") or not next_cursor:
            break
        search_params["start_cursor"] = next_cursor

    all_page_data = []
    for page in pages:
        if has_date_range:
            last_edited = page.get("last_edited_time")
            # A page without a timestamp cannot be ruled out, so it is kept
            if last_edited:
                edited_at = _parse_timestamp(last_edited)
                if not_before is not None and edited_at < not_before:
                    continue
                if not_after is not None and edited_at > not_after:
                    continue

        page_id = page["id"]

        # Get detailed page information
        page_content = self.get_page_content(page_id)

        all_page_data.append(
            {
                "page_id": page_id,
                "title": self.get_page_title(page),
                "content": page_content,
            }
        )

    return all_page_data
|
||||
|
||||
|
||||
def get_page_title(self, page):
    """
    Extracts the title from a page object.

    Args:
        page (dict): Notion page object

    Returns:
        str: The plain-text segments of the first non-empty title property
            joined with spaces, or "Untitled page (<id>)" as a fallback
    """
    # Title can be in different properties depending on the page type, so
    # the property is located by its type rather than by its name.
    for prop_data in page.get("properties", {}).values():
        if prop_data.get("type") == "title" and prop_data.get("title"):
            return " ".join(text_obj["plain_text"] for text_obj in prop_data["title"])

    # If no title found, return the page ID as fallback
    return f"Untitled page ({page['id']})"
|
||||
|
||||
|
||||
def get_page_content(self, page_id):
    """
    Fetches the content (blocks) of a specific page.

    Args:
        page_id (str): The ID of the page to fetch

    Returns:
        list: List of processed blocks from the page, in the order they
            were returned, each one passed through ``process_block``
    """
    blocks = []
    request = {"block_id": page_id}

    # Paginate through all blocks
    while True:
        response = self.notion.blocks.children.list(**request)
        blocks.extend(response["results"])

        cursor = response.get("next_cursor")
        # Without a cursor the same first page would be requested forever,
        # so a missing cursor ends the loop even if has_more is still set.
        if not response.get("has_more") or not cursor:
            break
        request["start_cursor"] = cursor

    # Process nested blocks recursively
    return [self.process_block(block) for block in blocks]
|
||||
|
||||
|
||||
def process_block(self, block):
    """
    Processes a block and recursively fetches any child blocks.

    Args:
        block (dict): The block to process

    Returns:
        dict: Processed block with "id", "type", "content" and "children"
            keys; "children" is an empty list when the block has none
    """
    block_id = block["id"]
    block_type = block["type"]

    # Extract block content based on its type
    content = self.extract_block_content(block)

    # Children are read through get_page_content() because it follows the
    # pagination cursor and runs every child back through process_block();
    # one children.list() call returns only a single page of results.
    if block.get("has_children", False):
        child_blocks = self.get_page_content(block_id)
    else:
        child_blocks = []

    return {
        "id": block_id,
        "type": block_type,
        "content": content,
        "children": child_blocks,
    }
|
||||
|
||||
|
||||
def extract_block_content(self, block):
|
||||
"""
|
||||
Extracts the content from a block based on its type.
|
||||
|
||||
|
||||
Args:
|
||||
block (dict): The block to extract content from
|
||||
|
||||
|
||||
Returns:
|
||||
str: Extracted content as a string
|
||||
"""
|
||||
block_type = block["type"]
|
||||
|
||||
|
||||
# Different block types have different structures
|
||||
if block_type in block and "rich_text" in block[block_type]:
|
||||
return "".join([text_obj["plain_text"] for text_obj in block[block_type]["rich_text"]])
|
||||
return "".join(
|
||||
[text_obj["plain_text"] for text_obj in block[block_type]["rich_text"]]
|
||||
)
|
||||
elif block_type == "image":
|
||||
# Instead of returning the raw URL which may contain sensitive AWS credentials,
|
||||
# return a placeholder or reference to the image
|
||||
|
@ -183,18 +189,21 @@ class NotionHistoryConnector:
|
|||
# Only return the domain part of external URLs to avoid potential sensitive parameters
|
||||
try:
|
||||
from urllib.parse import urlparse
|
||||
|
||||
parsed_url = urlparse(url)
|
||||
return f"[External Image from {parsed_url.netloc}]"
|
||||
except:
|
||||
except Exception:
|
||||
return "[External Image]"
|
||||
elif block_type == "code":
|
||||
language = block["code"]["language"]
|
||||
code_text = "".join([text_obj["plain_text"] for text_obj in block["code"]["rich_text"]])
|
||||
code_text = "".join(
|
||||
[text_obj["plain_text"] for text_obj in block["code"]["rich_text"]]
|
||||
)
|
||||
return f"```{language}\n{code_text}\n```"
|
||||
elif block_type == "equation":
|
||||
return block["equation"]["expression"]
|
||||
# Add more block types as needed
|
||||
|
||||
|
||||
# Return empty string for unsupported block types
|
||||
return ""
|
||||
|
||||
|
@ -203,23 +212,23 @@ class NotionHistoryConnector:
|
|||
# if __name__ == "__main__":
|
||||
# # Simple example of how to use this module
|
||||
# import argparse
|
||||
|
||||
|
||||
# parser = argparse.ArgumentParser(description="Fetch Notion pages using an integration token")
|
||||
# parser.add_argument("--token", help="Your Notion integration token")
|
||||
# parser.add_argument("--start-date", help="Start date in ISO format (e.g., 2023-01-01T00:00:00Z)")
|
||||
# parser.add_argument("--end-date", help="End date in ISO format (e.g., 2023-12-31T23:59:59Z)")
|
||||
# args = parser.parse_args()
|
||||
|
||||
|
||||
# token = args.token
|
||||
# if not token:
|
||||
# token = input("Enter your Notion integration token: ")
|
||||
|
||||
|
||||
# fetcher = NotionHistoryConnector(token)
|
||||
|
||||
|
||||
# try:
|
||||
# pages = fetcher.get_all_pages(args.start_date, args.end_date)
|
||||
# print(f"Fetched {len(pages)} pages from Notion")
|
||||
# for page in pages:
|
||||
# print(f"- {page['title']}")
|
||||
# except Exception as e:
|
||||
# print(f"Error: {str(e)}")
|
||||
# print(f"Error: {str(e)}")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue