mirror of
https://github.com/supermemoryai/supermemory.git
synced 2026-05-18 23:36:00 +00:00
fix: entities urls might not be there
This commit is contained in:
parent
6bc839510f
commit
e652682671
2 changed files with 68 additions and 1 deletions
67
apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts
Normal file
67
apps/cf-ai-backend/src/queueConsumer/chunkers/chunkTweet.ts
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
import { TweetChunks } from "../../types";
|
||||
import chunkText from "./chonker";
|
||||
import { getRawTweet } from "@repo/shared-types/utils";
|
||||
|
||||
/**
 * One tweet of an already-normalised thread.
 *
 * This is the element shape `chunkThread` reads when the decoded payload is
 * an array rather than a string.
 */
interface Tweet {
  /** Tweet identifier; copied into the chunk metadata as `tweetId`. */
  id: string;
  /** Tweet body; this is the text that gets split into chunks. */
  text: string;
  /** Links attached to the tweet; copied into the metadata as `tweetLinks`. */
  links: string[];
  /** Images attached to the tweet; copied into the metadata as `tweetImages`. */
  images: string[];
  /** Videos attached to the tweet; copied into the metadata as `tweetVids`. */
  videos: string[];
}
|
||||
/**
 * Per-tweet metadata stored next to the chunked text of that tweet.
 *
 * The array fields are deliberately left as `any[]`: `chunkThread` fills them
 * either from untyped raw tweet JSON or from a `Tweet`'s string arrays.
 * NOTE(review): the elements look like URL strings in both cases — confirm
 * before tightening the types.
 */
interface Metadata {
  /** Identifier of the tweet the chunks were produced from. */
  tweetId: string;
  /** Links found on the tweet. */
  tweetLinks: any[];
  /** Videos found on the tweet. */
  tweetVids: any[];
  /** Images found on the tweet. */
  tweetImages: any[];
}
|
||||
|
||||
/**
 * The chunked text of a single tweet together with that tweet's metadata.
 */
export interface ThreadTweetData {
  /** Text chunks produced from the tweet body. */
  chunkedTweet: string[];
  /** Identifier, links, videos and images of the source tweet. */
  metadata: Metadata;
}
|
||||
|
||||
/**
 * Splits a serialized tweet, or a serialized thread of tweets, into text
 * chunks paired with per-tweet metadata.
 *
 * `threadText` is decoded with `JSON.parse` and handled in one of two ways:
 *
 * - If it decodes to a **string**, that string is passed to `getRawTweet`,
 *   the result is parsed as JSON again, and the resulting object is read as a
 *   raw tweet (`text`, `id_str`, `entities.urls[].expanded_url`,
 *   `extended_entities.media[]`). Missing `entities`, `urls`,
 *   `extended_entities`, `media`, `video_info` or `variants` are tolerated and
 *   simply yield empty (or shorter) metadata arrays.
 * - Otherwise it must decode to an **array** of `Tweet`-shaped objects, each
 *   of which is chunked individually.
 *
 * @param threadText JSON text holding either a string or an array of tweets.
 * @returns An object tagged `type: "tweet"` whose `chunks` hold one
 *   `{ chunkedTweet, metadata }` entry per tweet.
 * @throws SyntaxError if `threadText` (or the text returned by `getRawTweet`)
 *   is not valid JSON.
 * @throws TypeError if the decoded payload is neither a string nor an array.
 */
export function chunkThread(threadText: string): TweetChunks {
  // Size argument handed to chunkText for every tweet, whichever branch runs.
  const maxChunkSize = 1536;
  const thread = JSON.parse(threadText);

  if (typeof thread === "string") {
    console.log("DA WORKER FAILED DO SOMEHTING FIX DA WROKER");
    const rawTweet = getRawTweet(thread);
    const parsedTweet: any = JSON.parse(rawTweet);

    const chunkedTweet = chunkText(parsedTweet.text, maxChunkSize);

    // Either container may be absent on a raw tweet, and so may the list
    // inside it; normalise both to an empty array before mapping.
    const urls: any[] = parsedTweet.entities?.urls ?? [];
    const media: any[] = parsedTweet.extended_entities?.media ?? [];

    const metadata: Metadata = {
      tweetId: parsedTweet.id_str,
      tweetLinks: urls.map((url: any) => url.expanded_url),
      tweetVids: media
        .filter((item: any) => item.type === "video")
        // Only the first variant is used; videos without one are skipped.
        .map((item: any) => item.video_info?.variants?.[0]?.url)
        .filter((url: any) => url != null),
      tweetImages: media
        .filter((item: any) => item.type === "photo")
        .map((item: any) => item.media_url_https),
    };

    return { type: "tweet", chunks: [{ chunkedTweet, metadata }] };
  }

  console.log(JSON.stringify(thread));

  if (!Array.isArray(thread)) {
    throw new TypeError(
      "chunkThread: expected a JSON array of tweets or a JSON-encoded string",
    );
  }

  const chunks = thread.map((tweet: Tweet) => {
    const chunkedTweet = chunkText(tweet.text, maxChunkSize);

    const metadata = {
      tweetId: tweet.id,
      tweetLinks: tweet.links,
      tweetVids: tweet.videos,
      tweetImages: tweet.images,
    };

    return { chunkedTweet, metadata };
  });

  return { type: "tweet", chunks };
}
|
||||
|
|
@ -43,7 +43,7 @@ export function transformTweetData(input: any): Tweet | null {
|
|||
display_text_range: tweet.legacy.display_text_range,
|
||||
entities: {
|
||||
hashtags: tweet.legacy.entities.hashtags,
|
||||
urls: tweet.legacy.entities.urls,
|
||||
urls: tweet.legacy.entities?.urls,
|
||||
user_mentions: tweet.legacy.entities.user_mentions,
|
||||
symbols: tweet.legacy.entities.symbols,
|
||||
},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue