Module transcripter.core.youtube_manager
Classes
class YouTubeManager (config: Config)
-
A class to manage interactions with the YouTube API, including fetching video details and transcripts.
Attributes
config
:Config
- Configuration object containing API keys and other settings.
api_client
:Api
- YouTube API client initialized with the provided API key.
Initializes the YouTubeManager with the given configuration.
Args
config
:Config
- Configuration object containing API keys and other settings.
Expand source code
class YouTubeManager:
    """
    Manage interactions with the YouTube API: video details and transcripts.

    Attributes:
        config (Config): Configuration object containing API keys and other settings.
        api_client (Api): YouTube API client initialized with the provided API key.
    """

    def __init__(self, config: Config) -> None:
        """
        Initializes the YouTubeManager with the given configuration.

        Args:
            config (Config): Configuration object containing API keys and other
                settings. Its ``YOUTUBE_API_KEY`` attribute is used to build the
                API client.
        """
        self.config = config
        self.api_client = Api(api_key=self.config.YOUTUBE_API_KEY)
        logger.info("YouTubeManager initialized")

    def get_all_video_details_from_playlist(
        self, playlist_id: str
    ) -> Dict[str, Dict[str, Union[str, int]]]:
        """
        Fetches details of all videos in a given playlist.

        Args:
            playlist_id (str): The ID of the playlist to fetch video details from.

        Returns:
            Dict[str, Dict[str, Union[str, int]]]: A dictionary where keys are
            video IDs and values are dictionaries containing the title,
            publish date, and video ID of each playlist item.
        """
        logger.info(f"Fetching video details for playlist: {playlist_id}")
        # NOTE(review): count=None presumably asks the client for every item,
        # fetched in pages of `limit` -- confirm against the client library docs.
        playlist_items = self.api_client.get_playlist_items(
            playlist_id=playlist_id, limit=50, count=None
        ).items
        logger.debug(f"Playlist items: {playlist_items}")
        return self._get_video_details(playlist_items)

    def get_all_video_details_from_channel(
        self, channel_id: str
    ) -> Dict[str, Dict[str, Union[str, int]]]:
        """
        Fetches details of all videos in a given channel.

        The channel's "uploads" playlist is looked up first and then listed via
        ``get_all_video_details_from_playlist``.

        Args:
            channel_id (str): The ID of the channel to fetch video details from.

        Returns:
            Dict[str, Dict[str, Union[str, int]]]: A dictionary where keys are
            video IDs and values are dictionaries containing the title,
            publish date, and video ID. An empty dictionary is returned when
            the channel ID is empty, the channel is not found, or the channel
            does not expose an uploads playlist.
        """
        logger.info(f"Fetching video details for channel: {channel_id}")
        if not channel_id:
            logger.error("Channel ID is None")
            return {}

        channel_response = self.api_client.get_channel_info(channel_id=channel_id)
        if not channel_response.items:
            logger.error(f"No channel found for ID: {channel_id}")
            return {}

        channel_item = channel_response.items[0]
        # Resolve the attribute chain with None defaults so that an attribute
        # that is missing *or* present-but-None is handled the same way,
        # instead of raising AttributeError further down the chain.
        content_details = getattr(channel_item, "contentDetails", None)
        related_playlists = getattr(content_details, "relatedPlaylists", None)
        playlist_id = getattr(related_playlists, "uploads", None)
        if not playlist_id:
            logger.error(f"Channel {channel_id} does not have expected content details")
            return {}

        return self.get_all_video_details_from_playlist(playlist_id)

    def get_video_details(self, video_id: str) -> Dict[str, Union[str, int]]:
        """
        Fetches details of a single video.

        Args:
            video_id (str): The ID of the video to fetch details for.

        Returns:
            Dict[str, Union[str, int]]: A dictionary containing the title,
            publish date, and video ID. An empty dictionary is returned when
            the video ID is empty or the API returns no matching video.
        """
        logger.info(f"Fetching video details for video: {video_id}")
        if not video_id:
            logger.error("Video ID is None")
            return {}

        video_response = self.api_client.get_video_by_id(video_id=video_id)
        # An unknown ID yields no items; bail out the same way the channel
        # lookup does rather than failing with IndexError on items[0].
        if not video_response.items:
            logger.error(f"No video found for ID: {video_id}")
            return {}

        video = video_response.items[0]
        return {
            "title": video.snippet.title,
            "publish_date": video.snippet.publishedAt,
            "video_id": video.id,
        }

    def _get_video_details(self, items: List) -> Dict[str, Dict[str, Union[str, int]]]:
        """
        Helper method to extract video details from a list of playlist items.

        Args:
            items (List): A list of playlist items. Each item must expose
                ``contentDetails.videoId``, ``snippet.title`` and
                ``snippet.publishedAt``.

        Returns:
            Dict[str, Dict[str, Union[str, int]]]: A dictionary where keys are
            video IDs and values are dictionaries containing the title,
            publish date, and video ID. If the same video ID occurs more than
            once, the last occurrence wins.
        """
        videos = {
            item.contentDetails.videoId: {
                "title": item.snippet.title,
                "publish_date": item.snippet.publishedAt,
                "video_id": item.contentDetails.videoId,
            }
            for item in items
        }
        logger.debug(f"Fetched details for {len(videos)} videos")
        return videos

    def get_transcript_details_from_video(
        self, video_id: str
    ) -> Optional[List[Dict[str, Union[str, float]]]]:
        """
        Fetches the transcript of a given video.

        Args:
            video_id (str): The ID of the video to fetch the transcript for.

        Returns:
            Optional[List[Dict[str, Union[str, float]]]]: A list of dictionaries
            containing transcript details such as start time and text, or None
            if an error occurs.
        """
        logger.info(f"Fetching transcript for video: {video_id}")
        try:
            return YouTubeTranscriptApi.get_transcript(video_id)
        except Exception as e:
            # Deliberate best-effort: any failure is logged and reported to the
            # caller as None rather than propagated.
            logger.error(f"Error fetching transcript for video {video_id}: {str(e)}")
            return None

    @staticmethod
    def merge_transcript_chunks(
        chunks: List[Dict[str, Union[str, float]]],
    ) -> List[Dict[str, Union[str, float]]]:
        """
        Merges adjacent transcript chunks into larger chunks.

        Chunks are combined pairwise: each merged entry takes the ``start`` of
        the first chunk of the pair and both texts joined by a single space.
        When the number of chunks is odd, the final chunk is appended as-is.

        Args:
            chunks (List[Dict[str, Union[str, float]]]): A list of dictionaries
                containing transcript details such as start time and text.

        Returns:
            List[Dict[str, Union[str, float]]]: A list of merged transcript
            chunks. Merged entries contain only ``start`` and ``text``; an
            unpaired trailing chunk is the original dictionary, unchanged.
        """
        logger.debug(f"Merging {len(chunks)} transcript chunks")
        # Pair element 0 with 1, 2 with 3, ...; zip stops at the shorter slice,
        # so an unpaired last element is left for the branch below.
        merged_list = [
            {"start": first["start"], "text": first["text"] + " " + second["text"]}
            for first, second in zip(chunks[::2], chunks[1::2])
        ]
        if len(chunks) % 2 != 0:
            merged_list.append(chunks[-1])
        logger.debug(f"Merged into {len(merged_list)} chunks")
        return merged_list
Static methods
def merge_transcript_chunks(chunks: List[Dict[str, Union[str, float]]]) ‑> List[Dict[str, Union[str, float]]]
-
Merges adjacent transcript chunks into larger chunks.
Args
chunks
:List[Dict[str, Union[str, float]]]
- A list of dictionaries containing transcript details such as
start time and text.
Returns
List[Dict[str, Union[str, float]]]
- A list of merged transcript chunks.
Methods
def get_all_video_details_from_channel(self, channel_id: str) ‑> Dict[str, Dict[str, Union[str, int]]]
-
Fetches details of all videos in a given channel.
Args
channel_id
:str
- The ID of the channel to fetch video details from.
Returns
Dict[str, Dict[str, Union[str, int]]]
- A dictionary where keys are video IDs and values are dictionaries
containing video details such as title, publish date, and video ID.
def get_all_video_details_from_playlist(self, playlist_id: str) ‑> Dict[str, Dict[str, Union[str, int]]]
-
Fetches details of all videos in a given playlist.
Args
playlist_id
:str
- The ID of the playlist to fetch video details from.
Returns
Dict[str, Dict[str, Union[str, int]]]
- A dictionary where keys are video IDs and values are dictionaries
containing video details such as title, publish date, and video ID.
def get_transcript_details_from_video(self, video_id: str) ‑> Optional[List[Dict[str, Union[str, float]]]]
-
Fetches the transcript of a given video.
Args
video_id
:str
- The ID of the video to fetch the transcript for.
Returns
Optional[List[Dict[str, Union[str, float]]]]
- A list of dictionaries containing transcript details such as
start time and text, or None if an error occurs.
def get_video_details(self, video_id: str) ‑> Dict[str, Union[str, int]]
-
Fetches details of a single video.
Args
video_id
:str
- The ID of the video to fetch details for.
Returns
Dict[str, Union[str, int]]
- A dictionary containing video details such as title, publish date, and video ID.