diff --git a/overlays/yt-dlp/ard.patch b/overlays/yt-dlp/ard.patch new file mode 100644 index 0000000..2675553 --- /dev/null +++ b/overlays/yt-dlp/ard.patch @@ -0,0 +1,287 @@ +Url: https://github.com/yt-dlp/yt-dlp/pull/8760 +From e73c2f42f5947e674096a5bd31487bed207ba705 Mon Sep 17 00:00:00 2001 +From: tobi +Date: Tue, 12 Dec 2023 10:44:28 +0100 +Subject: [PATCH 1/4] fixed "status code 513" error while retrieving json + metadata + +--- + yt_dlp/extractor/ard.py | 51 ++++++++--------------------------------- + 1 file changed, 9 insertions(+), 42 deletions(-) + +diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py +index ca1faa7d0bc..d0f464fa35f 100644 +--- a/yt_dlp/extractor/ard.py ++++ b/yt_dlp/extractor/ard.py +@@ -576,7 +576,7 @@ def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode): + ie=ARDBetaMediathekIE.ie_key())) + + if (show_page['pagination']['pageSize'] * (pageNumber + 1) +- >= show_page['pagination']['totalElements']): ++ >= show_page['pagination']['totalElements']): + # we've processed enough pages to get all playlist entries + break + pageNumber = pageNumber + 1 +@@ -593,50 +593,17 @@ def _real_extract(self, url): + return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type) + + player_page = self._download_json( +- 'https://api.ardmediathek.de/public-gateway', +- display_id, data=json.dumps({ +- 'query': '''{ +- playerPage(client:"%s", clipId: "%s") { +- blockedByFsk +- broadcastedOn +- maturityContentRating +- mediaCollection { +- _duration +- _geoblocked +- _isLive +- _mediaArray { +- _mediaStreamArray { +- _quality +- _server +- _stream +- } +- } +- _previewImage +- _subtitleUrl +- _type +- } +- show { +- title +- } +- image { +- src +- } +- synopsis +- title +- tracking { +- atiCustomVars { +- contentId +- } +- } +- } +-}''' % (client, video_id), +- }).encode(), headers={ ++ f"https://api.ardmediathek.de/page-gateway/pages/ard/item/{video_id}", display_id, headers={ + 'Content-Type': 'application/json' +- })['data']['playerPage'] +- title = player_page['title'] ++ } ++ ) ++ + content_id = str_or_none(try_get( + player_page, lambda x: x['tracking']['atiCustomVars']['contentId'])) +- media_collection = player_page.get('mediaCollection') or {} ++ ++ player_page = try_get(player_page.get("widgets"), lambda x: x[0]) ++ title = player_page['title'] ++ media_collection = player_page.get('mediaCollection', {}).get("embedded") or {} + if not media_collection and content_id: + media_collection = self._download_json( + 'https://www.ardmediathek.de/play/media/' + content_id, + +From d989e408ff79ae5f2847ef17458571e2b266f5af Mon Sep 17 00:00:00 2001 +From: tobi +Date: Sat, 16 Dec 2023 14:18:48 +0100 +Subject: [PATCH 2/4] added back public api code and integrated it with the fix + for error 503 + +--- + yt_dlp/extractor/ard.py | 80 +++++++++++++++++++++++++++++++++++------ + 1 file changed, 69 insertions(+), 11 deletions(-) + +diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py +index d0f464fa35f..98a47f9de35 100644 +--- a/yt_dlp/extractor/ard.py ++++ b/yt_dlp/extractor/ard.py +@@ -19,6 +19,7 @@ + xpath_text, + ) + from ..compat import compat_etree_fromstring ++from ..networking.exceptions import HTTPError + + + class ARDMediathekBaseIE(InfoExtractor): +@@ -576,7 +577,7 @@ def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode): + ie=ARDBetaMediathekIE.ie_key())) + + if (show_page['pagination']['pageSize'] * (pageNumber + 1) +- >= show_page['pagination']['totalElements']): ++ >= show_page['pagination']['totalElements']): + # we've processed enough pages to get all playlist entries + break + pageNumber = pageNumber + 1 +@@ -592,18 +593,75 @@ def _real_extract(self, url): + # TODO: Extract only specified season + return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type) + +- player_page = self._download_json( +- f"https://api.ardmediathek.de/page-gateway/pages/ard/item/{video_id}", display_id, headers={ +- 'Content-Type': 'application/json' +- } +- ) +- +- content_id = str_or_none(try_get( +- player_page, lambda x: x['tracking']['atiCustomVars']['contentId'])) ++ try: ++ player_page = self._download_json( ++ 'https://api.ardmediathek.de/public-gateway', ++ display_id, data=json.dumps({ ++ 'query': '''{ ++ playerPage(client:"%s", clipId: "%s") { ++ blockedByFsk ++ broadcastedOn ++ maturityContentRating ++ mediaCollection { ++ _duration ++ _geoblocked ++ _isLive ++ _mediaArray { ++ _mediaStreamArray { ++ _quality ++ _server ++ _stream ++ } ++ } ++ _previewImage ++ _subtitleUrl ++ _type ++ } ++ show { ++ title ++ } ++ image { ++ src ++ } ++ synopsis ++ title ++ tracking { ++ atiCustomVars { ++ contentId ++ } ++ } ++ } ++}''' % (client, video_id), ++ }).encode(), headers={ ++ 'Content-Type': 'application/json' ++ })['data']['playerPage'] ++ ++ content_id = str_or_none(try_get( ++ player_page, lambda x: x['tracking']['atiCustomVars']['contentId'])) ++ except ExtractorError as e: ++ # try to get data from website API endpoint as public gateway failed with 503 ++ if isinstance(e.cause, HTTPError) and e.cause.status == 503: ++ player_page = self._download_json( ++ f"https://api.ardmediathek.de/page-gateway/pages/ard/item/{video_id}", display_id, headers={ ++ 'Content-Type': 'application/json' ++ } ++ ) ++ ++ # get the content id before overriding the player_page outer json below ++ content_id = str_or_none(try_get( ++ player_page, lambda x: x['tracking']['atiCustomVars']['contentId'])) ++ ++ player_page = try_get(player_page.get("widgets"), lambda x: x[0]) ++ else: ++ raise e + +- player_page = try_get(player_page.get("widgets"), lambda x: x[0]) + title = player_page['title'] +- media_collection = player_page.get('mediaCollection', {}).get("embedded") or {} ++ media_collection = player_page.get('mediaCollection', {}) or {} ++ ++ # if embedded is present, the website API was used ++ # the required attributes stored within the "embedded" json object ++ if "embedded" in media_collection: ++ media_collection = media_collection["embedded"] + if not media_collection and content_id: + media_collection = self._download_json( + 'https://www.ardmediathek.de/play/media/' + content_id, + +From 4db23c02aa9e2fb7d3931149d97548bea22439c5 Mon Sep 17 00:00:00 2001 +From: tobi +Date: Thu, 21 Dec 2023 17:41:18 +0100 +Subject: [PATCH 3/4] added requested improvements + +--- + yt_dlp/extractor/ard.py | 28 +++++++++++++--------------- + 1 file changed, 13 insertions(+), 15 deletions(-) + +diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py +index 98a47f9de35..21b37bae1d9 100644 +--- a/yt_dlp/extractor/ard.py ++++ b/yt_dlp/extractor/ard.py +@@ -636,32 +636,30 @@ def _real_extract(self, url): + 'Content-Type': 'application/json' + })['data']['playerPage'] + +- content_id = str_or_none(try_get( +- player_page, lambda x: x['tracking']['atiCustomVars']['contentId'])) ++ raw_player_page = player_page + except ExtractorError as e: +- # try to get data from website API endpoint as public gateway failed with 503 +- if isinstance(e.cause, HTTPError) and e.cause.status == 503: ++ if not isinstance(e.cause, HTTPError) or e.cause.status != 503: ++ raise ++ else: # try to get data from website API endpoint as public gateway failed with 503 + player_page = self._download_json( +- f"https://api.ardmediathek.de/page-gateway/pages/ard/item/{video_id}", display_id, headers={ ++ f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{video_id}', display_id, headers={ + 'Content-Type': 'application/json' +- } ++ }, note='Downloading fallback JSON metadata' + ) + +- # get the content id before overriding the player_page outer json below +- content_id = str_or_none(try_get( +- player_page, lambda x: x['tracking']['atiCustomVars']['contentId'])) ++ raw_player_page = player_page ++ player_page = try_get(player_page.get('widgets'), lambda x: x[0]) + +- player_page = try_get(player_page.get("widgets"), lambda x: x[0]) +- else: +- raise e ++ content_id = str_or_none(try_get( ++ raw_player_page, lambda x: x['tracking']['atiCustomVars']['contentId'])) + + title = player_page['title'] + media_collection = player_page.get('mediaCollection', {}) or {} + + # if embedded is present, the website API was used +- # the required attributes stored within the "embedded" json object +- if "embedded" in media_collection: +- media_collection = media_collection["embedded"] ++ # the required attributes stored within the 'embedded' json object ++ if 'embedded' in media_collection: ++ media_collection = media_collection['embedded'] + if not media_collection and content_id: + media_collection = self._download_json( + 'https://www.ardmediathek.de/play/media/' + content_id, + +From cf5ebbc90bdec8bf1461a1c9beeb69a68829af27 Mon Sep 17 00:00:00 2001 +From: Tobias Bittner +Date: Thu, 21 Dec 2023 20:48:07 +0100 +Subject: [PATCH 4/4] accepted suggestion which removes hanging indent + +Co-authored-by: Simon Sawicki +--- + yt_dlp/extractor/ard.py | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py +index 21b37bae1d9..713aa48162b 100644 +--- a/yt_dlp/extractor/ard.py ++++ b/yt_dlp/extractor/ard.py +@@ -644,8 +644,7 @@ def _real_extract(self, url): + player_page = self._download_json( + f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{video_id}', display_id, headers={ + 'Content-Type': 'application/json' +- }, note='Downloading fallback JSON metadata' +- ) ++ }, note='Downloading fallback JSON metadata') + + raw_player_page = player_page + player_page = try_get(player_page.get('widgets'), lambda x: x[0]) + diff --git a/overlays/yt-dlp/default.nix b/overlays/yt-dlp/default.nix new file mode 100644 index 0000000..065440f --- /dev/null +++ b/overlays/yt-dlp/default.nix @@ -0,0 +1,7 @@ +self: super: { + yt-dlp = super.yt-dlp.overrideAttrs(old: { + patches = (old.patches or []) ++ [ + ./ard.patch + ]; + }); +}