[nzz] Relax kaltura regex

2020-11-16 09:42:26 +00:00 · 2018-11-20 20:50:40 +01:00 · 2018-11-20 20:50:40 +01:00 · 15ed5a2784
parent 2e1280ed43
commit 15ed5a2784
1 changed files with 10 additions and 3 deletions
--- a/youtube_dl/extractor/nzz.py
+++ b/youtube_dl/extractor/nzz.py
@@ -11,20 +11,27 @@ from ..utils import (
 class NZZIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
-    _TEST = {
+    _TESTS = [{
        'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
        'info_dict': {
            'id': '9153',
        },
        'playlist_mincount': 6,
-    }
+    }, {
        'url': 'https://www.nzz.ch/video/nzz-standpunkte/cvp-auf-der-suche-nach-dem-mass-der-mitte-ld.1368112',
        'info_dict': {
            'id': '1368112',
        },
        'playlist_count': 1,
    }]
    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)
        entries = []
-        for player_element in re.findall(r'(<[^>]+class="kalturaPlayer"[^>]*>)', webpage):
+        for player_element in re.findall(
                r'(<[^>]+class="kalturaPlayer[^"]*"[^>]*>)', webpage):
            player_params = extract_attributes(player_element)
            if player_params.get('data-type') not in ('kaltura_singleArticle',):
                self.report_warning('Unsupported player type')