1
0
mirror of https://github.com/yt-dlp/yt-dlp synced 2024-06-29 05:54:33 +00:00

Compare commits

...

7 Commits

Author SHA1 Message Date
Frank Aurich
d875a0dab0
Merge e7079c3a5a into f3411af12e 2024-06-26 14:01:56 +03:00
megumin
f3411af12e
[ie/matchtv] Fix extractor (#10190)
Authored by: megumintyan
2024-06-25 00:49:09 +02:00
Frank Aurich
e7079c3a5a Update KiKA extractor:
- Use regular expression suggested by dirkf
  - Update existing test case, and add a second one with different URL schema
  - Remove static 'uploader' element
2024-05-15 22:08:43 +02:00
pukkandan
b96401f7c4 Update yt_dlp/extractor/kika.py 2024-05-15 22:08:43 +02:00
Frank Aurich
5d9a4a223c Incorporate changes suggested during code review 2024-05-15 22:08:43 +02:00
Frank Aurich
0408452f4b Remove Kika support from MDR extractor 2024-05-15 22:08:42 +02:00
Frank Aurich
164fbc7fc3 [KiKa] Add extractor 2024-05-15 22:08:42 +02:00
4 changed files with 107 additions and 76 deletions

View File

@ -947,6 +947,7 @@
)
from .kicker import KickerIE
from .kickstarter import KickStarterIE
from .kika import KikaIE
from .kinja import KinjaEmbedIE
from .kinopoisk import KinoPoiskIE
from .kommunetv import KommunetvIE

91
yt_dlp/extractor/kika.py Normal file
View File

@ -0,0 +1,91 @@
from .common import InfoExtractor
from ..utils import (
determine_ext,
parse_duration,
parse_iso8601,
traverse_obj
)
class KikaIE(InfoExtractor):
    # Extractor for video pages on kika.de.
    # Metadata is read from the site's `_next-api` proxy endpoint; the stream
    # and subtitle listing is a second JSON document whose URL is taken from
    # the metadata's `assets.url` field.
    IE_DESC = 'KiKA.de'
    _VALID_URL = r'https?://(?:www\.)?kika\.de/(?:.*)/(?P<id>[a-z-]+-?\d+)'
    _GEO_COUNTRIES = ['DE']
    _TESTS = [
        {
            'url': 'https://www.kika.de/beutolomaeus-und-der-wahre-weihnachtsmann/videos/eins-der-neue-weihnachtsmann-102',
            'md5': '25ceea8790417f3c6dcf1d4342f8a97a',
            'info_dict': {
                'id': 'eins-der-neue-weihnachtsmann-102',
                'ext': 'mp4',
                'title': '1. Der neue Weihnachtsmann',
                'description': 'md5:61b1e6f32882e8ca2a0ddfd135d03c6b',
                'duration': 787,
                'timestamp': 1700584500,
                'upload_date': '20231121',
            },
        },
        {
            'url': 'https://www.kika.de/kaltstart/videos/video92498',
            'md5': '710ece827e5055094afeb474beacb7aa',
            'info_dict': {
                'id': 'video92498',
                'ext': 'mp4',
                'title': '7. Wo ist Leo?',
                'description': 'md5:fb48396a5b75068bcac1df74f1524920',
                'duration': 436,
                'timestamp': 1702926876,
                'upload_date': '20231218',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the video metadata JSON, then the asset listing it points
        to, and returns id, title, description, timestamp, duration, formats
        and German subtitles.
        """
        video_id = self._match_id(url)

        api_url = f'https://www.kika.de/_next-api/proxy/v1/videos/{video_id}'
        metadata = self._download_json(api_url, video_id)
        asset_info = self._download_json(metadata['assets']['url'], video_id)

        # Both subtitle flavours are filed under 'de'; TTML is looked up
        # first so it precedes WebVTT when both are present.
        subtitles = {}
        for asset_key, sub_ext in (('videoSubtitle', 'ttml'), ('webvttUrl', 'vtt')):
            sub_url = asset_info.get(asset_key)
            if sub_url:
                subtitles.setdefault('de', []).append({
                    'url': sub_url,
                    'ext': sub_ext,
                })

        return {
            'id': video_id,
            'title': metadata.get('title'),
            'description': metadata.get('description'),
            'timestamp': parse_iso8601(metadata.get('date')),
            'duration': parse_duration(metadata.get('duration')),
            'formats': list(self._extract_formats(asset_info, video_id)),
            'subtitles': subtitles,
        }

    def _extract_formats(self, media_info, video_id):
        """Yield format dicts for every entry of ``media_info['assets']``.

        Entries without a URL are skipped. An ``m3u8`` URL is expanded through
        ``_extract_m3u8_formats`` (non-fatal); any other URL yields a single
        format whose id is the URL's extension.
        """
        for asset in media_info['assets']:
            asset_url = asset.get('url')
            if not asset_url:
                continue

            extension = determine_ext(asset_url)
            if extension != 'm3u8':
                # Direct file: copy over whichever technical fields are set.
                technical_fields = traverse_obj(asset, {
                    'width': 'frameWidth',
                    'height': 'frameHeight',
                    'filesize': 'fileSize',
                    'abr': 'bitrateAudio',
                    'vbr': 'bitrateVideo',
                })
                yield {
                    'url': asset_url,
                    'format_id': extension,
                    **technical_fields,
                }
                continue

            yield from self._extract_m3u8_formats(
                asset_url, video_id, 'mp4', m3u8_id='hls', fatal=False)

View File

@ -1,51 +1,35 @@
import random
from .common import InfoExtractor
from ..utils import xpath_text
class MatchTVIE(InfoExtractor):
_VALID_URL = r'https?://matchtv\.ru(?:/on-air|/?#live-player)'
_VALID_URL = [
r'https?://matchtv\.ru/on-air/?(?:$|[?#])',
r'https?://video\.matchtv\.ru/iframe/channel/106/?(?:$|[?#])',
]
_TESTS = [{
'url': 'http://matchtv.ru/#live-player',
'url': 'http://matchtv.ru/on-air/',
'info_dict': {
'id': 'matchtv-live',
'ext': 'flv',
'ext': 'mp4',
'title': r're:^Матч ТВ - Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
'is_live': True,
'live_status': 'is_live',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://matchtv.ru/on-air/',
'url': 'https://video.matchtv.ru/iframe/channel/106',
'only_matching': True,
}]
def _real_extract(self, url):
video_id = 'matchtv-live'
video_url = self._download_json(
'http://player.matchtv.ntvplus.tv/player/smil', video_id,
query={
'ts': '',
'quality': 'SD',
'contentId': '561d2c0df7159b37178b4567',
'sign': '',
'includeHighlights': '0',
'userId': '',
'sessionId': random.randint(1, 1000000000),
'contentType': 'channel',
'timeShift': '0',
'platform': 'portal',
},
headers={
'Referer': 'http://player.matchtv.ntvplus.tv/embed-player/NTVEmbedPlayer.swf',
})['data']['videoUrl']
f4m_url = xpath_text(self._download_xml(video_url, video_id), './to')
formats = self._extract_f4m_formats(f4m_url, video_id)
webpage = self._download_webpage('https://video.matchtv.ru/iframe/channel/106', video_id)
video_url = self._html_search_regex(
r'data-config="config=(https?://[^?"]+)[?"]', webpage, 'video URL').replace('/feed/', '/media/') + '.m3u8'
return {
'id': video_id,
'title': 'Матч ТВ - Прямой эфир',
'is_live': True,
'formats': formats,
'formats': self._extract_m3u8_formats(video_url, video_id, 'mp4', live=True),
}

View File

@ -13,8 +13,8 @@
class MDRIE(InfoExtractor):
IE_DESC = 'MDR.DE and KiKA'
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
IE_DESC = 'MDR.DE'
_VALID_URL = r'https?://(?:www\.)?mdr\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
_GEO_COUNTRIES = ['DE']
@ -34,30 +34,6 @@ class MDRIE(InfoExtractor):
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
},
'skip': '404 not found',
}, {
'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
'md5': '4930515e36b06c111213e80d1e4aad0e',
'info_dict': {
'id': '19636',
'ext': 'mp4',
'title': 'Baumhaus vom 30. Oktober 2015',
'duration': 134,
'uploader': 'KIKA',
},
'skip': '404 not found',
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
'info_dict': {
'id': '8182',
'ext': 'mp4',
'title': 'Beutolomäus und der geheime Weihnachtswunsch',
'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
'timestamp': 1482541200,
'upload_date': '20161224',
'duration': 4628,
'uploader': 'KIKA',
},
}, {
# audio with alternative playerURL pattern
'url': 'http://www.mdr.de/kultur/videos-und-audios/audio-radio/operation-mindfuck-robert-wilson100.html',
@ -68,28 +44,7 @@ class MDRIE(InfoExtractor):
'duration': 3239,
'uploader': 'MITTELDEUTSCHER RUNDFUNK',
},
}, {
# empty bitrateVideo and bitrateAudio
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
'info_dict': {
'id': '128372',
'ext': 'mp4',
'title': 'Der kleine Wichtel kehrt zurück',
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
'duration': 4876,
'timestamp': 1607823300,
'upload_date': '20201213',
'uploader': 'ZDF',
},
'params': {
'skip_download': True,
},
}, {
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
'only_matching': True,
}, {
'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
'only_matching': True,
'skip': '404 not found',
}, {
'url': 'http://www.mdr.de/mediathek/mdr-videos/a/video-1334.html',
'only_matching': True,