# news-extractor / annotation-config.yaml
# Ümit Gündüz
# add dataset and update code documentation
# f4f5cfb
# Source "aa": CSS queries grouped by the field they locate.
# NOTE(review): selectors such as `metadescription` are not standard HTML
# elements; presumably the extractor rewrites <meta> tags before querying —
# confirm against the extractor code.
aa:
  css-queries:
    date:
      - 'body div.detay-bg span.tarih'
    title:
      - 'body > title'
      - 'body div.detay-bg h1'
    description:
      - 'body > metadescription'
      - 'body div.detay-bg h4'
    content:
      - 'body div.detay-icerik div'
# Source "aksam": CSS queries grouped by the field they locate.
# NOTE(review): `metadescription` is not a standard HTML element; presumably
# produced by a preprocessing step — confirm against the extractor code.
aksam:
  css-queries:
    date:
      - 'body > header > time'
    title:
      - 'body > title'
      - 'body div.container div.content h1.title-1'
    description:
      - 'body > metadescription'
      - 'body div.container div.content h2.spot'
    content:
      - 'body div.container div.content div.newsDetailText article'
# Source "cnnturk": CSS queries grouped by the field they locate.
# Every selector here uses the child combinator (`>`) directly under `body`.
cnnturk:
  css-queries:
    date:
      - 'body > header > time'
    title:
      - 'body > title'
      - 'body > header > h1.detail-content-title'
    description:
      - 'body > metadescription'
      - 'body > header > h2.detail-header-spot'
    content:
      - 'body > article'
# Source "cumhuriyet": CSS queries grouped by the field they locate.
# The date field lists three alternatives: a meta-derived element, a header
# <time>, and a site-specific div.
# NOTE(review): whether list order expresses priority is not visible here —
# confirm against the extractor code.
cumhuriyet:
  css-queries:
    date:
      - 'body > metadatepublished'
      - 'body > header > time'
      - 'body div.content div.yayin-tarihi'
    title:
      - 'body > title'
      - 'body div.content h1.baslik'
    description:
      - 'body > metadescription'
      - 'body div.content h2.spot'
    content:
      - 'body div.content div.haberMetni'
# Source "ensonhaber": CSS queries grouped by the field they locate.
# Title and description both live under `div.article-title` inside the header.
ensonhaber:
  css-queries:
    date:
      - 'body > header > time'
    title:
      - 'body > title'
      - 'body > header div.article-title h1'
    description:
      - 'body > metadescription'
      - 'body > header div.article-title h2'
    content:
      - 'body div.column-article article'
# Source "haber7": CSS queries grouped by the field they locate.
# The date field has two selectors: the `.added` and the `.updated` date item.
haber7:
  css-queries:
    date:
      - 'body > header div.news-info div.date span.date-item.added'
      - 'body > header div.news-info div.date span.date-item.updated'
    title:
      - 'body > title'
      - 'body > header h1'
    description:
      - 'body > metadescription'
      - 'body > header h2'
    content:
      - 'body div.container article'
# Source "haberglobal": CSS queries grouped by the field they locate.
# The date field lists six meta-derived selectors followed by a header <time>.
# NOTE(review): the `metadate*` selectors come before the `metaarticle*` ones
# here, whereas the "haberler" entry lists them the other way round. If list
# order is a priority order, confirm the difference is intentional.
haberglobal:
  css-queries:
    date:
      - 'body > metadateupdated'
      - 'body > metadatecreated'
      - 'body > metadatemodified'
      - 'body > metadatepublished'
      - 'body > metaarticlemodifiedtime'
      - 'body > metaarticlepublishedtime'
      - 'body > header time'
    title:
      - 'body > title'
      - 'body > header h1'
    description:
      - 'body > metadescription'
      - 'body > header div.post-detail-header h2'
    content:
      - 'body div.container article'
# Source "haberler": CSS queries grouped by the field they locate.
# The date field lists six meta-derived selectors followed by a header <time>.
# NOTE(review): whether list order expresses priority is not visible here —
# confirm against the extractor code.
haberler:
  css-queries:
    date:
      - 'body > metaarticlemodifiedtime'
      - 'body > metaarticlepublishedtime'
      - 'body > metadateupdated'
      - 'body > metadatecreated'
      - 'body > metadatemodified'
      - 'body > metadatepublished'
      - 'body > header time'
    title:
      - 'body > title'
      - 'body > header h1'
    description:
      - 'body > metadescription'
      - 'body > header h2.haber_spotu'
    content:
      - 'body div.hbContainer article'
# Source "haberturk": CSS queries grouped by the field they locate.
# NOTE(review): the description selector targets an `h1` (`h1.spot-title`),
# not an `h2` — confirm against the site's markup.
haberturk:
  css-queries:
    date:
      - 'body > header time'
    title:
      - 'body > title'
      - 'body > div.container section.newsArticle figcaption h1.title'
    description:
      - 'body > metadescription'
      - 'body > div.container section.newsArticle figcaption h1.spot-title'
    content:
      - 'body > div.container article.content'
# Source "hurriyet": CSS queries grouped by the field they locate.
# The date field lists three selectors, from a generic header <time> to a
# site-specific `span.news-date`.
# NOTE(review): whether list order expresses priority is not visible here —
# confirm against the extractor code.
hurriyet:
  css-queries:
    date:
      - 'body > header time'
      - 'body > div.news-detail-page header time'
      - 'body > div.news-detail-page section.news-detail-content div.container div.news-inf span.news-date'
    title:
      - 'body > title'
      - 'body > div.news-detail-page section.news-detail-content div.container h1.news-detail-title'
    description:
      - 'body > metadescription'
      - 'body > div.news-detail-page section.news-detail-content div.container div.news-content__inf h2'
    content:
      - 'body > div.news-detail-page section.news-detail-content div.container div.news-content'
# Source "milliyet": CSS queries grouped by the field they locate.
# The title, description and content selectors are identical to those of the
# "hurriyet" entry in this file; only the date selectors differ.
milliyet:
  css-queries:
    date:
      - 'body > div.news-detail-page header time'
    title:
      - 'body > title'
      - 'body > div.news-detail-page section.news-detail-content div.container h1.news-detail-title'
    description:
      - 'body > metadescription'
      - 'body > div.news-detail-page section.news-detail-content div.container div.news-content__inf h2'
    content:
      - 'body > div.news-detail-page section.news-detail-content div.container div.news-content'
# Source "ntv": CSS queries grouped by the field they locate.
# The date field uses only meta-derived selectors; no <time> element is listed.
# NOTE(review): `metadatemodified` / `metadatepublished` are not standard HTML
# elements; presumably produced by a preprocessing step — confirm.
ntv:
  css-queries:
    date:
      - 'body > metadatemodified'
      - 'body > metadatepublished'
    title:
      - 'body > title'
      - 'body > div.common-container section.news-detail h1.category-detail-title'
    description:
      - 'body > metadescription'
      - 'body > div.common-container section.news-detail h2.category-detail-sub-title'
    content:
      - 'body > div.common-container section.news-detail div.category-detail-content div.content-news-tag-selector'
# Source "trthaber": CSS queries grouped by the field they locate.
# NOTE(review): the description selector targets an `h1` (`h1.news-spot`),
# not an `h2` — confirm against the site's markup.
trthaber:
  css-queries:
    date:
      - 'body > header time'
    title:
      - 'body > title'
      - 'body > div.container div.news-detail-container h1.news-title'
    description:
      - 'body > metadescription'
      - 'body > div.container div.news-detail-container h1.news-spot'
    content:
      - 'body > div.container div.news-detail-container div.news-content'