# Content-extraction rules, one directive per line (the format is line-oriented:
# each line is either `directive: value` or a comment starting with `#`).
# NOTE(review): appears to target theverge.com, judging by the example URL below.

# --- Metadata -----------------------------------------------------------------
author: //p[contains(@class, "byline")]/a[contains(@class, "author")]
title: //section[contains(concat(' ',normalize-space(@class),' '),' lede ')]//h1
date: //span[contains(@class, "publish-date")]/time[@pubdate]/@datetime

# --- Body candidates ----------------------------------------------------------
# Several containers are listed because page layouts differ; presumably they are
# tried in the order given -- confirm against the parser in use.
body: //div[contains(@class, 'm-article__entry-section')]
# for vergecasts, e.g. http://www.theverge.com/2013/8/22/4648566/the-vergecast-090-august-22th-2013-video
body: //article
body: //div[contains(concat(' ',normalize-space(@class),' '),' l-col__main ')]

# --- Parser options -----------------------------------------------------------
prune: no
#tidy: no

# --- Elements removed by XPath ------------------------------------------------
strip: //h3
strip: //article/header
strip: //*[@id='sticky-menu']
strip: //aside
strip: //nav
strip: //img[contains(@class, 'vox-lazy-load')]
# deal with bad parsing
strip: //div[contains(@class, 'story-image')]//div[contains(., 'function(')]
strip: //div[contains(@class, 'm-linkset')]
strip: //div[contains(@class, 'm-entry__sidebar')]
strip: //ul[contains(@class, 'm-article__sources')]
strip: //div[contains(@class, 'chorus-emc__content')]

# --- Elements removed by id/class substring -----------------------------------
strip_id_or_class: gallery
strip_id_or_class: article-meta
strip_id_or_class: story-navigation
strip_id_or_class: slegend
strip_id_or_class: related-product-meta
strip_id_or_class: comments
strip_id_or_class: ui-jump-list
strip_id_or_class: pullquote
strip_id_or_class: m-ad
strip_id_or_class: social-sharing
strip_id_or_class: m-video-entry__excerpt
strip_id_or_class: hidden
strip_id_or_class: m-article__follow-bar
strip_id_or_class: m-article__share-buttons
strip_id_or_class: l-col__sidebar
strip_id_or_class: c-river
strip_id_or_class: chorus-ad-placement
strip_id_or_class: c-related-list

replace_string(