Page MenuHomeDevCentral

No OneTemporary

diff --git a/page.php b/page.php
index 4011979..fa5cf77 100644
--- a/page.php
+++ b/page.php
@@ -1,395 +1,400 @@
<?php
// strftime() format for human-readable dates: day of month, full month name, year
define('LONG_DATE_FORMAT', '%e %B %Y');
// User agent announced on the first attempt to fetch a page
define('USER_AGENT', 'WikimediaTools/SourceTemplatesGenerator/0.1');
// User agent used for the second attempt, when the first one returned nothing
define('USER_AGENT_FALLBACK', 'Mozilla/5.0');
// Complete browser-like user agent string; not referenced by the code visible in this file
define('USER_AGENT_FALLBACK_FULL', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
// Presumably declares the Encoding class used by Page::encodeData()
require_once('helpers/Encoding.php');
class Page {
/**
 * @var string The page URL
 */
public $url;
/**
 * @var array Meta tags
 */
public $meta_tags;
/**
 * @var string The page content
 */
public $data;
/**
 * @var string The page title
 */
public $title;
/**
 * @var string The page author
 */
public $author;
/**
 * @var array The page coauthors
 */
public $coauthors = [];
/**
 * @var string The site ISSN
 */
public $issn;
/**
 * @var string The site name, filled by analyse() from the og:site_name meta tag.
 * Declared explicitly so it is not created as a dynamic property.
 */
public $site;
//If we use the parameters yyyy mm dd, we describe CONTENT date:
/**
 * @var int The page content's year
 */
public $yyyy;
/**
 * @var int The page content's month
 */
public $mm;
/**
 * @var int The page content's day
 */
public $dd;
//If not, we describe ONLINE RESOURCE PUBLISH date:
/**
 * @var string The page publication date in relevant locale
 */
public $date;
/**
 * @var int The page publication unixtime
 */
public $unixtime;
/**
 * @var bool Indicates if we have to skip year/month/date template parameters
 */
public $skipYMD = false;
/**
 * @var bool Indicates if we have to skip month/date (but maybe keep year) template parameters
 */
public $skipMD = false;
/**
 * @var bool Indicates if we have to skip author template parameter
 */
public $skipAuthor;
+ /**
+ * @var bool If true, this site uses an anti ad blocker technology to force users to show ads.
+ * Defaults to false, like the other flags, so a generic page is not reported as forcing ads;
+ * NOTE(review): page classes for sites that do force ads are expected to set it to true.
+ */
+ public $antiAdBlocker = false;
+
/**
* @var mixed If not null, contains an array for another service to use
*/
public $switchTo = null;
/**
* @var string The last error that occurred while opening and parsing the page
*/
public $error;
/**
 * Builds a page from its URL: downloads the content, then analyses it.
 *
 * When the download yields no content, the analysis is skipped; get_data()
 * leaves the failure reason in $this->error.
 *
 * @param string $url the page URL
 */
function __construct ($url) {
    $this->url = $url;
    $this->get_data();
    if (!$this->data) {
        // Nothing was downloaded: there is nothing to analyse
        return;
    }
    $this->analyse();
}
/**
 * Downloads the page content into $this->data, then normalises its encoding
 * through encodeData().
 *
 * A first attempt is made with USER_AGENT; when it returns nothing, a second
 * attempt is made with USER_AGENT_FALLBACK. When both attempts fail,
 * $this->error is set and $this->data is left untouched.
 */
function get_data () {
    ini_set('user_agent', USER_AGENT);
    $content = file_get_contents($this->url);
    if (!$content) {
        // Second chance with a generic browser user agent
        ini_set('user_agent', USER_AGENT_FALLBACK);
        $content = @file_get_contents($this->url);
        if (!$content) {
            $this->error = "Can't read URL";
            return;
        }
    }
    $this->data = $content;
    $this->encodeData();
}
/**
 * Converts $this->data to UTF-8 when another encoding is detected.
 *
 * Nothing is done when detection fails or already reports UTF-8.
 */
function encodeData () {
    $detected = mb_detect_encoding($this->data, "ISO-8859-15, ISO-8859-1, UTF-8, ASCII, auto");
    if (!$detected || $detected == 'UTF-8') {
        return;
    }
    $this->data = Encoding::toUTF8($this->data);
}
/**
 * Returns a new Page instance or, when pages/index.dat maps the URL to a
 * dedicated analyser, an instance of that specialized class.
 *
 * @param string $url the page URL
 * @return Page the generic page, or the site-specific page object
 */
static function load ($url) {
    //Classes list are stored in pages/index.dat file
    //Each line contains the URL beginning, a tabulation, and the page analyser name
    // * class is this name, appended by 'Page'
    // * source file is the lowercase version of this name, appended by '.php'
    $pages = file('pages/index.dat', FILE_USE_INCLUDE_PATH);
    if ($pages === false) {
        // Index unreadable: every URL is handled by the generic analyser
        return new Page($url);
    }
    foreach ($pages as $line) {
        $page = explode("\t", $line);
        if (count($page) < 2) {
            // Blank or malformed line: no analyser name to read
            continue;
        }
        if (substr($url, 0, strlen($page[0])) == $page[0]) {
            $name = trim($page[1]);
            $file = strtolower($name) . '.php';
            $class = $name . 'Page';
            if (!class_exists($class)) {
                // NOTE(review): assumes analysers are stored next to index.dat, in pages/ — confirm
                include_once 'pages/' . $file;
            }
            return new $class($url);
        }
    }
    return new Page($url);
}
/**
 * Reads the page metatags and fills title, content date, site name and author.
 *
 * The date fields are only touched when one of the date metatags is present.
 */
function analyse () {
    //Meta tags (including <meta property="" value=""> and <meta itemprop="" value="" syntax)
    $tags = $this->get_meta_tags();
    $this->meta_tags = $tags;

    //Title (get_title reads $this->meta_tags, so it must run after the assignment above)
    $this->title = $this->get_title();

    //Date
    $rawDate = $this->getMetaTag($tags, 'date', 'pubdate', 'content_create_date');
    if ($rawDate) {
        $parsed = date_parse($rawDate);
        $this->yyyy = $parsed['year'];
        $this->mm = $parsed['month'];
        $this->dd = $parsed['day'];
    }

    //Site name
    $this->site = $this->getMetaTag($tags, 'og:site_name');

    //Author
    $this->author = $this->getMetaTag($tags, 'author');
}
/**
 * Gets page metatags
 *
 * Delegates to get_all_meta_tags().
 *
 * @return array an array where the keys are the metatags' names and the values the metatags' values
 */
function get_meta_tags () {
    $allTags = $this->get_all_meta_tags($this->url);
    return $allTags;
}
/**
 * Gets all metatags, including those using meta property= and meta itemprop= syntax
 *
 * The tags are extracted from $this->data with a regular expression. When
 * the same name appears several times, the last value wins (array_combine).
 *
 * @return array an array where the keys are the metatags' names and the values the metatags' values;
 *               an empty array when no metatag is found
 */
function get_all_meta_tags () {
    // Thank you to Bobble Bubble
    // See http://php.net/manual/en/function.get-meta-tags.php comments
    $pattern = '
~<\s*meta\s
# Lookahead to capture type to $1
(?=[^>]*?
\b(?:name|property|itemprop|http-equiv)\s*=\s*
(?|"\s*([^"]*?)\s*"|\'\s*([^\']*?)\s*\'|
([^"\'>]*?)(?=\s*/?\s*>|\s\w+\s*=))
)
# Capture content to $2
[^>]*?\bcontent\s*=\s*
(?|"\s*([^"]*?)\s*"|\'\s*([^\']*?)\s*\'|
([^"\'>]*?)(?=\s*/?\s*>|\s\w+\s*=))
[^>]*>
~ix';
    if (!preg_match_all($pattern, $this->data, $match)) {
        return [];
    }
    $metaTags = array_combine($match[1], $match[2]);
    // Array callable: the 'self::method' string form is deprecated as of PHP 8.2
    array_walk($metaTags, [__CLASS__, 'clean_tag']);
    return $metaTags;
}
/**
 * Cleans a tag value in place (callback for array_walk)
 *
 * An array value is flattened into a "; "-separated string, then the value
 * is trimmed. array_walk ignores the callback's return value, so the result
 * is written back through the reference.
 *
 * @param mixed &$item array item's value, modified in place
 * @param string $key array item's key (unused)
 * @return string the cleaned value
 */
static function clean_tag (&$item, $key) {
    if (is_array($item)) {
        $item = join("; ", $item);
    }
    $item = trim($item);
    return $item;
}
/**
 * Gets title
 *
 * The title metatags are tried first; the <title> element of the page
 * content is the fallback.
 *
 * @return string The page title, or an empty string when none is found
 */
function get_title () {
    $fromMeta = $this->getMetaTag($this->meta_tags, 'title', 'og:title', 'DC.title', 'Title');
    if ($fromMeta) {
        return $fromMeta;
    }
    if (preg_match("#<title>(.+)<\/title>#iU", $this->data, $found)) {
        return trim($found[1]);
    }
    return '';
}
/**
 * Determines if the current page is an article published in a journal.
 *
 * The decision is based on the dc_type, dcsext_pn-cat, citation_journal_title
 * and prism_publicationname metatags.
 *
 * @return bool true if the current page is an article ; otherwise, false
 */
function is_article () {
    $tags = $this->meta_tags;
    if (array_key_exists('dc_type', $tags) && $tags['dc_type'] == 'journalArticle') {
        return true;
    }
    if (array_key_exists('dcsext_pn-cat', $tags) && $tags['dcsext_pn-cat'] == 'Article') {
        return true;
    }
    if (array_key_exists('citation_journal_title', $tags)) {
        return true;
    }
    return array_key_exists('prism_publicationname', $tags);
}
/**
 * Gets relevant metatag
 *
 * The first argument is the metatags array; every following argument is an
 * acceptable metatag name, by order of preference. Names are compared
 * case-insensitively.
 *
 * @param array the metatags
 * @param string... the list of acceptable metatags
 *
 * @return string the first metatag value found, or an empty string when none matches
 */
static function getMetaTag () {
    $wanted = func_get_args();
    $available = array_shift($wanted);
    foreach ($wanted as $name) {
        $needle = strtolower($name);
        foreach ($available as $key => $value) {
            if ($needle != strtolower($key)) {
                continue;
            }
            return $value;
        }
    }
    return '';
}
/**
 * Finds a portion of text included between $before and $after strings on the current page
 *
 * Applies grab() to the page content ($this->data).
 *
 * @param string $before The string at the left of the text to be grabbed
 * @param string $after The string at the right of the text to be grabbed
 *
 * @return string The text found between $before and $after
 */
function between ($before, $after) {
    $found = self::grab($this->data, $before, $after);
    return $found;
}
/**
 * Finds a portion of text included between $before and $after strings
 *
 * The search for $after starts right after $before, so an empty match
 * ($after immediately following $before) returns an empty string.
 *
 * @param string $text The text where to find the substring
 * @param string $before The string at the left of the text to be grabbed
 * @param string $after The string at the right of the text to be grabbed [facultative];
 *                      when omitted, everything after $before is returned
 *
 * @return string|false The text found between $before and $after; false when
 *                      $before, or $after following it, is not found
 */
static function grab ($text, $before, $after = null) {
    $start = strpos($text, $before);
    if ($start === false) {
        return false;
    }
    $start += strlen($before);

    if ($after === null) {
        return substr($text, $start);
    }

    // Start exactly at $start: starting one byte further would miss an empty
    // match and could pass an offset beyond the end of $text.
    $end = strpos($text, $after, $start);
    if ($end === false) {
        return false;
    }
    return substr($text, $start, $end - $start);
}
/**
 * Downloads, through CURL library, accepting cookies.
 *
 * Cookies are kept in a temporary file for the duration of the request
 * (redirects are followed), then the file is deleted.
 *
 * @param string $url The URL to fetch
 * @param string $agent The user agent to send; when empty, CURLOPT_USERAGENT is not set
 * @return string|bool The response body, or false when the transfer fails
 */
static function curl_download ($url, $agent = '') {
    $timeout = 5;
    $cookie_file = tempnam(sys_get_temp_dir(), "cookie-sourcesgen-");

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_COOKIESESSION, true);
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    if ($agent != '') {
        curl_setopt($ch, CURLOPT_USERAGENT, $agent);
    }
    $data = curl_exec($ch);

    curl_close($ch);
    // As of PHP 8.0 curl_close() is a no-op and the cookie jar is written when
    // the handle is destroyed: destroy it now, so the jar is not re-created
    // after the unlink() below.
    unset($ch);
    unlink($cookie_file);

    return $data;
}
///
/// DATES
///
/**
 * Fills yyyy, mm, dd and unixtime from a date string.
 *
 * The default timezone is switched to $tz while the timestamp is computed,
 * then restored to its previous value.
 *
 * @param string $toParse The date to parse
 * @param string $tz The timezone to use [optional]
 */
function dateFromDateParse ($toParse, $tz = 'Europe/Paris') {
    $previousTz = date_default_timezone_get();
    date_default_timezone_set($tz);

    $parts = date_parse($toParse);
    $this->yyyy = $parts['year'];
    $this->mm = $parts['month'];
    $this->dd = $parts['day'];
    $this->unixtime = mktime(
        $parts['hour'],
        $parts['minute'],
        $parts['second'],
        $parts['month'],
        $parts['day'],
        $parts['year']
    );

    date_default_timezone_set($previousTz);
}
/**
 * Fills yyyy, mm and dd from a /YYYY-MM-DD/ segment of the page URL.
 *
 * The fields are left untouched when the URL contains no such segment.
 */
function extractYYYYMMDDDateFromURL() {
    if (!preg_match("@/([12][0-9]{3})\-([0-9]{2})\-([0-9]{2})/@", $this->url, $parts)) {
        return;
    }
    // $parts[0] is the whole match; year, month and day follow
    $this->yyyy = $parts[1];
    $this->mm = $parts[2];
    $this->dd = $parts[3];
}
/**
 * Fills yyyy, dd and mm from a /YYYY-DD-MM/ segment of the page URL
 * (the day comes before the month).
 *
 * The fields are left untouched when the URL contains no such segment.
 */
function extractYYYYDDMMateFromURL() {
    if (!preg_match("@/([12][0-9]{3})\-([0-9]{2})\-([0-9]{2})/@", $this->url, $parts)) {
        return;
    }
    // $parts[0] is the whole match; year, day and month follow
    $this->yyyy = $parts[1];
    $this->mm = $parts[3];
    $this->dd = $parts[2];
}
}
diff --git a/templates/wikipedia-fr/Lien_web.php b/templates/wikipedia-fr/Lien_web.php
index 39a0bf6..fb2281b 100644
--- a/templates/wikipedia-fr/Lien_web.php
+++ b/templates/wikipedia-fr/Lien_web.php
@@ -1,104 +1,111 @@
<?php
// French locale for time formatting: the strftime() calls below then output French month names
setlocale(LC_TIME, 'fr_FR.UTF-8');
class LienWebTemplate extends Template {
public $author;
public $coauthors;
public $url;
public $title;
public $dd;
public $mm;
public $yyyy;
public $site;
public $pageDate = null;
public $accessdate;
/**
 * @var bool Indicates if we've to remove jour/mois/année parameters
 */
public $skipYMD = false;
/**
 * @var bool Indicates if we've to remove jour/mois parameters but maybe keep année
 */
public $skipMD = false;
/**
 * @var bool Indicates if we've to remove auteur and coauteurs parameters
 */
public $skipAuthor = false;
/**
 * @var bool Indicates if we've to append {{Publicité forcée}} after the template.
 * Declared with a default so a template built without loadFromPage() does not
 * read an undefined property.
 */
public $antiAdBlocker = false;
/**
 * Initializes the template name and sets the access date to the current
 * date, formatted with LONG_DATE_FORMAT.
 */
function __construct () {
    $this->name = "Lien web";
    $today = strftime(LONG_DATE_FORMAT);
    $this->accessdate = trim($today);
}
/**
* Builds a {{Lien web}} template from the properties of an analysed page.
*
* @param object $page the page to copy the properties from (presumably a Page instance — confirm against callers)
* @return LienWebTemplate the populated template
*/
static function loadFromPage ($page) {
$template = new LienWebTemplate();
// Authorship
$template->author = $page->author;
$template->skipAuthor = $page->skipAuthor;
$template->coauthors = $page->coauthors;
$template->url = $page->url;
$template->title = $page->title;
// Content date, as separate day/month/year fields
$template->dd = $page->dd;
$template->mm = $page->mm;
$template->yyyy = $page->yyyy;
$template->site = $page->site;
// Publication date string; only reported by computeDate(), not written as a parameter
$template->pageDate = $page->date;
$template->skipYMD = $page->skipYMD;
$template->skipMD = $page->skipMD;
+ $template->antiAdBlocker = $page->antiAdBlocker;
return $template;
}
/**
 * Prints an HTML notice when the page provided a free-form publication date.
 *
 * Nothing is printed when pageDate is null or an empty string. The date
 * comes from the analysed page, so it is HTML-escaped before being echoed.
 */
function computeDate () {
    //Legacy code issue
    if ($this->pageDate === "" || $this->pageDate === null) {
        return;
    }
    $pageDate = htmlspecialchars((string)$this->pageDate, ENT_QUOTES, 'UTF-8');
    echo '<div data-alert class="alert-box info radius">';
    echo "<p>The Page metadata contains the following date information:<br />$pageDate</p><p>{{Lien web}} should now use jour, mois, année instead of a date parameter to provide richer machine data.</p>";
    echo ' <a href="#" class="close">&times;</a></div>';
}
/**
 * Gets the month, as a string in current locale
 *
 * A numeric month is converted to its name; any other non-empty value is
 * returned as is.
 *
 * @return string the month name, or an empty string when no month is set
 */
function getMonth () {
    if (!$this->mm) {
        return "";
    }
    if (is_numeric($this->mm)) {
        // Pin the day to 1: when the day is omitted, mktime() uses today's day,
        // so on the 29th-31st a shorter month would overflow into the next one.
        return strftime('%B', mktime(0, 0, 0, (int)$this->mm, 1));
    }
    return $this->mm;
}
/**
* Renders the template: fills $this->params from the template properties,
* then delegates the rendering to the parent class.
*
* Side effect: computeDate() may echo an HTML notice.
*
* @return string the template text
*/
function __toString () {
// auteur/coauteurs are omitted altogether when skipAuthor is set
if (!$this->skipAuthor) {
$this->params['auteur'] = $this->author;
if ($this->coauthors) {
$this->params['coauteurs'] = implode(', ', $this->coauthors);
}
}
$this->params['titre'] = $this->title;
$this->computeDate();
// jour/mois are written only when neither skip flag is set
if (!$this->skipYMD && !$this->skipMD) {
$this->params['jour'] = $this->dd;
$this->params['mois'] = $this->getMonth();
}
// année is kept when only skipMD is set
if (!$this->skipYMD) {
$this->params['année'] = $this->yyyy;
}
$this->params['url'] = $this->url;
$this->params['site'] = $this->site;
$this->params['consulté le'] = $this->accessdate;
- return parent::__toString();
+ $template = parent::__toString();
+
+ if ($this->antiAdBlocker) {
+ $template .= " {{Publicité forcée}}";
+ }
+
+ return $template;
}
}

File Metadata

Mime Type
text/x-diff
Expires
Wed, Oct 22, 13:22 (1 d, 20 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3092238
Default Alt Text
(15 KB)

Event Timeline