No OneTemporary
Actions

Size

19 KB

Referenced Files

None

Subscribers

None

View Options

	diff --git a/page.php b/page.php
	index 4f6c72d..d8b1d9a 100644
	--- a/page.php
	+++ b/page.php
	@@ -1,364 +1,364 @@
	<?php

	//strftime() format for long dates (day of month, full month name, year); ArticleTemplate uses it for the access date.
	define('LONG_DATE_FORMAT', '%e %B %Y');
	//User agent Page::get_data() sends on its first download attempt.
	define('USER_AGENT', 'WikimediaTools/SourceTemplatesGenerator/0.1');
	//Generic user agent Page::get_data() retries with when the first attempt returns nothing.
	define('USER_AGENT_FALLBACK', 'Mozilla/5.0');
	//Full browser-like user agent string; not referenced by the code shown here.
	define('USER_AGENT_FALLBACK_FULL', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');

	require_once('helpers/Encoding.php');

	class Page {
	/**
	* @var string The page URL
	*/
	public $url;

	/**
	* @var array Meta tags
	*/
	public $meta_tags;

	/**
	* @var string The page content
	*/
	public $data;

	/**
	* @var string The page title
	*/
	public $title;

	/**
	* @var string The page author
	*/
	public $author;

	/**
	* @var Array The page coauthors
	*/
	- public $coauthors;
	+ public $coauthors = [];

	/**
	* @var string The site ISSN
	*/
	public $issn;

	//If we use the parameters yyyy mm dd, we describe CONTENT date:

	/**
	* @var int The page content's year
	*/

	public $yyyy;
	/**
	* @var int The page content's month
	*/
	public $mm;

	/**
	* @var int The page content's day
	*/
	public $dd;

	//If not, we describe ONLINE RESOURCE PUBLISH date:

	/**
	* @var string The page publication date in relevant locale
	*/
	public $date;

	/**
	* @var int The page publication unixtime
	*/
	public $unixtime;

	/**
	* @var bool Indicates if we have to skip year/month/date template parameters
	*/
	public $skipYMD = false;

	/**
	* @var bool Indicates if we have to skip month/date (but maybe keep year) template parameters
	*/
	public $skipMD = false;


	/**
	* @var bool Indicates if we have to skip author template parameter
	*/
	public $skipAuthor;

	/**
	* @var mixed If not null, contains an array for anotheser service to use
	*/
	public $switchTo = null;

	/**
	* @var string The last error that occurred while opening and parsing the page
	*/
	public $error;

	/**
	* Initializes a new Page instance. If an error occurred, you can read it in $this->error.
	*
	* @param string $url the page URL
	*/
	function __construct ($url) {
		$this->url = $url;

		//Downloads the page; when it can't be read, get_data() sets $this->error and leaves $this->data unset
		$this->get_data();

		//Nothing to analyse without content
		if (!$this->data) {
			return;
		}

		$this->analyse();
	}

	function get_data () {
		//Fetches $this->url and stores its content, converted to UTF-8 when needed, in $this->data.
		//On failure, $this->error is set and $this->data is left untouched.

		//First attempt, with the tool's own user agent
		ini_set('user_agent', USER_AGENT);
		$content = file_get_contents($this->url);

		if (!$content) {
			//Second attempt, with a generic user agent and warnings silenced
			ini_set('user_agent', USER_AGENT_FALLBACK);
			$content = @file_get_contents($this->url);
			if (!$content) {
				$this->error = "Can't read URL";
				return;
			}
		}

		$charset = mb_detect_encoding($content, "ISO-8859-15, ISO-8859-1, UTF-8, ASCII, auto");
		$needsConversion = $charset && $charset != 'UTF-8';
		$this->data = $needsConversion ? Encoding::toUTF8($content) : $content;
	}

	/**
	* Return a new Page instance, or if such class exists, an instance class specialized for your site.
	*
	* @param $url the page URL
	*/
	static function load ($url) {
		//Returns the analyser matching $url: a specialized *Page class when pages/index.dat
		//lists a prefix of the URL, a plain Page otherwise.
		//
		//Classes list are stored in pages/index.dat file
		//Each line contains the URL beginning, a tabulation, and the page analyser name
		// * class is this name, appended by 'Page'
		// * source file is the lowercase version of this name, appended by '.php'
		$pages = file('pages/index.dat', FILE_USE_INCLUDE_PATH);
		if ($pages === false) {
			//Index not readable: falls back to the generic analyser
			return new Page($url);
		}

		foreach ($pages as $line) {
			$page = explode("\t", $line);

			//Skips blank or malformed lines: without this guard, a line starting with a tabulation
			//has an empty prefix, which matches every URL
			if (count($page) < 2 || $page[0] === '' || trim($page[1]) === '') {
				continue;
			}

			if (substr($url, 0, strlen($page[0])) == $page[0]) {
				$name = trim($page[1]);
				$file = strtolower($name) . '.php';
				$class = $name . 'Page';

				//require_once: a second load() for the same site must not redeclare the class
				require_once("pages/$file");
				return new $class($url);
			}
		}

		return new Page($url);
	}

	/**
	* Analyses metatags to process content
	*/
	function analyse () {
		//Fills meta_tags, title, yyyy/mm/dd, site and author from the page metadata.

		//Meta tags (including <meta property="" value=""> and <meta itemprop="" value=""> syntax)
		$tags = $this->get_meta_tags();
		$this->meta_tags = $tags;

		//Title (get_title() reads $this->meta_tags, so it must come after the assignment above)
		$this->title = $this->get_title();

		//Date
		$rawDate = $this->getMetaTag($tags, 'date', 'pubdate', 'content_create_date');
		if ($rawDate) {
			$parsed = date_parse($rawDate);
			$this->yyyy = $parsed['year'];
			$this->mm = $parsed['month'];
			$this->dd = $parsed['day'];
		}

		//Site name
		$this->site = $this->getMetaTag($tags, 'og:site_name');

		//Author
		$this->author = $this->getMetaTag($tags, 'author');
	}

	/**
	* Gets page metatags
	*
	* @return array an array where the keys are the metatags' names and the values the metatags' values
	*/
	function get_meta_tags () {
		//Delegates to get_all_meta_tags(); the URL argument is still passed along,
		//although get_all_meta_tags() as declared in this class takes no parameter.
		$tags = $this->get_all_meta_tags($this->url);
		return $tags;
	}

	/**
	* Gets all metatags, including those using meta property= and meta itemprop= syntax
	*
	* @return array an array where the keys are the metatags' names and the values the metatags' values
	*/
	function get_all_meta_tags () {
		//Parses $this->data and returns an array whose keys are the metatags' names and
		//whose values are the metatags' contents. Tags using name=, property= or itemprop=
		//are all recognized; the value must be in a content= attribute directly following.
		//
		//Thank you to Michael Knapp and Mariano
		//See http://php.net/manual/en/function.get-meta-tags.php comments
		$metaTags = [];

		//[^>]*? keeps the optional leading attributes inside the current tag
		$pattern = '/<[\s]*meta[\s]+[^>]*?\b(name|property|itemprop)\b="?'
			. '([^>"]*)"?[\s]*'
			. 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si';

		if (!preg_match_all($pattern, (string)$this->data, $match)) {
			//No metatag found, or the regular expression engine failed
			return $metaTags;
		}

		$names = $match[2];
		$values = $match[3];

		foreach ($names as $i => $key) {
			$value = $values[$i];

			if (!array_key_exists($key, $metaTags)) {
				$metaTags[$key] = $value;
				continue;
			}

			//Sets an unique scalar value, or if several identical tag names are offered, an array of values.
			//Some publishers offer several times the same tag to list several values (see T241).
			$currentValue = $metaTags[$key];
			if (is_array($currentValue)) {
				//Already a list: only appends values not seen yet
				if (!in_array($value, $currentValue, true)) {
					$metaTags[$key][] = $value;
				}
			} elseif ($currentValue !== $value) {
				//Scalar -> array
				$metaTags[$key] = [ $currentValue, $value ];
			}
		}

		//The callback must be given as strings: bare self / clean_tag are read as undefined constants
		array_walk($metaTags, [ __CLASS__, 'clean_tag' ]);

		return $metaTags;
	}

	/**
	* Cleans a tag value (callback for array_walk)
	*
	* @param mixed &$value array item's value
	* @param string $key array item's key
	*/
	static function clean_tag (&$item, $key) {
		//Normalizes a tag value in place: a list of values is flattened to a "; "-separated
		//string, and surrounding whitespace is removed. $key is unused; array_walk() provides it.
		if (is_array($item)) {
			$item = join("; ", array_map('trim', $item));
		}

		//array_walk() ignores what the callback returns: the cleaned value has to be
		//written back through the reference to have any effect
		$item = trim($item);

		//Still returned, for callers using this method directly
		return $item;
	}

	/**
	* Gets title
	*
	* @return string The page title
	*/
	function get_title () {
		//A title offered by the metatags wins over the <title> element
		$fromMeta = $this->getMetaTag($this->meta_tags, 'title', 'og:title', 'DC.title', 'Title');
		if ($fromMeta) {
			return $fromMeta;
		}

		//Falls back to the content of the <title> element, if any
		if (preg_match("#<title>(.+)<\/title>#iU", $this->data, $matches)) {
			return trim($matches[1]);
		}

		return '';
	}

	/**
	* Determines if the current page is an article published in a journal.
	*
	* @return bool true if the current page is an article ; otherwise, false
	*/
	function is_article () {
		//An article is recognized by any of four metatag hints:
		//dc_type = journalArticle, dcsext_pn-cat = Article, or the mere presence of
		//citation_journal_title or prism_publicationname.
		$tags = $this->meta_tags;

		//meta_tags is only filled by analyse(), which is skipped when the page can't be read
		if (!is_array($tags)) {
			return false;
		}

		return
			(array_key_exists('dc_type', $tags) && $tags['dc_type'] == 'journalArticle')
			||
			(array_key_exists('dcsext_pn-cat', $tags) && $tags['dcsext_pn-cat'] == 'Article')
			||
			array_key_exists('citation_journal_title', $tags)
			||
			array_key_exists('prism_publicationname', $tags);
	}

	/**
	* Gets relevant metatag
	*
	* @param array the metatags
	* @param string... the list of acceptable metatags
	*
	* @return string the first metatag value found
	*/
	static function getMetaTag () {
		//Arguments: the metatags array, then the acceptable tag names by order of preference.
		//Returns the value of the first acceptable name found (case-insensitive), or '' if none.
		$tags = func_get_args();
		$metatags = array_shift($tags);

		//A page that couldn't be read has no metatags array at all
		if (!is_array($metatags) || !count($metatags)) {
			return '';
		}

		//Case-insensitive index, built once; when two keys differ only by case, the first one wins
		$index = [];
		foreach ($metatags as $key => $value) {
			$lowercaseKey = strtolower((string)$key);
			if (!array_key_exists($lowercaseKey, $index)) {
				$index[$lowercaseKey] = $value;
			}
		}

		foreach ($tags as $tag) {
			$tag_lowercase = strtolower($tag);
			if (array_key_exists($tag_lowercase, $index)) {
				return $index[$tag_lowercase];
			}
		}

		return '';
	}

	/**
	* Finds a portion of text included between $before and $after strings on the current page
	*
	* @param string $before The string at the left of the text to be grabbed
	* @param string $after The string at the right of the text to be grabbed
	*
	* @return string The text found between $before and $after
	*/
	function between ($before, $after) {
		//Same as grab(), applied to the content of the current page
		$haystack = $this->data;
		return self::grab($haystack, $before, $after);
	}

	/**
	* Finds a portion of text included between $before and $after strings
	*
	* @param string $text The text where to find the substring
	* @param string $before The string at the left of the text to be grabbed
	* @param string $after The string at the right of the text to be grabbed [facultative]
	*
	* @return string The text found between $before and $after
	*/
	static function grab ($text, $before, $after = null) {
		//Returns the part of $text located after the first $before and, when $after is given,
		//before the next $after. Returns false when $before, or a requested $after, is not found.
		$start = strpos($text, $before);
		if ($start === false) {
			return false;
		}
		$start += strlen($before);

		if ($after === null) {
			//No right delimiter: everything up to the end of the text
			return substr($text, $start);
		}

		//The search starts right after $before, not one character further:
		//an empty capture is then found, and the offset never goes past the end of the text
		$end = strpos($text, $after, $start);
		if ($end === false) {
			return false;
		}

		return substr($text, $start, $end - $start);
	}

	/**
	* Downloads, through CURL library, accepting cookies.
	*
	* @param $url The URL to fetch
	*/
	static function curl_download ($url, $agent = '') {
		//Fetches $url with cURL, following redirections and keeping cookies in a
		//temporary jar for the duration of the request. $agent, when not empty, is sent
		//as user agent. Returns what curl_exec() returns: the content, or false on failure.
		$timeout = 5;

		//The jar must be a path; the tmpfile() resource formerly created here was discarded unused
		$cookie_file = tempnam(sys_get_temp_dir(), "cookie-sourcesgen-");

		$ch = curl_init();
		curl_setopt($ch, CURLOPT_COOKIESESSION, true);
		if ($cookie_file !== false) {
			curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
			curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
		}
		curl_setopt($ch, CURLOPT_URL, $url);
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
		curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
		curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
		if ($agent != '') {
			curl_setopt($ch, CURLOPT_USERAGENT, $agent);
		}

		$data = curl_exec($ch);

		//Closes the handle first, then removes the jar
		curl_close($ch);
		if ($cookie_file !== false) {
			unlink($cookie_file);
		}

		return $data;
	}
	}
	diff --git a/templates/wikipedia-fr/Article.php b/templates/wikipedia-fr/Article.php
	index 329c844..68c7816 100644
	--- a/templates/wikipedia-fr/Article.php
	+++ b/templates/wikipedia-fr/Article.php
	@@ -1,204 +1,208 @@
	<?php

	//Date formatting locale: the strftime() calls below then format dates (e.g. %B month names) according to fr_FR.
	setlocale(LC_TIME, 'fr_FR.UTF-8');

	/**
	* Represents an {{Article}} template on the French Wikipedia
	*/
	class ArticleTemplate extends Template {
	public $lang;
	public $title;
	public $periodique;
	public $year;
	public $accessdate;

	/**
	* Initializes a new instance of the ArticleTemplate class
	*/
	function __construct () {
		//Today's date, formatted per LONG_DATE_FORMAT in the current LC_TIME locale
		$today = strftime(LONG_DATE_FORMAT);

		$this->name = "Article";
		$this->accessdate = trim($today);
	}

	/**
	* Creates a new instance of the ArticleTemplate class from a Page object
	*
	* @param Page $page The page to fill the template with data from
	* @return ArticleTemplate
	*/
	static function loadFromPage ($page) {
	//Builds the template from the page: each field prefers a value the Page object already
	//holds, and falls back to the first matching metatag otherwise.
	$template = new self();
	$t = $page->meta_tags;

	//Language
	$template->lang = page::getMetaTag($t, 'dc_language', 'citation_language', 'dc.Language');

	//Authors
	//The lines marked - and + below are the patch itself: the added branch uses
	//$page->coauthors when it isn't empty, before falling back to the single author.
	//Each author is stored as the result of explode(', ', ..., 2); __toString() reads
	//index 0 as the last name and index 1 as the first name.
	- if ($author = $page->author ?: page::getMetaTag($t, 'author', 'dc_creator', 'citation_authors', 'dc_contributor', 'citation_author', 'dc.Creator', 'DCSext.author')) {
	+ if (count($page->coauthors)) {
	+ foreach ($page->coauthors as $author) {
	+ $template->authors[] = explode(', ', $author, 2);
	+ }
	+ } elseif ($author = $page->author ?: page::getMetaTag($t, 'author', 'dc_creator', 'citation_authors', 'dc_contributor', 'citation_author', 'dc.Creator', 'DCSext.author')) {
	//TODO: handle Alpha Beta syntax instead Beta, Alpha
	$author = trim($author, self::getTrimCharacterMask());
	$template->authors[] = explode(', ', $author, 2);
	}

	//Title
	if (!$template->title = page::getMetaTag($t, 'dc_title', 'citation_title')) {
	$template->title = $page->title;
	}

	//Journal, publisher
	$template->journal = $page->journal ?: page::getMetaTag($t, 'prism_publicationname', 'citation_journal_title', 'og:site_name');
	//NOTE(review): dc_source is read without checking the key exists
	$template->journalLink = $t['dc_source'];
	$template->publisher = $page->publisher ?: page::getMetaTag($t, 'dc_publisher', 'citation_publisher');

	//Issue name, number and volume
	$template->issue = $page->issue ?: page::getMetaTag($t, 'prism_number', 'citation_issue');
	$template->volume = $page->volume ?: page::getMetaTag($t, 'citation_volume');
	//Issue name: prism_issuename, or failing that dc_relation_ispartof, flagged with an
	//HTML comment so the editor cleans the parameter up
	if (
	(!$template->issueName = $t['prism_issuename'])
	&&
	array_key_exists('dc_relation_ispartof', $t)
	) {
	$template->issueName = $t['dc_relation_ispartof']
	. " <!-- !!! paramètre à nettoyer !!! -->";
	}

	//Date
	//By order of preference: the page's year/month/day, the page's unixtime,
	//a full date metatag, then a year alone.
	if ($page->yyyy && $page->mm && $page->dd) {
	$template->yyyy = $page->yyyy;
	$template->mm = $page->mm;
	$template->dd = $page->dd;
	} elseif ($page->unixtime) {
	$template->yyyy = date('Y', $page->unixtime);
	$template->mm = date('m', $page->unixtime);
	$template->dd = date('j', $page->unixtime);
	} elseif ($date = page::getMetaTag($t, 'prism_publicationdate', 'dc_date', 'citation_date', 'datePublished', 'DC.date.issued')) {
	//A separator at offset 4 means the year comes first (YYYY?MM?DD offsets);
	//otherwise the offsets read are month at 0, day at 3, year at 6.
	//NOTE(review): the \|\| below looks like an escaping artifact of the paste for || - confirm against the repository.
	if ($date[4] == '/' \|\| $date[4] == '-' \|\| $date[4] == '.') {
	$template->yyyy = substr($date, 0, 4);
	$template->mm = substr($date, 5, 2);
	$template->dd = substr($date, 8, 2);
	} else {
	$template->yyyy = substr($date, 6, 4);
	$template->mm = substr($date, 0, 2);
	$template->dd = substr($date, 3, 2);
	}
	} else {
	$template->yyyy = $page->yyyy ?: page::getMetaTag($t, 'citation_year', 'citation_publication_date');
	}

	//Pages
	if ($page->pages) {
	$template->pageStart = $page->pages;
	} else {
	$template->pageStart = page::getMetaTag($t, 'prism_startingpage', 'citation_firstpage', 'citation_first_page');
	$template->pageEnd = page::getMetaTag($t, 'prism_endingpage', 'citation_lastpage', 'citation_last_page');
	}

	//ISBN, ISSN, URLs
	$template->issn = $page->issn ?: page::getMetaTag($t, 'prism_issn', 'citation_issn');
	$template->isbn = page::getMetaTag($t, 'citation_isbn');
	$template->doi = $page->doi ?: page::getMetaTag($t, 'citation_doi');

	$template->summary = page::getMetaTag($t, 'citation_abstract_html_url');
	$template->url = self::getTextURL($page->url, $t);

	return $template;
	}

	function __toString () {
		//Fills $this->params with the French {{Article}} parameter names,
		//then lets the parent class render the template.

		//Language
		$this->params['langue'] = $this->lang;

		//Authors
		//empty() and not count(): authors only exists once loadFromPage() found one,
		//and count() doesn't accept null
		if (empty($this->authors)) {
			//Per http://fr.wikipedia.org/w/?&diff=93455862, print
			//one blank set of lines for author when the article
			//metadata doesn't offer author information.
			$this->authors = [['', '']];
		}
		$k = 1;
		foreach ($this->authors as $author) {
			//An author without ", " separator has a single part: the first name is then left blank
			$this->params["prénom$k"] = isset($author[1]) ? $author[1] : '';
			$this->params["nom$k"] = isset($author[0]) ? $author[0] : '';
			$this->params["lien auteur$k"] = '';
			$k++;
		}

		//Title, journal, publisher, volume, etc.
		$this->params['titre'] = $this->title;
		$this->params['périodique'] = $this->journal;
		//TODO: check if the article exists on fr.wikipedia and contains the Presse infobox or belongs to a subcategory of [[Catégorie:Revue scientifique]]
		//$this->params['lien périodique'] = $this->journal;
		$this->params['éditeur'] = $this->publisher;
		if ($this->volume) $this->params['volume'] = $this->volume;
		$this->params['numéro'] = $this->issue;
		if ($this->issueName) $this->params['titre numéro'] = $this->issueName;

		//Date
		if ($this->mm && $this->dd) {
			//Noon, so the day and month printed are the ones given
			$date = mktime(12, 0, 0, (int)$this->mm, (int)$this->dd, (int)$this->yyyy);
			$this->params['jour'] = trim(strftime('%e', $date));
			$this->params['mois'] = strftime('%B', $date);
		}
		$this->params['année'] = $this->yyyy;

		//Pages, ISSN, ISBN, DOI, URL, access date
		$this->params['pages'] = $this->pageEnd ? ($this->pageStart . '-' . $this->pageEnd) : $this->pageStart;
		if ($this->issn) $this->params['ISSN'] = $this->issn;
		if ($this->isbn) $this->params['ISBN'] = $this->isbn;
		if ($this->doi) $this->params['doi'] = $this->doi;
		$this->params['url texte'] = $this->url;
		if (self::isSummaryPertinent($this->url, $this->summary)) {
			$this->params['résumé'] = $this->summary;
		}
		$this->params['consulté le'] = trim(strftime(LONG_DATE_FORMAT));

		return parent::__toString();
	}

	/**
	* Gets article full text URL
	*
	* @param string $url the article current URL
	*
	* @return string the article fulltext URL
	*/
	static function getTextURL ($url, $metatags) {
		//revues.org PDF generation is broken
		$isRevuesOrg = strpos($url, '.revues.org/') > 0;

		if (!$isRevuesOrg) {
			//Prefers the PDF or full text URL advertised by the metatags, if any
			$text_url = page::getMetaTag($metatags, 'citation_pdf_url', 'citation_fulltext_html_url');
			if ($text_url) {
				return $text_url;
			}
		}

		//Otherwise, the article URL itself
		return $url;
	}

	/**
	* Determines if a summary is pertinent to include in parameters
	*
	* @param string $url_article Article URL
	* @param string $url_summary Summary URL
	*
	* @return bool true if the summary URL should be included in the template; otherwise, false
	*/
	static function isSummaryPertinent ($url_article, $url_summary) {
		//Empty summary or identical to URL are rejected
		//(loose comparison on purpose: a null summary is rejected like an empty one)
		if ($url_summary == '' || $url_summary == $url_article) {
			return false;
		}

		//This site is indexed through /resume.php but gives /article.php as summary URL in metadata
		//The rule applies to the http and https versions of the URL alike
		if (preg_match('#^https?://www\.cairn\.info/resume\.php#', (string)$url_article)) {
			return false;
		}

		return true;
	}

	/**
	* Gets a trim character mask to clean pure alphanumeric data fields from extraneous punctuation
	*
	* @return string a character mask to use for trim(), covering whitespace and some punctuation
	*/
	static function getTrimCharacterMask() {
		//Tab, line feed, carriage return, NUL and vertical tab, then comma, semicolon and space
		$mask = "\t\n\r\0\x0B,; ";
		return $mask;
	}
	}

File Metadata

Mime Type: text/x-diff
Expires: Wed, Mar 18, 14:27 (7 h, 12 m)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 3538925
Default Alt Text: (19 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions