Page MenuHomeDevCentral

No OneTemporary

diff --git a/index.php b/index.php
index b4f1b19..29b3739 100644
--- a/index.php
+++ b/index.php
@@ -1,56 +1,65 @@
<!-- Content -->
<div id="content">
<h1 class="icoTitle"><img src="/_pict/ico/forms.png" alt="Tools - form generation"/>{{Lien web}}</h1>
<form method="post">
<?php //The submitted URL is user input: escape it before echoing it back into the value attribute, so quotes or markup in it can't break out of the attribute. ?>
<label for="URL">URL: </label><input type="text" size="80" name="URL" id="URL" value="<?= htmlspecialchars(array_key_exists('URL', $_REQUEST) ? (string) $_REQUEST['URL'] : '', ENT_QUOTES, 'UTF-8') ?>" />
<input type="submit" value="OK">
</form>
<?php
if (array_key_exists('URL', $_REQUEST)) {
include('page.php');
+ //Is the specified URL valid and does it exist?
$url = $_REQUEST['URL'];
+ if (!filter_var($url, FILTER_VALIDATE_URL)) {
+ message_die(GENERAL_ERROR, "$url isn't a valid URL.", 'URL issue');
+ }
+
+ //Get page information
setlocale(LC_TIME, 'fr_FR.UTF-8');
$page = Page::load($url);
+ if ($page->error) {
+ message_die(GENERAL_ERROR, "Can't open $url", 'URL issue');
+ }
if ($page->is_article()) {
echo "<h3>Note</h3><p>Cette URL pointe vers un article de revue, aussi le modèle {{Article}} est indiqué.</p>";
}
//Template
echo " <h3>Template</h3> \n <textarea id=\"template\" rows=20 cols=80>\n";
require('templates/template.php');
if ($page->is_article()) {
require('templates/wikipedia-fr/Article.php');
$template = ArticleTemplate::loadFromPage($page);
} else {
require('templates/wikipedia-fr/Lien_web.php');
$template = LienWebTemplate::loadFromPage($page);
}
//The template text and the meta tags are built from the remote page content:
//escape them so markup such as </textarea> or <script> can't be injected here.
//double_encode is disabled so entities already present in the source keep
//rendering as they did before; ENT_SUBSTITUTE avoids an empty result when the
//remote page isn't valid UTF-8.
$escape_flags = ENT_QUOTES | ENT_SUBSTITUTE;
echo htmlspecialchars((string) $template, $escape_flags, 'UTF-8', false), '</textarea>';
//Meta tags
echo "\n\n <h3>Meta tags</h3>\n <table cellpadding=8>\n <tr><th>Tag</th><th>Value</th></tr>";
foreach ($page->meta_tags as $key => $value) {
    $safe_key = htmlspecialchars((string) $key, $escape_flags, 'UTF-8', false);
    $safe_value = htmlspecialchars((string) $value, $escape_flags, 'UTF-8', false);
    echo " <tr><td>$safe_key</td><td>$safe_value</td></tr>";
}
echo "\n </table>";
}
?>
<h3>How to improve this tool?</h3>
<p>A little PHP knowledge will allow you to customize and improve this tool. I will be happy to accept patches to that end.</p>
<p>If you wish to adapt this tool to be used on another website (a Wikipedia project in another language or outside Wikipedia), please see the template.php file and samples in the templates/ folder.</p>
<p>If you wish to add support for analysing another website, please add its URL to index.dat, then create a class which extends Page; see page.php and the pages/ folder.</p>
<p><strong>Source code:</strong> [ <a href="http://hg.dereckson.be/source-templates-generator">git repository</a> | <a href="https://bitbucket.org/dereckson/source-templates-generator/get/master.zip">download current snapshot</a> ]</p>
</div>
<!-- left menu -->
<div id="leftMenu">
<ul class="navMenu">
<li><a href="http://fr.wikipedia.org/wiki/Modèle:Lien web">{{Lien web}}</a></li>
<li><a href="http://fr.wikipedia.org/wiki/Modèle:Article">{{Article}}</a></li>
<li><a href="http://www.prismstandard.org/specifications/">PRISM</a></li>
<li><a href="http://dublincore.org/">Dublin Core</a></li>
<li><a href="http://scholar.google.com/intl/en/scholar/inclusion.html">Google Scholar</a></li>
<li><a href="http://ogp.me/">Open Graph</a></li>
</ul>
</div>
diff --git a/page.php b/page.php
index df31cc6..cc056c7 100644
--- a/page.php
+++ b/page.php
@@ -1,98 +1,157 @@
<?php
define('LONG_DATE_FORMAT', '%e %B %Y');
+define('USER_AGENT', 'WikimediaTools/SourceTemplatesGenerator/0.1');
+define('USER_AGENT_FALLBACK', 'Mozilla/5.0');
class Page {
public $url;
/**
* @var array Meta tags
*/
public $meta_tags;
/**
* @var string The page content
*/
public $data;
public $title;
public $author;
public $yyyy;
public $mm;
public $dd;
public $skipYMD;
public $skipAuthor;
+ public $error;
+
function __construct ($url) {
$this->url = $url;
- $this->data = file_get_contents($url);
+ ini_set('user_agent', USER_AGENT);
+ $this->data = @file_get_contents($url);
+ if (!$this->data) {
+ ini_set('user_agent', USER_AGENT_FALLBACK);
+ if (!$this->data = @file_get_contents($url)) {
+ $this->error = "Can't read URL";
+ return;
+ }
+ }
$this->analyse();
}
/**
 * Builds the page object matching an URL.
 *
 * Each line of pages/index.dat holds an URL prefix and a class name separated
 * by a tab. The first line whose prefix starts $url selects pages/<name>.php
 * and the class <name>Page; when no line matches, a generic Page is returned.
 *
 * @param string $url the URL to load
 *
 * @return Page the generic or site-specific page instance
 */
static function load ($url) {
    $pages = file('pages/index.dat', FILE_USE_INCLUDE_PATH);
    if ($pages === false) {
        //Index unreadable: fall back to the generic handler
        return new Page($url);
    }
    foreach ($pages as $line) {
        $page = explode("\t", $line);
        //Skip blank or malformed lines (no tab, or no class name after it)
        if (count($page) < 2 || trim($page[1]) === '') {
            continue;
        }
        if (strncmp($url, $page[0], strlen($page[0])) === 0) {
            $file = strtolower(trim($page[1])) . '.php';
            $class = trim($page[1]) . 'Page';
            //require_once: a second load() for the same site must not redeclare the class
            require_once("pages/$file");
            return new $class($url);
        }
    }
    return new Page($url);
}
+ /**
+ * Analyses metatags to process content
+ */
function analyse () {
$this->meta_tags = $this->get_meta_tags();
$this->title = $this->get_title();
+
+ if (array_key_exists('date', $this->meta_tags)) {
+ $date = date_parse($this->meta_tags['date']);
+ $this->yyyy = $date['year'];
+ $this->mm = $date['month'];
+ $this->dd = $date['day'];
+ }
}
+ /**
+ * Gets page metatags
+ *
+ * @return array an array where the keys are the metatags' names and the values the metatags' values
+ */
function get_meta_tags () {
    //The native get_meta_tags() returns false when the URL can't be read.
    //Callers pass the result to array_key_exists(), so always return an array.
    $tags = get_meta_tags($this->url);
    return is_array($tags) ? $tags : array();
}
+ /**
+ * Gets all metatags, including those using meta property= and meta itemprop= syntax
+ *
+ * @return array an array where the keys are the metatags' names and the values the metatags' values
+ */
function get_all_meta_tags () {
- //Thank you to Michael Knapp and Mariano
- //See http://php.net/manual/en/function.get-meta-tags.php comments
+ //Thank you to Michael Knapp and Mariano
+ //See http://php.net/manual/en/function.get-meta-tags.php comments
preg_match_all('/<[\s]*meta[\s]*\b(name|property|itemprop)\b="?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', $this->data, $match);
- if (isset($match) && is_array($match) && count($match) == 4)
- {
+ if (isset($match) && is_array($match) && count($match) == 4) {
$originals = $match[0];
$names = $match[2];
$values = $match[3];
- if (count($originals) == count($names) && count($names) ==
-count($values))
- {
+ if (count($originals) == count($names) && count($names) == count($values)) {
$metaTags = array();
- for ($i=0, $limiti=count($names); $i < $limiti; $i++)
- {
+ for ($i=0, $limiti = count($names) ; $i < $limiti ; $i++) {
$metaTags[$names[$i]] = $values[$i];
}
}
}
return $metaTags;
}
/**
* Gets title
*
* @return string The page title
*/
function get_title () {
+ if (array_key_exists('title', $this->meta_tags)) return $this->meta_tags['title'];
return (preg_match("#<title>(.+)<\/title>#iU", $this->data, $title)) ? trim($title[1]) : '';
}
+ /**
+ * Determines if the current page is an article published in a journal.
+ *
+ * @return bool true if the current page is an article ; otherwise, false
+ */
function is_article () {
- if (array_key_exists('dc_type', $this->meta_tags) && $this->meta_tags['dc_type'] == 'journalArticle') {
- return true;
- }
- return false;
+ return
+ (array_key_exists('dc_type', $this->meta_tags) && $this->meta_tags['dc_type'] == 'journalArticle')
+ ||
+ (array_key_exists('dcsext_pn-cat', $this->meta_tags) && $this->meta_tags['dcsext_pn-cat'] == 'Article')
+ ||
+ array_key_exists('citation_journal_title', $this->meta_tags)
+ ||
+ array_key_exists('prism_publicationname', $this->meta_tags);
}
+
+ /**
+ * Gets relevant metatag
+ *
+ * @param array the metatags
+ * @param string... the list of acceptable metatags
+ *
+ * @return string the first metatag value found
+ */
+ static function getMetaTag () {
+ $tags = func_get_args();
+ $metatags = array_shift($tags);
+
+ foreach ($tags as $tag) {
+ if (array_key_exists($tag, $metatags)) {
+ return $metatags[$tag];
+ }
+ }
+
+ return '';
+ }
}
diff --git a/templates/wikipedia-fr/Article.php b/templates/wikipedia-fr/Article.php
index 15ce559..8fdf198 100644
--- a/templates/wikipedia-fr/Article.php
+++ b/templates/wikipedia-fr/Article.php
@@ -1,131 +1,160 @@
<?php
setlocale(LC_TIME, 'fr_FR.UTF-8');
class ArticleTemplate extends Template {
public $lang;
public $title;
public $periodique;
public $year;
public $accessdate;
/**
 * Initializes the template name and the access date.
 */
function __construct () {
    //Name of the generated template
    $this->name = 'Article';

    //Access date: the current date formatted with LONG_DATE_FORMAT, trimmed
    $today = strftime(LONG_DATE_FORMAT);
    $this->accessdate = trim($today);
}
static function loadFromPage ($page) {
$template = new self();
$t = $page->meta_tags;
//Language
- $template->lang = self::getMetaTag($t, 'dc_language', 'citation_language');
+ $template->lang = page::getMetaTag($t, 'dc_language', 'citation_language');
//Authors
- if ($author = self::getMetaTag($t, 'author', 'dc_creator', 'citation_authors')) {
+ if ($author = page::getMetaTag($t, 'author', 'dc_creator', 'citation_authors', 'dc_contributor', 'citation_author')) {
+ //TODO: handle Alpha Beta syntax instead of Beta, Alpha
$template->authors[] = explode(', ', $author, 2);
}
//Title
- if (!$template->title = self::getMetaTag($t, 'dc_title', 'citation_title')) {
+ if (!$template->title = page::getMetaTag($t, 'dc_title', 'citation_title')) {
$template->title = $page->title;
}
//Journal, publisher
- $template->journal = self::getMetaTag($t, 'prism_publicationname', 'citation_journal_title');
+ $template->journal = page::getMetaTag($t, 'prism_publicationname', 'citation_journal_title');
$template->journalLink = $t['dc_source'];
- $template->publisher = self::getMetaTag($t, 'dc_publisher', 'citation_publisher');
+ $template->publisher = page::getMetaTag($t, 'dc_publisher', 'citation_publisher');
- //Issue name and number
- $template->issue = self::getMetaTag($t, 'prism_number', 'citation_issue');
+ //Issue name, number and volume
+ $template->issue = page::getMetaTag($t, 'prism_number', 'citation_issue');
+ $template->volume = page::getMetaTag($t, 'citation_volume');
if (
(!$template->issueName = $t['prism_issuename'])
&&
array_key_exists('dc_relation_ispartof', $t)
) {
$template->issueName = $t['dc_relation_ispartof']
. " <!-- !!! paramètre à nettoyer !!! -->";
}
//Date
- $date = self::getMetaTag($t, 'prism_publicationdate', 'dc_date', 'citation_date');
- $template->yyyy = substr($date, 0, 4);
- $template->mm = substr($date, 5, 2);
- $template->dd = substr($date, 8, 2);
+ if ($date = page::getMetaTag($t, 'prism_publicationdate', 'dc_date', 'citation_date')) {
+ $template->yyyy = substr($date, 0, 4);
+ $template->mm = substr($date, 5, 2);
+ $template->dd = substr($date, 8, 2);
+ } else {
+ $template->yyyy = page::getMetaTag($t, 'citation_year');
+ }
//Pages
- $template->pageStart = self::getMetaTag($t, 'prism_startingpage' , 'citation_firstpage');
- $template->pageEnd = self::getMetaTag($t, 'prism_endingpage', 'citation_lastpage');
+ $template->pageStart = page::getMetaTag($t, 'prism_startingpage', 'citation_firstpage');
+ $template->pageEnd = page::getMetaTag($t, 'prism_endingpage', 'citation_lastpage');
+
+ //ISBN, ISSN, URLs
+ $template->issn = page::getMetaTag($t, 'prism_issn', 'citation_issn');
+ $template->isbn = page::getMetaTag($t, 'citation_isbn');
+ $template->doi = page::getMetaTag($t, 'citation_doi');
- //ISBN, ISSN, URL
- $template->issn = self::getMetaTag($t, 'prism_issn', 'citation_issn');
- $template->isbn = self::getMetaTag($t, 'citation_isbn');
- $template->summary = self::getMetaTag($t, 'citation_abstract_html_url');
- $template->url = $page->url;
+ $template->summary = page::getMetaTag($t, 'citation_abstract_html_url');
+ $template->url = self::getTextURL($page->url, $t);
return $template;
}
function __toString () {
//Langue
$this->params['langue'] = $this->lang;
//Authors
- $k = 1;
- foreach ($this->authors as $author) {
- $this->params["prénom$k"] = $author[1];
- $this->params["nom$k"] = $author[0];
- $this->params["lien auteur$k"] = '';
- $k++;
+ if (count($this->authors)) {
+ $k = 1;
+ foreach ($this->authors as $author) {
+ $this->params["prénom$k"] = $author[1];
+ $this->params["nom$k"] = $author[0];
+ $this->params["lien auteur$k"] = '';
+ $k++;
+ }
}
//Titre, périodique, éditeur, volume, etc.
$this->params['titre'] = $this->title;
$this->params['périodique'] = $this->journal;
//TODO: check whether the article exists on fr.wikipedia and contains the Presse infobox or belongs to a subcategory of [[Catégorie:Revue scientifique]]
$this->params['lien périodique'] = $this->journal;
$this->params['éditeur'] = $this->publisher;
+ if ($this->volume) $this->params['volume'] = $this->volume;
$this->params['numéro'] = $this->issue;
- $this->params['titre numéro'] = $this->issueName;
+ if ($this->issueName) $this->params['titre numéro'] = $this->issueName;
//Date
- $date = mktime(12, 0, 0, $this->mm, $this->dd, $this->yyyy);
- $this->params['jour'] = trim(strftime('%e', $date));
- $this->params['mois'] = strftime('%B', $date);
+ if ($this->mm && $this->dd) {
+ $date = mktime(12, 0, 0, $this->mm, $this->dd, $this->yyyy);
+ $this->params['jour'] = trim(strftime('%e', $date));
+ $this->params['mois'] = strftime('%B', $date);
+ }
$this->params['année'] = $this->yyyy;
- //Pages, ISSN, ISBN, URL, consulté le
+ //Pages, ISSN, ISBN, DOI, URL, consulté le
$this->params['pages'] = $this->pageEnd ? ($this->pageStart . '-' . $this->pageEnd) : $this->pageStart;
- $this->params['ISSN'] = $this->issn;
- $this->params['ISBN'] = $this->isbn;
+ if ($this->issn) $this->params['ISSN'] = $this->issn;
+ if ($this->isbn) $this->params['ISBN'] = $this->isbn;
+ if ($this->doi) $this->params['doi'] = $this->doi;
$this->params['url texte'] = $this->url;
- if ($this->summary != '' && $this->summary != $this->url) {
+ if (self::isSummaryPertinent($this->url, $this->summary)) {
$this->params['résumé'] = $this->summary;
}
$this->params['consulté le'] = trim(strftime(LONG_DATE_FORMAT));
return parent::__toString();
}
/**
- * Gets relevant metatag
+ * Gets article full text URL
+ *
+ * @param string $url the article current URL
*
- * @param array the metatags
- * @param string... the list of acceptable metatags
- *
- * @return string the first metatag value found
+ * @return string the article fulltext URL
*/
- static function getMetaTag () {
- $tags = func_get_args();
- $metatags = array_shift($tags);
-
- foreach ($tags as $tag) {
- if (array_key_exists($tag, $metatags)) {
- return $metatags[$tag];
- }
+ static function getTextURL ($url, $metatags) {
+ if (strpos($url, '.revues.org/') > 0) {
+ //revues.org PDF generation is broken
+ return $url;
}
- return '';
+ if ($text_url = page::getMetaTag($metatags, 'citation_pdf_url', 'citation_fulltext_html_url')) {
+ return $text_url;
+ }
+
+ return $url;
}
+ /**
+ * Determines if a summary is pertinent to include in parameters
+ *
+ * @param string $url_article Article URL
+ * @param string $url_summary Summary URL
+ *
+ * @return bool true if the summary URL should be included in template ; otherwise, false
+ */
+ static function isSummaryPertinent ($url_article, $url_summary) {
+ //Empty summary or identical to URL are rejected
+ if ($url_summary == '' || $url_summary == $url_article) return false;
+
+ //This site is indexed through /resume.php but gives /article.php as summary URL in metadata
+ if (substr($url_article, 0, 32) == "http://www.cairn.info/resume.php") return false;
+
+ return true;
+ }
}
?>
diff --git a/templates/wikipedia-fr/Lien_web.php b/templates/wikipedia-fr/Lien_web.php
index a2be729..c785563 100644
--- a/templates/wikipedia-fr/Lien_web.php
+++ b/templates/wikipedia-fr/Lien_web.php
@@ -1,60 +1,60 @@
<?php
setlocale(LC_TIME, 'fr_FR.UTF-8');
class LienWebTemplate extends Template {
public $author;
public $url;
public $title;
public $dd;
public $mm;
public $yyyy;
public $site;
public $publishdate;
public $accessdate;
/**
* @var bool Indicates if we've to remove jour/mois/année parameters
*/
public $skipYMD = false;
/**
 * Initializes the template name and the access date.
 */
function __construct () {
    //Name of the generated template
    $this->name = 'Lien web';

    //Access date: the current date formatted with LONG_DATE_FORMAT, trimmed
    $today = strftime(LONG_DATE_FORMAT);
    $this->accessdate = trim($today);
}
static function loadFromPage ($page) {
$template = new LienWebTemplate();
$template->author = $page->author;
$template->skipAuthor = $page->skipAuthor;
$template->url = $page->url;
$template->title = $page->title;
- $template->dd = $page->yyyy;
- $template->mm = $page->yyyy;
+ $template->dd = $page->dd;
+ $template->mm = $page->mm;
$template->yyyy = $page->yyyy;
$template->site = $page->site;
$template->publishdate = $page->date;
$template->skipYMD = $page->skipYMD;
return $template;
}
/**
 * Fills the template parameters from the object properties, then delegates
 * the rendering to the parent class.
 *
 * The author is omitted when skipAuthor is set; the jour/mois/année
 * parameters are omitted when skipYMD is set.
 *
 * @return string the value returned by parent::__toString()
 */
function __toString () {
    if (!$this->skipAuthor) {
        $this->params['auteur'] = $this->author;
    }
    $this->params['titre'] = $this->title;
    if (!$this->skipYMD) {
        //jour is the day and mois the month (the two were previously swapped)
        $this->params['jour'] = $this->dd;
        $this->params['mois'] = $this->mm;
        $this->params['année'] = $this->yyyy;
    }
    $this->params['url'] = $this->url;
    $this->params['site'] = $this->site;
    $this->params['en ligne le'] = $this->publishdate;
    $this->params['consulté le'] = $this->accessdate;
    return parent::__toString();
}
}
?>

File Metadata

Mime Type
text/x-diff
Expires
Thu, Sep 18, 01:15 (1 d, 2 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2989689
Default Alt Text
(17 KB)

Event Timeline