Page MenuHomeDevCentral

No OneTemporary

diff --git a/autoload.php b/autoload.php
new file mode 100644
index 0000000..bb6f052
--- /dev/null
+++ b/autoload.php
@@ -0,0 +1,26 @@
+<?php
+
+/**
+ * Autoload function registered on the SPL autoload stack.
+ *
+ * Requires the file declaring the given class when it is one of the
+ * classes listed below; does nothing for any other class name.
+ *
+ * Paths are anchored on __DIR__ so that resolution does not depend on
+ * the include_path or on the current working directory.
+ *
+ * @param string $class The name of the class to load
+ */
+function sourcetemplatesgenerator_autoloader ($class) {
+    switch ($class) {
+        case 'Book': require(__DIR__ . '/book.php'); return;
+        case 'Page': require(__DIR__ . '/page.php'); return;
+
+        case 'Template': require(__DIR__ . '/templates/template.php'); return;
+        case 'ArticleTemplate': require(__DIR__ . '/templates/wikipedia-fr/Article.php'); return;
+        case 'LienWebTemplate': require(__DIR__ . '/templates/wikipedia-fr/Lien_web.php'); return;
+        case 'OuvrageTemplate': require(__DIR__ . '/templates/wikipedia-fr/Ouvrage.php'); return;
+    }
+}
+
+spl_autoload_register('sourcetemplatesgenerator_autoloader');
diff --git a/book.php b/book.php
new file mode 100644
index 0000000..bb2a4c8
--- /dev/null
+++ b/book.php
@@ -0,0 +1,120 @@
+<?php
+
+/**
+ * A book, whose bibliographic metadata can be fetched from WorldCat.
+ */
+class Book {
+    /**
+     * OCLC number
+     */
+    public $OCLC;
+
+    /**
+     * @var string The book title
+     */
+    public $Title;
+
+    /**
+     * @var string The publisher name
+     */
+    public $Publisher;
+
+    /**
+     * @var string The place of publication (first place with a schema:name)
+     */
+    public $Place;
+
+    /**
+     * @var array The contributors, each one as a [given name, family name] array
+     */
+    public $Authors = [];
+
+    /**
+     * @var mixed The publishing year
+     */
+    public $YYYY;
+
+    /**
+     * @var mixed The publishing month, when the date is more precise than a year
+     */
+    public $MM;
+
+    /**
+     * @var mixed The publishing day, when the date is more precise than a year
+     */
+    public $DD;
+
+    /**
+     * Fills this book's properties from the RDF document WorldCat
+     * publishes for the OCLC number stored in $this->OCLC.
+     *
+     * Prints an HTML alert box when a publishing date is present but isn't
+     * 4 characters long, as the date is then only guessed through date_parse().
+     *
+     * @throws RuntimeException if the RDF document doesn't describe the book
+     */
+    public function queryWorldCatFromOCLC () {
+        $oclc = (string)(int)$this->OCLC;
+
+        $url = 'http://www.worldcat.org/oclc/' . $oclc;
+        $rdf = new EasyRdf_Graph($url . '.rdf');
+        $rdf->load();
+        $resources = $rdf->resources();
+        if (!isset($resources[$url])) {
+            throw new RuntimeException("The WorldCat RDF document doesn't describe $url");
+        }
+        $book = $resources[$url];
+
+        //Core info
+        $this->Title = (string)$book->getLiteral('schema:name');
+        $publisher = $book->get('schema:publisher');
+        if ($publisher instanceof EasyRdf_Resource) {
+            $this->Publisher = (string)$publisher->get('schema:name');
+        } else {
+            //Publisher is missing (null) or given as a plain literal
+            $this->Publisher = (string)$publisher;
+        }
+
+        //Publishing date
+        $date = (string)$book->getLiteral('schema:datePublished');
+        if (strlen($date) == 4) {
+            $this->YYYY = $date;
+        } elseif ($date !== '') {
+            //The date comes from a remote document: escape it before printing it.
+            echo '<div class="alert-box">Publishing date: ', htmlspecialchars($date, ENT_QUOTES, 'UTF-8'), " / check the template, the code doesn't know how to parse this format and only made a guess. ",
+                '<a href="" class="close">&times;</a></div>';
+            $date = date_parse($date);
+            $this->YYYY = $date['year'];
+            $this->MM = $date['month'];
+            $this->DD = $date['day'];
+        }
+
+        //Authors
+        $this->Authors = [];
+        //TODO: look type mapping
+        $contributors = $book->allResources('schema:contributor');
+        foreach ($contributors as $contributor) {
+            $this->Authors[] = [
+                (string)$contributor->get('schema:givenName'),
+                (string)$contributor->get('schema:familyName')
+            ];
+        }
+
+        //Kludge for library:placeOfPublication
+        //We have generally two links, one for the city, one for the country.
+        //Only the city has a schema:name, the country is only a reference.
+        $rdf_content = file_get_contents($url . '.rdf');
+        if ($rdf_content !== false && preg_match_all('@<library:placeOfPublication rdf:resource="([^"]*)"/>@', $rdf_content, $matches)) {
+            foreach ($matches[1] as $place) {
+                //Skips places the parsed graph doesn't know about
+                if (!isset($resources[$place])) {
+                    continue;
+                }
+                if ($cityCandidate = (string)$resources[$place]->get('schema:name')) {
+                    $this->Place = $cityCandidate;
+                    break;
+                }
+            }
+        }
+    }
+}
diff --git a/composer.json b/composer.json
index 9c9ac80..e56c463 100644
--- a/composer.json
+++ b/composer.json
@@ -1,8 +1,9 @@
{
"name": "dereckson/source-templates-generator",
"description": "Generates sources and references templates for Wikipedia",
"type": "package",
"require": {
- "php": ">=5.4"
+ "php": ">=5.4",
+ "easyrdf/easyrdf": "~0.8"
}
}
diff --git a/index.php b/index.php
index 617e30d..098fc61 100644
--- a/index.php
+++ b/index.php
@@ -1,127 +1,142 @@
<?php
require 'vendor/autoload.php';
+require 'autoload.php';
//Get default form settings
$format = 0;
if (array_key_exists('format', $_REQUEST)) {
$format = $_REQUEST['format'];
setcookie('format', $_REQUEST['format'], time() + 2592000);
} elseif (array_key_exists('format', $_COOKIE)) {
$format = $_COOKIE['format'];
}
?>
<!-- Content -->
<h2>Get source template for this URL</h2>
<form method="post" class="custom">
<label for="URL">URL: </label>
<div class="row collapse">
<div class="ten mobile-three columns">
<input type="text" name="URL" id="URL" value="<?= array_key_exists('URL', $_REQUEST) ? $_REQUEST['URL'] : '' ?>" />
</div>
<div class="two mobile-one columns">
<input type="submit" class="button expand postfix" value="Generate template" />
</div>
</div>
<div class="row collapse">
<div class="six columns">
<label>Prints the template:</label>
<label for="format_multiline"><input type="radio" name="format" id="format_multiline" value="0" <?= $format ? '' : 'checked ' ?>/> in multi-lines mode</label>
<label for="format_oneline_spaced"><input type="radio" name="format" id="format_oneline_spaced" value="1" <?= ($format == 1) ? 'checked ' : '' ?>/> in one line (with spaces)</label>
<label for="format_oneline_nospace"><input type="radio" name="format" id="format_oneline_nospace" value="2" <?= ($format == 2) ? 'checked ' : '' ?>/> in one line (without space)</label>
<label for="format_oneline_spacebeforepipe"><input type="radio" name="format" id="format_oneline_spacebeforepipe" value="3" <?= ($format == 3) ? 'checked ' : '' ?>/> in one line (without space, except before |)</label>
</div>
<div class="six columns">
<label>Project:</label>
<select>
<option value="fr.wikipedia">French Wikipedia</option>
</select>
<label for="force_article"><input type="checkbox" name="force_article" id="force_article" /> Force {{Article}} template</label>
</div>
</div>
</form>
<?php
if (array_key_exists('URL', $_REQUEST)) {
include('page.php');
//Does the specified URL valid and exist?
$url = $_REQUEST['URL'];
if (!filter_var($url, FILTER_VALIDATE_URL)) {
message_die(GENERAL_ERROR, "$url isn't a valid URL.", 'URL issue');
}
//Gets page information
setlocale(LC_TIME, 'fr_FR.UTF-8');
$page = Page::load($url);
if ($page->error) {
message_die(GENERAL_ERROR, "Can't open $url", 'URL issue');
}
$force_article = array_key_exists('force_article', $_REQUEST) && $_REQUEST['force_article'];
if (!$force_article && $page->is_article()) {
echo "<h3>Note</h3><p>Cette URL pointe vers un article de revue, aussi le modèle <a href=\"https://fr.wikipedia.org/wiki/Template:Article\">{{Article}}</a> est indiqué.</p>";
}
+ if ($page->switchTo != null) {
+ $documentObject = new $page->switchTo['document']['class'];
+ foreach ($page->switchTo['document']['params'] as $key => $value) {
+ $documentObject->$key = $value;
+ }
+ call_user_func([$documentObject, $page->switchTo['document']['method']]);
+ }
+
//Gets template
- require('templates/template.php');
- if ($force_article || $page->is_article()) {
- require('templates/wikipedia-fr/Article.php');
+ if ($page->switchTo != null) {
+ switch ($page->switchTo['template']) {
+ case 'book':
+ $template = OuvrageTemplate::loadFromBook($documentObject);
+ break;
+
+ default:
+ $template = "DEBUG: please add a template logic for this switch object:\n\n" . print_r($page->switchTo, true);
+ }
+ } elseif ($force_article || $page->is_article()) {
$template = ArticleTemplate::loadFromPage($page);
} else {
- require('templates/wikipedia-fr/Lien_web.php');
$template = LienWebTemplate::loadFromPage($page);
}
//Reformats template if needed
switch ($_REQUEST['format']) {
case 1:
$template = str_replace("\n", '', $template);
break;
case 2:
$template = str_replace("\n | ", '|', $template);
$template = str_replace(" = ", '=', $template);
break;
case 3:
$template = str_replace("\n | ", ' |', $template);
$template = str_replace(" = ", '=', $template);
break;
}
//Prints template
echo " <h3>Template</h3> \n <textarea id=\"template\" rows=16 cols=80>\n$template</textarea>";
//Meta tags
if (count($page->meta_tags)) {
echo "\n\n <h3>Meta tags</h3>\n <table class=\"twelve\" cellpadding=\"8\">\n <thead>\n <tr><th>Tag</th><th>Value</th></tr>\n </thead>\n <tbody>";
foreach ($page->meta_tags as $key => $value) {
echo "\n <tr><td>$key</td><td>$value</td></tr>";
}
echo "\n </tbody>\n </table>";
}
}
?>
<h2>Documentation</h2>
<div class="row">
<div class="three columns">
<h3>References</h3>
<ul class="menu">
<li><a href="http://fr.wikipedia.org/wiki/Modèle:Lien web">{{Lien web}}</a></li>
<li><a href="http://fr.wikipedia.org/wiki/Modèle:Article">{{Article}}</a></li>
<li><a href="http://www.prismstandard.org/specifications/">PRISM</a></li>
<li><a href="http://dublincore.org/">Dublin Core</a></li>
<li><a href="http://scholar.google.com/intl/en/scholar/inclusion.html">Google Scholar</a></li>
<li><a href="http://ogp.me/">Open Graph</a></li>
</ul>
</div>
<div class="nine columns">
<h3>How to improve this tool?</h3>
<p>A little PHP knowledge will allow you to customize and improve this tool. I will be happy to accept patches in this goal.</p>
<p>If you wish to adapt this tool to be used on another website (a Wikipedia project in another language or outside Wikipedia), please see the template.php file and samples in the templates/ folder.</p>
<p>If you wish to add websites analysis, please add the URL in index.dat, then create a class which extends Page ; see page.php and the pages/ folder.</p>
<p><strong>Source code:</strong> [ <a href="http://hg.dereckson.be/source-templates-generator">git repository</a> | <a href="https://bitbucket.org/dereckson/source-templates-generator/get/master.zip">download current snapshot</a> ]</p>
</div>
</div>
<script src="/javascripts/jquery.foundation.forms.js"></script>
diff --git a/page.php b/page.php
index ed46bc3..bb39cfd 100644
--- a/page.php
+++ b/page.php
@@ -1,311 +1,315 @@
<?php
define('LONG_DATE_FORMAT', '%e %B %Y');
define('USER_AGENT', 'WikimediaTools/SourceTemplatesGenerator/0.1');
define('USER_AGENT_FALLBACK', 'Mozilla/5.0');
define('USER_AGENT_FALLBACK_FULL', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13) Gecko/20080311 Firefox/2.0.0.13');
require_once('helpers/Encoding.php');
class Page {
/*
* @var string The page URL
*/
public $url;
/**
* @var array Meta tags
*/
public $meta_tags;
/**
* @var string The page content
*/
public $data;
/**
* @var string The page title
*/
public $title;
/**
* @var string The page author
*/
public $author;
/**
* @var Array The page coauthors
*/
public $coauthors;
/**
* @var string The site ISSN
*/
public $issn;
//If we use the parameters yyyy mm dd, we describe CONTENT date:
/**
* @var int The page content's year
*/
public $yyyy;
/**
* @var int The page content's month
*/
public $mm;
/**
* @var int The page content's day
*/
public $dd;
//If not, we describe ONLINE RESOURCE PUBLISH date:
/**
* @var string The page publication date in relevant locale
*/
public $date;
/**
* @var int The page publication unixtime
*/
public $unixtime;
/**
* @var bool Indicates if we have to skip year/month/date template parameters
*/
public $skipYMD;
/**
* @var bool Indicates if we have to skip author template parameter
*/
public $skipAuthor;
+ /**
+ * @var mixed If not null, contains an array for another service to use
+ */
+ public $switchTo = null;
/**
* @var string The last error occured while opening and parsing the page
*/
public $error;
/**
* Initializes a new Page instance. If an error occured, you can read it in $this->error.
*
* @param string $url the page URL
*/
function __construct ($url) {
$this->url = $url;
$this->get_data();
if ($this->data) {
$this->analyse();
}
}
function get_data () {
ini_set('user_agent', USER_AGENT);
$data = file_get_contents($this->url);
if (!$data) {
ini_set('user_agent', USER_AGENT_FALLBACK);
if (!$data = @file_get_contents($this->url)) {
$this->error = "Can't read URL";
return;
}
}
$encoding = mb_detect_encoding($data, "ISO-8859-15, ISO-8859-1, UTF-8, ASCII, auto");
if ($encoding && $encoding != 'UTF-8') {
$this->data = Encoding::toUTF8($data);
} else {
$this->data = $data;
}
}
/**
* Return a new Page instance, or if such class exists, an instance class specialized for your site.
*
* @param $url the page URL
*/
static function load ($url) {
//Classes list are stored in pages/index.dat file
//Each line contains the URL beginning, a tabulation, and the page analyser name
// * class is this name, appended by 'Page'
// * source file is the lowercase version of this name, appended by '.php'
$pages = file('pages/index.dat', true);
foreach ($pages as $line) {
$page = explode("\t", $line);
if (substr($url, 0, strlen($page[0])) == $page[0]) {
$file = strtolower(trim($page[1])) . '.php';
$class = trim($page[1]) . 'Page';
require("pages/$file");
return new $class($url);
}
}
return new Page($url);
}
/**
* Analyses metatags to process content
*/
function analyse () {
//Meta tags (including <meta property="" value=""> and <meta itemprop="" value="" syntax)
$this->meta_tags = $this->get_meta_tags();
$t = $this->meta_tags;
//Title
$this->title = $this->get_title();
//Date
if ($date = $this->getMetaTag($t, 'date', 'pubdate', 'content_create_date')) {
$date = date_parse($date);
$this->yyyy = $date['year'];
$this->mm = $date['month'];
$this->dd = $date['day'];
}
//Site name
$this->site = $this->getMetaTag($t, 'og:site_name');
//Author
$this->author = $this->getMetaTag($t, 'author');
}
/**
* Gets page metatags
*
* @return array an array where the keys are the metatags' names and the values the metatags' values
*/
function get_meta_tags () {
return $this::get_all_meta_tags($this->url);
}
/**
* Gets all metatags, including those using meta property= and meta itemprop= syntax
*
* @return array an array where the keys are the metatags' names and the values the metatags' values
*/
function get_all_meta_tags () {
//Thank you to Michael Knapp and Mariano
//See http://php.net/manual/en/function.get-meta-tags.php comments
preg_match_all('/<[\s]*meta[\s]*\b(name|property|itemprop)\b="?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', $this->data, $match);
if (isset($match) && is_array($match) && count($match) == 4) {
$originals = $match[0];
$names = $match[2];
$values = $match[3];
if (count($originals) == count($names) && count($names) == count($values)) {
$metaTags = array();
for ($i=0, $limiti = count($names) ; $i < $limiti ; $i++) {
$metaTags[$names[$i]] = $values[$i];
}
}
}
return $metaTags;
}
/**
* Gets title
*
* @return string The page title
*/
function get_title () {
$title = $this->getMetaTag($this->meta_tags, 'title', 'og:title', 'DC.title', 'Title');
return $title ?: ((preg_match("#<title>(.+)<\/title>#iU", $this->data, $title)) ? trim($title[1]) : '');
}
/**
* Determines if the current page is an article published in a journal.
*
* @return bool true if the current page is an article ; otherwise, false
*/
function is_article () {
return
(array_key_exists('dc_type', $this->meta_tags) && $this->meta_tags['dc_type'] == 'journalArticle')
||
(array_key_exists('dcsext_pn-cat', $this->meta_tags) && $this->meta_tags['dcsext_pn-cat'] == 'Article')
||
array_key_exists('citation_journal_title', $this->meta_tags)
||
array_key_exists('prism_publicationname', $this->meta_tags);
}
/**
* Gets relevant metatag
*
* @param array the metatags
* @param string... the list of acceptable metatags
*
* @return string the first metatag value found
*/
static function getMetaTag () {
$tags = func_get_args();
$metatags = array_shift($tags);
foreach ($tags as $tag) {
$tag_lowercase = strtolower($tag);
foreach ($metatags as $key => $value) {
if ($tag_lowercase == strtolower($key)) return $value;
}
}
return '';
}
/**
* Finds a portion of text included between $before and $after strings on the current page
*
* @param string $before The string at the left of the text to be grabbed
* @param string $after The string at the right of the text to be grabbed
*
* @return string The text found between $before and $after
*/
function between ($before, $after) {
return self::grab($this->data, $before, $after);
}
/**
* Finds a portion of text included between $before and $after strings
*
* @param string $text The text where to find the substring
* @param string $before The string at the left of the text to be grabbed
* @param string $after The string at the right of the text to be grabbed
*
* @return string The text found between $before and $after
*/
static function grab ($text, $before, $after) {
$pos1 = strpos($text, $before);
if ($pos1 === false) { return false; } else { $pos1 += strlen($before); }
$pos2 = strpos($text, $after, $pos1 + 1);
if ($pos2 === false) { return false; }
return substr($text, $pos1, $pos2 - $pos1);
}
/**
* Downloads, through CURL library, accepting cookies.
*
* @param $url The URL to fetch
*/
static function curl_download ($url, $agent = '') {
$ch = curl_init();
$timeout = 5;
$cookie_file = tmpfile();
$cookie_file = tempnam(sys_get_temp_dir(), "cookie-sourcesgen-");
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_file);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_file);
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
if ($agent != '') curl_setopt($ch, CURLOPT_USERAGENT, $agent);
$data = curl_exec($ch);
curl_close($ch);
unlink($cookie_file);
return $data;
}
}
diff --git a/pages/index.dat b/pages/index.dat
index 6cd254b..005599a 100644
--- a/pages/index.dat
+++ b/pages/index.dat
@@ -1,12 +1,13 @@
http://www.erudit.org/ Erudit
http://www.lalibre.be/ LaLibreBelgique
http://www.lesoir.be/ LeSoir
http://archives.lesoir.be/ LeSoir
http://www.rue89.com/ Rue89
http://www.tandfonline.com TaylorAndFrancis
http://www.lemonde.fr/ LeMonde
http://www.lefigaro.fr/ LeFigaro
http://www.chroniquesautomatiques.com/ LesChroniquesAutomatiques
http://www.jstor.org/ JSTOR
http://www.persee.fr/ Persee
http://www.nytimes.com/ NewYorkTimes
+http://www.worldcat.org/ WorldCat
diff --git a/pages/worldcat.php b/pages/worldcat.php
new file mode 100644
index 0000000..1f12516
--- /dev/null
+++ b/pages/worldcat.php
@@ -0,0 +1,29 @@
+<?php
+
+/**
+ * Page analyser for WorldCat URLs.
+ */
+class WorldCatPage extends Page {
+    /**
+     * Analyses the page, then, for a title URL containing an OCLC number,
+     * fills $this->switchTo with the Book class, its OCLC parameter, the
+     * method to call on it, and the 'book' template name.
+     */
+    function analyse () {
+        parent::analyse();
+
+        //[0-9]+ and not [0-9]*: an URL ending by /oclc/ carries no number to query.
+        if (substr($this->url, 0, 30) == "http://www.worldcat.org/title/" && preg_match("@/oclc/([0-9]+)@", $this->url, $matches)) {
+            $this->switchTo = [
+                'document' => [
+                    'class' => 'Book',
+                    'params' => [
+                        'OCLC' => $matches[1]
+                    ],
+                    'method' => 'queryWorldCatFromOCLC',
+                ],
+                'template' => 'book',
+            ];
+        }
+    }
+}
diff --git a/templates/wikipedia-fr/Ouvrage.php b/templates/wikipedia-fr/Ouvrage.php
new file mode 100644
index 0000000..bfed9d7
--- /dev/null
+++ b/templates/wikipedia-fr/Ouvrage.php
@@ -0,0 +1,65 @@
+<?php
+setlocale(LC_TIME, 'fr_FR.UTF-8');
+
+/**
+ * {{Ouvrage}} template (French Wikipedia), built from a Book object.
+ */
+class OuvrageTemplate extends Template {
+    /**
+     * @var string The access date, printed as the 'consulté le' parameter
+     */
+    public $accessdate;
+
+    /**
+     * Initializes a new instance, with the current date as access date.
+     */
+    function __construct () {
+        $this->name = "Ouvrage";
+        $this->accessdate = trim(strftime(LONG_DATE_FORMAT));
+    }
+
+    /**
+     * Gets a new template instance filled with the information of a book.
+     *
+     * @param Book $book The book to describe
+     * @return OuvrageTemplate The template
+     */
+    static function loadFromBook ($book) {
+        $template = new self;
+
+        $i = 1;
+        foreach ($book->Authors as $author) {
+            $template->params["prénom$i"] = $author[0];
+            $template->params["nom$i"] = $author[1];
+            $i++;
+        }
+
+        $template->params['titre'] = $book->Title;
+        $template->params['éditeur'] = $book->Publisher;
+        //The place isn't always known
+        $template->params['lieu'] = isset($book->Place) ? $book->Place : null;
+
+        $template->params['année'] = $book->YYYY;
+        //Month and day aren't set by Book for a year-only publishing date.
+        //mktime is called with an explicit day (1): by default, it uses the current
+        //day of month, which overflows to the next month when this day doesn't
+        //exist in the requested month (e.g. February asked on the 31st).
+        if (!empty($book->MM)) { $template->params['mois'] = strftime('%B', mktime(0, 0, 0, $book->MM, 1)); }
+        if (!empty($book->DD)) { $template->params['jour'] = $book->DD; }
+
+        $template->params['oclc'] = (int)$book->OCLC;
+
+        return $template;
+    }
+
+    /**
+     * Gets the template as a string, with the 'consulté le' parameter added.
+     *
+     * @return string
+     */
+    function __toString () {
+        $this->params['consulté le'] = $this->accessdate;
+
+        return parent::__toString();
+    }
+}

File Metadata

Mime Type
text/x-diff
Expires
Thu, Sep 18, 12:07 (18 h, 8 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2990773
Default Alt Text
(22 KB)

Event Timeline