Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F11724615
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
11 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/helpers/namecase.php b/helpers/namecase.php
new file mode 100644
index 0000000..83ddc0e
--- /dev/null
+++ b/helpers/namecase.php
@@ -0,0 +1,94 @@
+<?php
+
+/**
+ * Properly capitalize a name.
+ *
+ * @package NameCase
+ * @version 1.0.2
+ * @author Alex Dunae, Dialect <alex[at]dialect[dot]ca>
+ * @copyright Copyright (c) 2008, Alex Dunae
+ * @license http://www.gnu.org/licenses/gpl-3.0.txt
+ * @link http://dialect.ca/code/name-case/
+ */
+
+/**
+ * Apply proper capitalization rules to a name.
+ *
+ * Strings containing both '@' and '.' are returned unchanged (assumed e-mail addresses).
+ *
+ * @param string $str
+ * @return string
+ */
+function name_case($str) {
+    // basic check for e-mail addresses to allow copy-and-paste of e-mail lists;
+    // strpos() yields 0 (falsy) for a match on the first character, hence !== false
+    if(strpos($str, '@') !== false && strpos($str, '.') !== false)
+        return $str;
+
+    if(function_exists('mb_convert_encoding'))
+        $str = @mb_convert_encoding($str, 'UTF-8', 'auto');
+
+    // build name chunks: cut after every whitespace, '-', '.' or ','
+    $processed_chunks = array();
+    $buffer = '';
+    for($i = 0, $length = strlen($str); $i < $length; $i++) {
+        if(preg_match('/[\s]+/', $str[$i]) > 0 || $str[$i] == '-' ||
+                $str[$i] == '.' || $str[$i] == ',') {
+            $processed_chunks[] = _process_name_case_chunk($buffer . $str[$i]);
+            $buffer = '';
+        } else {
+            $buffer .= $str[$i];
+        }
+    }
+    $processed_chunks[] = _process_name_case_chunk($buffer);
+    return trim(implode('', $processed_chunks));
+}
+
+/**
+ * Re-case one chunk produced by name_case(): a word plus its trailing delimiter.
+ *
+ * This function should not be called directly.
+ *
+ * @param string $str
+ * @return string
+ * @see name_case
+ */
+function _process_name_case_chunk($str) {
+    // Surname prefixes stay lowercase
+    if(preg_match('/^(van|von|der|la|d[aeio]|d[ao]s|dit)[\s,]*$/i', $str))
+        return strtolower($str);
+
+    // Ordinal suffixes (I - VIII only); the former pattern missed V and VIII
+    if(preg_match('/^(i{1,3}|iv|vi{0,3})[\s,]*$/i', $str))
+        return strtoupper($str);
+
+    if(function_exists('mb_convert_case'))
+        $str = mb_convert_case($str, MB_CASE_TITLE, 'UTF-8');
+    else
+        $str = ucfirst(strtolower($str));
+
+    // Capital right after the prefix, like D'Angelo, McDonald, O'Neil, DeWolf;
+    // isset() skips a bare "Mc" or "O'": there is no offset 2 to write to
+    if(isset($str[2]) && preg_match('/^(Mc|[DO]\'|St\.|St[\.]?[\s]|Dewolf)/i', $str)) {
+        $str[2] = strtoupper($str[2]);
+        return $str;
+    }
+
+    // Capital after "Mac", like MacDonald, MacRae (not "Mach..."); anchored at the
+    // start so that "mac" inside a word (e.g. Lamacq) is left alone
+    if(preg_match('/^Mac(allist|arth|b|c(allu|art|ask|l|r|ull)|d|f|g|i(nn|nty|saa|v)|kinn|kn|l(a|ea|eo)|m|na[mu]|n[ei]|ph|q|ra|sw|ta|w)/i', $str)) {
+        $str[3] = strtoupper($str[3]);
+    }
+
+    return $str;
+}
+
+/* References
+ * - http://www.zu.ac.ae/publications/editorial/arabic.html
+ * - http://snippets.dzone.com/posts/show/2010
+ * - http://www.johncardinal.com/tmgutil/capitalizenames.htm
+ * - http://freejava.info/capitalize-english-names/
+ * - http://www.census.gov/genealogy/names/names_files.html
+ */
+
+?>
diff --git a/index.php b/index.php
index d701d0c..0b9e847 100644
--- a/index.php
+++ b/index.php
@@ -1,50 +1,50 @@
<!-- Content -->
<div id="content">
<h1 class="icoTitle"><img src="/_pict/ico/forms.png" alt="Tools - form generation"/>{{Lien web}}</h1>
<form method="post">
- <label for="URL">URL: </label><input type="text" size="80" name="URL" id="URL" value="<?= $_REQUEST['URL'] ?>" />
+ <label for="URL">URL: </label><input type="text" size="80" name="URL" id="URL" value="<?= /* user input: escaped for the attribute context */ array_key_exists('URL', $_REQUEST) && is_string($_REQUEST['URL']) ? htmlspecialchars($_REQUEST['URL'], ENT_QUOTES, 'UTF-8') : '' ?>" />
</form>
<?php
if (array_key_exists('URL', $_REQUEST)) {
include('page.php');
$url = $_REQUEST['URL'];
setlocale(LC_TIME, 'fr_FR.UTF-8');
$page = Page::load($url);
if ($page->is_article()) {
echo "<h3>Note</h3><p>Cette URL pointe vers un article de revue, aussi le modèle {{Article}} est indiqué.</p>";
}
//Template
echo " <h3>Template</h3> \n <textarea id=\"template\" rows=20 cols=80>\n";
require('templates/template.php');
if ($page->is_article()) {
require('templates/wikipedia-fr/Article.php');
$template = ArticleTemplate::loadFromPage($page);
} else {
require('templates/wikipedia-fr/Lien_web.php');
$template = LienWebTemplate::loadFromPage($page);
}
echo $template, '</textarea>';
//Meta tags
echo "\n\n <h3>Meta tags</h3>\n <table cellpadding=8>\n <tr><th>Tag</th><th>Value</th></tr>";
foreach ($page->meta_tags as $key => $value) {
- echo " <tr><td>$key</td><td>$value</td></tr>";
+ //Tag names and values are copied from the remote page: escape them before output
+ echo " <tr><td>", htmlspecialchars($key, ENT_QUOTES, 'UTF-8'), "</td><td>", htmlspecialchars($value, ENT_QUOTES, 'UTF-8'), "</td></tr>";
}
echo "\n </table>";
}
?>
</div>
<!-- left menu -->
<div id="leftMenu">
<ul class="navMenu">
<li><a href="http://fr.wikipedia.org/wiki/Modèle:Lien web">{{Lien web}}</a></li>
<li><a href="http://fr.wikipedia.org/wiki/Modèle:Article">{{Article}}</a></li>
<li><a href="http://www.prismstandard.org/specifications/">PRISM</a></li>
<li><a href="http://dublincore.org/">Dublin Core</a></li>
- <li><a href="http://scholar.google.com/intl/en/scholar/inclusion.html">Google
-Scholar</a></li>
+ <li><a href="http://scholar.google.com/intl/en/scholar/inclusion.html">Google Scholar</a></li>
+ <li><a href="http://ogp.me/">Open Graph</a></li>
</ul>
</div>
diff --git a/page.php b/page.php
index 3f6b365..df31cc6 100644
--- a/page.php
+++ b/page.php
@@ -1,91 +1,98 @@
<?php
define('LONG_DATE_FORMAT', '%e %B %Y');
class Page {
public $url;
/**
* @var array Meta tags
*/
public $meta_tags;
/**
* @var string The page content
*/
public $data;
public $title;
+ public $author;
+ public $yyyy;
+ public $mm;
+ public $dd;
+
+ public $skipYMD;
+ public $skipAuthor;
function __construct ($url) {
$this->url = $url;
$this->data = file_get_contents($url);
$this->analyse();
}
static function load ($url) {
$pages = file('pages/index.dat', true);
foreach ($pages as $line) {
$page = explode("\t", $line);
if (substr($url, 0, strlen($page[0])) == $page[0]) {
$file = strtolower(trim($page[1])) . '.php';
$class = trim($page[1]) . 'Page';
require("pages/$file");
return new $class($url);
}
}
return new Page($url);
}
function analyse () {
$this->meta_tags = $this->get_meta_tags();
$this->title = $this->get_title();
}
function get_meta_tags () {
return get_meta_tags($this->url);
}
+ /**
+ * Gets the meta tags declared through a name, property or itemprop attribute
+ *
+ * @return array The content of each meta tag matched in the page data, indexed by tag name
+ */
function get_all_meta_tags () {
//Thank you to Michael Knapp and Mariano
//See http://php.net/manual/en/function.get-meta-tags.php comments
+ //Declared before matching, so an array is returned even when no tag list can be built
+ $metaTags = array();
preg_match_all('/<[\s]*meta[\s]*\b(name|property|itemprop)\b="?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', $this->data, $match);
if (isset($match) && is_array($match) && count($match) == 4)
{
$originals = $match[0];
$names = $match[2];
$values = $match[3];
if (count($originals) == count($names) && count($names) ==
count($values))
{
- $metaTags = array();
for ($i=0, $limiti=count($names); $i < $limiti; $i++)
{
$metaTags[$names[$i]] = $values[$i];
}
}
}
return $metaTags;
}
/**
* Gets title
*
* @return string The page title
*/
function get_title () {
return (preg_match("#<title>(.+)<\/title>#iU", $this->data, $title)) ? trim($title[1]) : '';
}
function is_article () {
if (array_key_exists('dc_type', $this->meta_tags) && $this->meta_tags['dc_type'] == 'journalArticle') {
return true;
}
return false;
}
}
diff --git a/pages/index.dat b/pages/index.dat
index 8cce940..8949989 100644
--- a/pages/index.dat
+++ b/pages/index.dat
@@ -1 +1,2 @@
http://www.rue89.com/ Rue89
+http://www.lesoir.be/ LeSoir
diff --git a/pages/lesoir.php b/pages/lesoir.php
new file mode 100644
index 0000000..6c5debe
--- /dev/null
+++ b/pages/lesoir.php
@@ -0,0 +1,68 @@
+<?php
+
+//Page analysis for www.lesoir.be
+class LeSoirPage extends Page {
+    /** @var string Site name */
+    public $site;
+    /** @var string Publication date, formatted with LONG_DATE_FORMAT */
+    public $date;
+
+    /**
+     * Completes the generic analysis with the site name, the publication
+     * date (from the 'archi_id' meta tag) and the author (from the signature
+     * paragraph). Date and author are left unset when the markup lacks them.
+     */
+    function analyse () {
+        parent::analyse();
+
+        //Hardcoded known info
+        $this->site = "Le Soir";
+        $this->skipYMD = true;
+
+        //Gets date
+        //meta tag 'archi_id' has t-YYYYMMDD-HHMMhh as format (where hh = AM/PM)
+        // e.g. t-20120722-0211PM
+        $id = isset($this->meta_tags['archi_id']) ? $this->meta_tags['archi_id'] : '';
+        if (preg_match('/^.{2}(\d{4})(\d{2})(\d{2})/', $id, $ymd)) {
+            $this->date = strftime(LONG_DATE_FORMAT, mktime(0, 0, 0, (int)$ymd[2], (int)$ymd[3], (int)$ymd[1]));
+        }
+
+        //Gets author
+        //TODO: ensure no article has more than one author
+        $marker = '<p class="info st_signature">';
+        $pos1 = strpos($this->data, $marker);
+        if ($pos1 === false) {
+            //No signature paragraph: strpos() returned false, there is nothing to extract
+            return;
+        }
+        $pos1 += strlen($marker);
+        $pos2 = strpos($this->data, '</p>', $pos1);
+        if ($pos2 === false) {
+            return;
+        }
+        $author = trim(substr($this->data, $pos1, $pos2 - $pos1));
+        if ($author == "Rédaction en ligne") {
+            $this->skipAuthor = true;
+        } else {
+            require_once('helpers/namecase.php');
+            $this->author = name_case($author);
+        }
+    }
+
+    /**
+     * @return string The og:title meta tag, or the <title> element when it is absent
+     */
+    function get_title () {
+        if (isset($this->meta_tags['og:title'])) {
+            return $this->meta_tags['og:title'];
+        }
+        return parent::get_title();
+    }
+
+    function get_meta_tags () {
+        //PHP's get_meta_tags() only reads <meta name="">; og:* tags are declared with property=
+        return $this->get_all_meta_tags();
+    }
+}
+
+?>
diff --git a/templates/wikipedia-fr/Lien_web.php b/templates/wikipedia-fr/Lien_web.php
index 4958538..a2be729 100644
--- a/templates/wikipedia-fr/Lien_web.php
+++ b/templates/wikipedia-fr/Lien_web.php
@@ -1,57 +1,67 @@
<?php
setlocale(LC_TIME, 'fr_FR.UTF-8');
class LienWebTemplate extends Template {
public $author;
public $url;
public $title;
public $dd;
public $mm;
public $yyyy;
public $site;
public $publishdate;
public $accessdate;
/**
* @var bool Indicates if we've to remove jour/mois/année parameters
*/
public $skipYMD = false;
+ /**
+ * @var bool Indicates if we've to remove the auteur parameter
+ */
+ public $skipAuthor = false;
function __construct () {
$this->name = "Lien web";
$this->accessdate = trim(strftime(LONG_DATE_FORMAT));
}
+ //Copies the fields of an analysed page into a new template
static function loadFromPage ($page) {
$template = new LienWebTemplate();
$template->author = $page->author;
+ $template->skipAuthor = $page->skipAuthor;
$template->url = $page->url;
$template->title = $page->title;
- $template->dd = $page->yyyy;
- $template->mm = $page->yyyy;
+ $template->dd = $page->dd;
+ $template->mm = $page->mm;
$template->yyyy = $page->yyyy;
$template->site = $page->site;
$template->publishdate = $page->date;
$template->skipYMD = $page->skipYMD;
return $template;
}
+ //Fills the template parameters, then delegates the rendering to Template::__toString()
function __toString () {
- $this->params['auteur'] = $this->author;
- $this->params['url'] = $this->url;
+ if (!$this->skipAuthor) {
+ $this->params['auteur'] = $this->author;
+ }
$this->params['titre'] = $this->title;
if (!$this->skipYMD) {
- $this->params['jour'] = $this->mm;
- $this->params['mois'] = $this->dd;
+ //jour is the day (dd), mois the month (mm)
+ $this->params['jour'] = $this->dd;
+ $this->params['mois'] = $this->mm;
$this->params['année'] = $this->yyyy;
}
+ $this->params['url'] = $this->url;
$this->params['site'] = $this->site;
$this->params['en ligne le'] = $this->publishdate;
$this->params['consulté le'] = $this->accessdate;
return parent::__toString();
}
}
?>
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Thu, Sep 18, 16:34 (14 h, 10 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2991083
Default Alt Text
(11 KB)
Attached To
Mode
rSTG Source templates generator
Attached
Detach File
Event Timeline
Log In to Comment