Page MenuHomeDevCentral

No OneTemporary

diff --git a/helpers/namecase.php b/helpers/namecase.php
new file mode 100644
index 0000000..83ddc0e
--- /dev/null
+++ b/helpers/namecase.php
@@ -0,0 +1,94 @@
+<?php
+
+/**
+ * Properly capitalize a name.
+ *
+ * @package NameCase
+ * @version 1.0.2
+ * @author Alex Dunae, Dialect <alex[at]dialect[dot]ca>
+ * @copyright Copyright (c) 2008, Alex Dunae
+ * @license http://www.gnu.org/licenses/gpl-3.0.txt
+ * @link http://dialect.ca/code/name-case/
+ */
+
+/**
+ * Apply proper capitalization rules to a name.
+ *
+ * Strings that contain both '@' and '.' are treated as e-mail addresses and
+ * returned untouched. Any other string is cut after every whitespace
+ * character, hyphen, period or comma; each piece (delimiter included) is
+ * cased by _process_name_case_chunk() and the pieces are joined back
+ * together, with leading and trailing whitespace trimmed from the result.
+ *
+ * @param string $str
+ * @returns string
+ */
+function name_case($str) {
+    // basic check for e-mail addresses to allow copy-and-paste of e-mail lists.
+    // strpos() returns 0 for a match on the first character, so compare with
+    // false explicitly instead of relying on truthiness.
+    if(strpos($str, '@') !== false && strpos($str, '.') !== false)
+        return $str;
+
+    if(function_exists('mb_convert_encoding'))
+        $str = @mb_convert_encoding($str, 'UTF-8', 'auto');
+
+    $processed_chunks = array();
+
+    // build name chunks
+    $buffer = '';
+    $length = strlen($str);
+    for($i = 0; $i < $length; $i++) {
+        $char = $str[$i];
+
+        // a delimiter closes the current chunk and stays attached to it
+        if(preg_match('/[\s]+/', $char) > 0 || $char == '-' ||
+           $char == '.' || $char == ',') {
+            $processed_chunks[] = _process_name_case_chunk($buffer . $char);
+            $buffer = '';
+        } else {
+            $buffer .= $char;
+        }
+    }
+
+    // whatever follows the last delimiter is the final chunk
+    $processed_chunks[] = _process_name_case_chunk($buffer);
+    return trim(implode('', $processed_chunks));
+}
+
+/**
+ * Process the chunks generated by the namecase function.
+ *
+ * This function should not be called directly.
+ *
+ * A chunk is one run of characters, usually followed by the delimiter that
+ * ended it. Surname prefixes are lowercased, roman numerals I to VIII are
+ * uppercased, everything else is title-cased and then adjusted for the
+ * Mc/O'/D'/St/Mac patterns handled below.
+ *
+ * @param string $str
+ * @returns string
+ * @see name_case
+ */
+function _process_name_case_chunk($str) {
+    // Surname prefixes
+    if(preg_match('/^(van|von|der|la|d[aeio]|d[ao]s|dit)[\s,]*$/i', $str))
+        return strtolower($str);
+
+    // Ordinal suffixes (I - VIII only)
+    if(preg_match('/^(i{1,3}|iv|vi{0,3})[\s,]*$/i', $str))
+        return strtoupper($str);
+
+    if(function_exists('mb_convert_case'))
+        $str = mb_convert_case($str, MB_CASE_TITLE, 'UTF-8');
+    else
+        $str = ucfirst(strtolower($str));
+
+    // Second letter capitalized, like D'Angelo, McDonald, St. John, O'Neil
+    if(preg_match('/(^|\s)+(Mc|[DO]\'|St\.|St[\.]?[\s]|Dewolf)/i', $str)) {
+        // A bare prefix such as "Mc" or "O'" has no character at offset 2;
+        // writing to a missing string offset would raise an error.
+        if(isset($str[2]))
+            $str[2] = strtoupper($str[2]);
+        return $str;
+    }
+
+    // Third letter capitalized, like MacDonald, MacRae
+    if(preg_match('/(^|\s*)(Mac)(allist|arth|b|c(allu|art|ask|l|r|ull)|d|f|g|i(nn|nty|saa|v)|kinn|kn|l(a|ea|eo)|m|na[mu]|n[ei]|ph|q|ra|sw|ta|w)/i', $str)) {
+        // not h,
+        // the pattern requires at least one letter after "Mac", so offset 3 exists
+        $str[3] = strtoupper($str[3]);
+        return $str;
+    }
+
+    return $str;
+}
+
+/* References
+ * - http://www.zu.ac.ae/publications/editorial/arabic.html
+ * - http://snippets.dzone.com/posts/show/2010
+ * - http://www.johncardinal.com/tmgutil/capitalizenames.htm
+ * - http://freejava.info/capitalize-english-names/
+ * - http://www.census.gov/genealogy/names/names_files.html
+ */
+
+?>
diff --git a/index.php b/index.php
index d701d0c..0b9e847 100644
--- a/index.php
+++ b/index.php
@@ -1,50 +1,50 @@
<!-- Content -->
<div id="content">
<h1 class="icoTitle"><img src="/_pict/ico/forms.png" alt="Tools - form generation"/>{{Lien web}}</h1>
<form method="post">
- <label for="URL">URL: </label><input type="text" size="80" name="URL" id="URL" value="<?= $_REQUEST['URL'] ?>" />
+ <label for="URL">URL: </label><input type="text" size="80" name="URL" id="URL" value="<?= /* escape the request value: it is echoed back inside an HTML attribute */ array_key_exists('URL', $_REQUEST) ? htmlspecialchars((string) $_REQUEST['URL'], ENT_QUOTES, 'UTF-8') : '' ?>" />
</form>
<?php
// When a URL has been submitted: load the matching Page, print the generated
// template inside a textarea, then list the meta tags found on the page.
if (array_key_exists('URL', $_REQUEST)) {
    include('page.php');
    $url = $_REQUEST['URL'];
    setlocale(LC_TIME, 'fr_FR.UTF-8');
    $page = Page::load($url);
    $isArticle = $page->is_article();
    if ($isArticle) {
        echo "<h3>Note</h3><p>Cette URL pointe vers un article de revue, aussi le modèle {{Article}} est indiqué.</p>";
    }
    //Template
    echo " <h3>Template</h3> \n <textarea id=\"template\" rows=20 cols=80>\n";
    require('templates/template.php');
    if ($isArticle) {
        require('templates/wikipedia-fr/Article.php');
        $template = ArticleTemplate::loadFromPage($page);
    } else {
        require('templates/wikipedia-fr/Lien_web.php');
        $template = LienWebTemplate::loadFromPage($page);
    }
    // The template embeds text taken from the fetched page, so escape it
    // before printing. double_encode is off so entities already present in
    // the page text are not escaped a second time.
    echo htmlspecialchars((string) $template, ENT_QUOTES, 'UTF-8', false), '</textarea>';
    //Meta tags
    echo "\n\n <h3>Meta tags</h3>\n <table cellpadding=8>\n <tr><th>Tag</th><th>Value</th></tr>";
    if (is_array($page->meta_tags)) {
        foreach ($page->meta_tags as $key => $value) {
            // Names and values come from the fetched page: escape both.
            $safeKey = htmlspecialchars((string) $key, ENT_QUOTES, 'UTF-8', false);
            $safeValue = htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8', false);
            echo " <tr><td>", $safeKey, "</td><td>", $safeValue, "</td></tr>";
        }
    }
    echo "\n </table>";
}
?>
</div>
<!-- left menu -->
<div id="leftMenu">
<ul class="navMenu">
<li><a href="http://fr.wikipedia.org/wiki/Modèle:Lien web">{{Lien web}}</a></li>
<li><a href="http://fr.wikipedia.org/wiki/Modèle:Article">{{Article}}</a></li>
<li><a href="http://www.prismstandard.org/specifications/">PRISM</a></li>
<li><a href="http://dublincore.org/">Dublin Core</a></li>
- <li><a href="http://scholar.google.com/intl/en/scholar/inclusion.html">Google
-Scholar</a></li>
+ <li><a href="http://scholar.google.com/intl/en/scholar/inclusion.html">Google Scholar</a></li>
+ <li><a href="http://ogp.me/">Open Graph</a></li>
</ul>
</div>
diff --git a/page.php b/page.php
index 3f6b365..df31cc6 100644
--- a/page.php
+++ b/page.php
@@ -1,91 +1,98 @@
<?php
// strftime() pattern used for long dates: day of month, full month name, four-digit year.
define('LONG_DATE_FORMAT', '%e %B %Y');
class Page {
public $url;
/**
* @var array Meta tags
*/
public $meta_tags;
/**
* @var string The page content
*/
public $data;
public $title;
+ public $author;
+ public $yyyy;
+ public $mm;
+ public $dd;
+
+ public $skipYMD;
+ public $skipAuthor;
/**
 * Downloads the page at $url and analyses it.
 *
 * NOTE(review): $url is handed to file_get_contents() as is. index.php
 * passes the raw request parameter, so local paths and non-HTTP schemes
 * are reachable here. Consider restricting the accepted schemes.
 *
 * @param string $url The page URL
 */
function __construct ($url) {
    $this->url = $url;
    // file_get_contents() returns false on failure; keep $data a string.
    $content = file_get_contents($url);
    $this->data = ($content === false) ? '' : $content;
    $this->analyse();
}
/**
 * Builds the page object for a URL, using a site-specific class when
 * pages/index.dat lists a prefix matching the URL.
 *
 * Each line of pages/index.dat holds a URL prefix and a class base name
 * separated by a tab. The first matching line wins: pages/<name>.php
 * (lowercased) is loaded and <name>Page is instantiated.
 *
 * @param string $url The page URL
 * @return Page A Page instance, or an instance of the site-specific class
 */
static function load ($url) {
    $pages = file('pages/index.dat', FILE_USE_INCLUDE_PATH);
    if ($pages === false) {
        // No index available: fall back to the generic analysis.
        return new Page($url);
    }
    foreach ($pages as $line) {
        $page = explode("\t", $line);
        // Skip blank or malformed lines (no tab, or empty prefix).
        if (count($page) < 2 || $page[0] === '') {
            continue;
        }
        if (strncmp($url, $page[0], strlen($page[0])) === 0) {
            $file = strtolower(trim($page[1])) . '.php';
            $class = trim($page[1]) . 'Page';
            // require_once: loading two URLs of the same site in one
            // request must not redeclare the class.
            require_once("pages/$file");
            return new $class($url);
        }
    }
    return new Page($url);
}
/**
 * Fills $meta_tags and $title from the downloaded page.
 *
 * Meta tags are read first, so a get_title() override may rely on them.
 */
function analyse () {
    $tags = $this->get_meta_tags();
    // get_meta_tags() can return false on failure; keep $meta_tags an array.
    $this->meta_tags = is_array($tags) ? $tags : array();
    $this->title = $this->get_title();
}
/**
 * Reads the meta tags of the page through PHP's built-in get_meta_tags(),
 * which is called with the page URL.
 *
 * @return array|false Whatever get_meta_tags() returns for $this->url
 */
function get_meta_tags () {
    $tags = get_meta_tags($this->url);
    return $tags;
}
/**
 * Extracts meta tags from the downloaded page content, accepting name=,
 * property= and itemprop= attributes.
 *
 * When the same tag name appears more than once, the last value wins.
 *
 * @return array Tag name => content value; empty when nothing matches
 */
function get_all_meta_tags () {
    //Thank you to Michael Knapp and Mariano
    //See http://php.net/manual/en/function.get-meta-tags.php comments
    // Start from an empty array so a page without matching tags still
    // returns an array instead of an undefined variable.
    $metaTags = array();
    $found = preg_match_all('/<[\s]*meta[\s]*\b(name|property|itemprop)\b="?' . '([^>"]*)"?[\s]*' . 'content="?([^>"]*)"?[\s]*[\/]?[\s]*>/si', $this->data, $match);
    if ($found) {
        // Group 2 holds the tag names, group 3 the content values.
        foreach ($match[2] as $i => $name) {
            $metaTags[$name] = $match[3][$i];
        }
    }
    return $metaTags;
}
/**
 * Reads the page title from the first <title> element of the downloaded
 * content.
 *
 * @return string The trimmed title, or an empty string when none is found
 */
function get_title () {
    $found = preg_match("#<title>(.+)<\/title>#iU", $this->data, $title);
    if ($found) {
        return trim($title[1]);
    }
    return '';
}
/**
 * Tells whether the page describes a journal article, i.e. whether its
 * dc_type meta tag equals 'journalArticle'.
 *
 * @return bool
 */
function is_article () {
    // $meta_tags may not be an array if the tags could not be read.
    if (!is_array($this->meta_tags)) {
        return false;
    }
    return array_key_exists('dc_type', $this->meta_tags)
        && $this->meta_tags['dc_type'] == 'journalArticle';
}
}
diff --git a/pages/index.dat b/pages/index.dat
index 8cce940..8949989 100644
--- a/pages/index.dat
+++ b/pages/index.dat
@@ -1 +1,2 @@
http://www.rue89.com/ Rue89
+http://www.lesoir.be/ LeSoir
diff --git a/pages/lesoir.php b/pages/lesoir.php
new file mode 100644
index 0000000..6c5debe
--- /dev/null
+++ b/pages/lesoir.php
@@ -0,0 +1,43 @@
+<?php
+
+//Page analysis for www.lesoir.be
+class LeSoirPage extends Page {
+    /**
+     * Runs the generic analysis, then fills in the site name, the
+     * publication date and the author for www.lesoir.be pages.
+     */
+    function analyse () {
+        parent::analyse();
+
+        //Hardcoded known info
+        $this->site = "Le Soir";
+        $this->skipYMD = true;
+
+        //Gets date
+        //meta tag 'archi_id' has t-YYYYMMDD-HHMMhh as format (where hh = AM/PM)
+        // e.g. t-20120722-0211PM
+        //The date stays null when the tag is missing or has no 8 digits at offset 2.
+        $this->date = null;
+        if (isset($this->meta_tags['archi_id'])
+            && preg_match('/^.{2}(\d{4})(\d{2})(\d{2})/', $this->meta_tags['archi_id'], $ymd)) {
+            $this->date = strftime(LONG_DATE_FORMAT, mktime(0, 0, 0, (int) $ymd[2], (int) $ymd[3], (int) $ymd[1]));
+        }
+
+        //Gets author
+        //TODO: ensure no article has more than one author
+        $marker = '<p class="info st_signature">';
+        $pos1 = strpos($this->data, $marker);
+        if ($pos1 === false) {
+            //No signature paragraph: leave the author untouched
+            return;
+        }
+        $pos1 += strlen($marker);
+        $pos2 = strpos($this->data, '</p>', $pos1);
+        if ($pos2 === false) {
+            //Unterminated paragraph: nothing reliable to extract
+            return;
+        }
+        $author = trim(substr($this->data, $pos1, $pos2 - $pos1));
+        if ($author == "R&#233;daction en ligne") {
+            //Generic newsroom signature, not a person
+            $this->skipAuthor = true;
+        } else {
+            require_once('helpers/namecase.php');
+            $this->author = name_case($author);
+        }
+    }
+
+    /**
+     * Uses the og:title meta tag as title, falling back to the generic
+     * <title> lookup when the tag is absent.
+     *
+     * @return string The page title
+     */
+    function get_title () {
+        if (isset($this->meta_tags['og:title'])) {
+            return $this->meta_tags['og:title'];
+        }
+        return parent::get_title();
+    }
+
+    /**
+     * @return array Tag name => content value
+     */
+    function get_meta_tags () {
+        //The tags read here (e.g. og:title) are not all <meta name="">,
+        //so use the parser that also accepts property= and itemprop=
+        return $this->get_all_meta_tags();
+    }
+}
+
+?>
diff --git a/templates/wikipedia-fr/Lien_web.php b/templates/wikipedia-fr/Lien_web.php
index 4958538..a2be729 100644
--- a/templates/wikipedia-fr/Lien_web.php
+++ b/templates/wikipedia-fr/Lien_web.php
@@ -1,57 +1,60 @@
<?php
setlocale(LC_TIME, 'fr_FR.UTF-8');
class LienWebTemplate extends Template {
public $author;
public $url;
public $title;
public $dd;
public $mm;
public $yyyy;
public $site;
public $publishdate;
public $accessdate;
/**
* @var bool Indicates if we've to remove jour/mois/année parameters
*/
public $skipYMD = false;
/**
 * Sets the template name and stamps the access date with today's date.
 */
function __construct () {
    $this->name = "Lien web";
    // %e pads single-digit days with a space, hence the trim.
    $today = strftime(LONG_DATE_FORMAT);
    $this->accessdate = trim($today);
}
/**
 * Builds a template filled with the information gathered on a page.
 *
 * @param Page $page The analysed page
 * @return LienWebTemplate
 */
static function loadFromPage ($page) {
    $template = new LienWebTemplate();
    $template->author = $page->author;
+    $template->skipAuthor = $page->skipAuthor;
    $template->url = $page->url;
    $template->title = $page->title;
    // Day and month were both copied from the year; copy each from its own field.
    $template->dd = $page->dd;
    $template->mm = $page->mm;
    $template->yyyy = $page->yyyy;
    $template->site = $page->site;
    $template->publishdate = $page->date;
    $template->skipYMD = $page->skipYMD;
    return $template;
}
/**
 * Fills the template parameters, in output order, and renders the template
 * through the parent class.
 *
 * @return string
 */
function __toString () {
- $this->params['auteur'] = $this->author;
- $this->params['url'] = $this->url;
+    if (!$this->skipAuthor) {
+        $this->params['auteur'] = $this->author;
+    }
    $this->params['titre'] = $this->title;
    if (!$this->skipYMD) {
        // jour = day, mois = month (they were swapped)
        $this->params['jour'] = $this->dd;
        $this->params['mois'] = $this->mm;
        $this->params['année'] = $this->yyyy;
    }
+    $this->params['url'] = $this->url;
    $this->params['site'] = $this->site;
    $this->params['en ligne le'] = $this->publishdate;
    $this->params['consulté le'] = $this->accessdate;
    return parent::__toString();
}
}
?>

File Metadata

Mime Type
text/x-diff
Expires
Thu, Sep 18, 16:34 (14 h, 10 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2991083
Default Alt Text
(11 KB)

Event Timeline