...

View Full Version : Easy Question .... I think



mnybud
08-28-2006, 02:44 PM
Hi, I have this plugin for WordPress that fetches content from Wikipedia:
http://dev.wp-plugins.org/wiki/GetWIKI
(full script below)

I am trying to do this:
1. remove all hyperlinks from the grabbed Wiki content
2. remove specific sections of the grabbed content like the external link section and error messages

Here is an example of something I would like to remove from the grabbed content:
<div class="messagebox cleanup metadata">This article or section does not cite its references or sources.
<small>You can help Wikipedia by introducing appropriate citations.</small></div>

Here is the WordPress plugin code:

<?php
/*
Plugin Name: GetWIKI
Version: 1.0
Plugin URI: http://saj.in/blog/techtalk/82/getwiki-plugin-for-wordpress.asp
Author: Sajin Kunhambu
Author URI: http://saj.in/
Description: Get a WIKI article anywhere on your blog (e.g. ~GetWIKI(Your_Search_Term)~ )
*/

// Server configuration: where articles are fetched from. getArticle() opens a
// socket to $host:$port and requests "$path<title>".
$host = "en.wikipedia.org";
$port = 80;
$path = "/wiki/";

// Plugin configuration.
// $use_cache      - use cache_recall()/cache_store() when both are available.
// $cache_life     - lifetime handed to cache_recall(); the unit is defined by
//                   that helper (presumably minutes, i.e. 7 days - TODO confirm).
// $edit_link      - keep the "editsection" blocks of the article when true.
// $retrieved_link - keep the "printfooter" block of the article when true.
// $copy_left      - licence notice appended to every article by cleanUp().
$use_cache = true;
$cache_life = 10080;
$edit_link = false;
$retrieved_link = false;
$copy_left = "<div class=\"gfdl\">&copy; This material from <a href=\"http://en.wikipedia.org\">Wikipedia</a> is licensed under the <a href=\"http://www.gnu.org/copyleft/fdl.html\">GFDL</a>.</div>";

// function_exists() takes the function name as a string; a bare identifier is
// an undefined constant (a fatal Error on PHP 8).
if (!function_exists('cache_recall') || !function_exists('cache_store')) {
    // Caching functions are not available, so fall back to fetching every time.
    $use_cache = false;
}

/**
 * Reduce a fetched page to its article markup and wrap it for display.
 *
 * Steps: drop line feeds, keep only the part between the
 * "<!-- start content -->" and "<!-- end content -->" markers when both are
 * present, strip HTML comments and "[!&#...]" fragments, optionally strip the
 * "printfooter" and "editsection" blocks (controlled by the globals
 * $retrieved_link and $edit_link), make root-relative Wikipedia URLs absolute,
 * and finally wrap the result in <div class="wiki"> followed by $copy_left.
 *
 * @param string $article Raw response text (or an error message).
 * @return string Wrapped HTML fragment.
 */
function cleanUp( $article ) {
    global $edit_link, $retrieved_link, $copy_left;

    $article = str_replace("\n", "", $article);

    if (preg_match('/^.*(<!-- start content -->.*<!-- end content -->).*$/i', $article, $match) === 1) {
        $article = $match[1];
    }

    // The original patterns carried the "e" (eval) modifier. It is rejected by
    // PHP 7+, where preg_replace() then returns NULL and the article is lost.
    // With an empty replacement the modifier never had any effect, so it is
    // simply dropped here.
    $patterns = array(
        '#<!--.*-->#isU',
        '#\[!&\#.*\]#isU',
    );
    if (!$retrieved_link) {
        $patterns[] = '#<div\sclass="printfooter".*</div>#isU';
    }
    if (!$edit_link) {
        $patterns[] = '#\s*<div\s*class="editsection".*</div>\s*#isU';
    }
    foreach ($patterns as $pattern) {
        $stripped = preg_replace($pattern, "", $article);
        // preg_replace() yields NULL on a PCRE error; keep the text in that case.
        if ($stripped !== null) {
            $article = $stripped;
        }
    }

    // Only rewrite paths that START a quoted attribute value or a CSS url(...).
    // A blind replace of "/wiki/" would also hit URLs that are already
    // absolute and turn them into "http://en.wikipedia.orghttp://...".
    $absolute = preg_replace(
        '#(?<=["\'(])/(w|wiki|skins-1\.5)/#',
        'http://en.wikipedia.org/$1/',
        $article
    );
    if ($absolute !== null) {
        $article = $absolute;
    }

    return "<div class=\"wiki\">" . $article . $copy_left . "</div>";
}

/**
 * Fetch an article over HTTP, clean it with cleanUp() and optionally cache it.
 *
 * Uses the globals $host, $port and $path for the request and, when
 * $use_cache is true, cache_recall()/cache_store() keyed by
 * "getArticle(<title>)".
 *
 * @param string $title     Article title as it appears after $path in the URL.
 * @param int    $redirects Number of redirects already followed; used
 *                          internally to bound recursion, callers omit it.
 * @return string Wrapped HTML fragment, or a wrapped "== WIKI Error =="
 *                message when the article could not be retrieved.
 */
function getArticle( $title, $redirects = 0 ) {
    global $host, $port, $path, $use_cache, $cache_life;

    $max_redirects = 5;

    // The title is written straight into the request line; make sure it can
    // never terminate that line and inject extra headers.
    $title = str_replace(array("\r", "\n"), "", $title);

    $function_string = "getArticle(" . $title . ")";
    if ($use_cache) {
        if ($article = cache_recall($function_string, $cache_life)) {
            return $article;
        }
    }

    $fp = fsockopen($host, $port, $errno, $errstr, 30);
    if (!$fp) {
        // Connection failed: report it, but do not cache a transient failure.
        return cleanUp("== WIKI Error ==");
    }

    $request = "GET $path$title HTTP/1.0\r\nHost: $host\r\nUser-Agent: GetWiki for WordPress\r\n\r\n";
    fwrite($fp, $request);

    $response = "";
    while (!feof($fp)) {
        $chunk = fgets($fp, 128);
        if ($chunk === false) {
            // Read error or timeout: feof() may never become true, so stop here.
            break;
        }
        $response .= $chunk;
    }
    fclose($fp);

    if (preg_match('#^HTTP/\d\.\d\s+30[12378]\s#', $response)) {
        // Redirect: look for the target in the header section only.
        $header_end = strpos($response, "\r\n\r\n");
        $headers = ($header_end === false) ? $response : substr($response, 0, $header_end);

        $target = "";
        if ($redirects < $max_redirects && preg_match('/^Location:\s*(\S+)/im', $headers, $match)) {
            // Drop "scheme://host" so absolute and relative targets look alike.
            $location = preg_replace('#^[a-z][a-z0-9+.\-]*://[^/]+#i', '', $match[1]);
            if ($location !== null && strpos($location, $path) === 0) {
                $target = substr($location, strlen($path));
            }
        }

        if ($target !== "" && $target !== false) {
            // The recursive call already returns cleaned, wrapped markup.
            // Running cleanUp() on it again would wrap it twice and corrupt
            // the URLs it has already rewritten.
            // NOTE(review): if the host redirects every http request to
            // https, this still ends in the error message after
            // $max_redirects attempts - the $host/$port configuration would
            // need to change.
            $article = getArticle($target, $redirects + 1);
        } else {
            $article = cleanUp("== WIKI Error ==");
        }
    } else {
        $article = cleanUp($response);
    }

    if ($use_cache) {
        cache_store($function_string, $article);
    }
    return $article;
}

/**
 * Callback for wikify(): turn one matched ~GetWIKI(title)~ tag into the article.
 *
 * @param array $match Regex match; index 1 holds the title between the parentheses.
 * @return string Markup returned by getArticle() for that title.
 */
function wikify_replace_tag( $match ) {
    return getArticle($match[1]);
}

/**
 * Replace every ~GetWIKI(title)~ tag in a piece of text with the fetched article.
 *
 * The original implementation relied on the "e" (eval) regex modifier, which
 * executes the replacement as PHP code and no longer exists in PHP 7+ (there
 * preg_replace() returns NULL, blanking the whole post). A callback gives the
 * same result without evaluating any code.
 *
 * @param string $text Post content or excerpt.
 * @return string Text with all tags expanded; the input unchanged on a regex error.
 */
function wikify( $text ) {
    $expanded = preg_replace_callback(
        '#\~GetWIKI\((\S*)\)\~#iU',
        'wikify_replace_tag',
        $text
    );
    // NULL signals a PCRE error; never replace the post body with nothing.
    if ($expanded === null) {
        return $text;
    }
    return $expanded;
}

/**
 * Print the inline stylesheet for embedded articles.
 *
 * Emits a <style> element that styles the div.wiki wrapper and the div.gfdl
 * licence notice. Produces output only; returns nothing.
 */
function wiki_css() {
    // One entry per output line; the empty first and last entries produce the
    // leading and trailing line breaks around the <style> element.
    $css_lines = array(
        "",
        "<style type='text/css'>",
        "div.wiki {",
        "border: 1px dashed silver;",
        "background-color: #f0f0f0;",
        "}",
        "div.gfdl {",
        "font-size: 80%;",
        "}",
        "</style>",
        ""
    );
    echo implode("\n", $css_lines);
}


// Manual check, kept disabled: echo wikify("~GetWIKI(user:Sajin)~");

// Print the plugin stylesheet in the page head.
add_action('wp_head', 'wiki_css');

// Expand ~GetWIKI(...)~ tags in both full posts and excerpts, at priority 2.
foreach (array('the_content', 'the_excerpt') as $getwiki_filter_hook) {
    add_filter($getwiki_filter_hook, 'wikify', 2);
}
unset($getwiki_filter_hook);



Can anyone help with this?

mnybud
08-28-2006, 04:29 PM
anyone?



EZ Archive Ads Plugin for vBulletin Copyright 2006 Computer Help Forum