<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Yousef Ourabi &#187; Semantic Web</title>
	<atom:link href="http://yousefourabi.com/blog/category/semantic-web/feed/" rel="self" type="application/rss+xml" />
	<link>http://yousefourabi.com/blog</link>
	<description></description>
	<lastBuildDate>Mon, 26 Apr 2010 03:07:13 +0000</lastBuildDate>
	<generator>http://wordpress.org/?v=2.9.2</generator>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
			<item>
		<title>On demand indexing</title>
		<link>http://yousefourabi.com/blog/2008/07/on-demand-indexing/</link>
		<comments>http://yousefourabi.com/blog/2008/07/on-demand-indexing/#comments</comments>
		<pubDate>Mon, 28 Jul 2008 15:34:37 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/on-demand-indexing</guid>
		<description><![CDATA[The world is full of buzz around the launch of cuil whose premise is more efficient indexing &#8212; what about indexing on demand &#8212; more after I get off the bus




	
	
	
	
	


]]></description>
			<content:encoded><![CDATA[<p>The world is full of buzz around the launch of cuil whose premise is more efficient indexing &#8212; what about indexing on demand &#8212; more after I get off the bus</p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F07%2Fon-demand-indexing%2F&amp;title=On%20demand%20indexing&amp;bodytext=The%20world%20is%20full%20of%20buzz%20around%20the%20launch%20of%20cuil%20whose%20premise%20is%20more%20effecient%20indexing%20--%20what%20about%20indexing%20on%20demmand%20--%20more%20after%20I%20get%20off%20the%20bus%0A" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F07%2Fon-demand-indexing%2F&amp;title=On%20demand%20indexing&amp;notes=The%20world%20is%20full%20of%20buzz%20around%20the%20launch%20of%20cuil%20whose%20premise%20is%20more%20effecient%20indexing%20--%20what%20about%20indexing%20on%20demmand%20--%20more%20after%20I%20get%20off%20the%20bus%0A" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F07%2Fon-demand-indexing%2F&amp;title=On%20demand%20indexing" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F07%2Fon-demand-indexing%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F07%2Fon-demand-indexing%2F&amp;title=On%20demand%20indexing" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/07/on-demand-indexing/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>FreeBase Meetup Tomorrow</title>
		<link>http://yousefourabi.com/blog/2008/06/freebase-meetup-tomorrow/</link>
		<comments>http://yousefourabi.com/blog/2008/06/freebase-meetup-tomorrow/#comments</comments>
		<pubDate>Tue, 17 Jun 2008 04:41:20 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/?p=154</guid>
		<description><![CDATA[I&#8217;ll be heading to the FreeBase (I keep on writing FreeBSD&#8230;) meetup tomorrow at their SF HQ&#8230; Check out a description here: http://blog.freebase.com/2008/06/12/speakers-at-next-tuesdays-freebase-user-group/




	
	
	
	
	


]]></description>
			<content:encoded><![CDATA[<p>I&#8217;ll be heading to the FreeBase (I keep on writing FreeBSD&#8230;) meetup tomorrow at their SF HQ&#8230; Check out a description here: <a href="http://blog.freebase.com/2008/06/12/speakers-at-next-tuesdays-freebase-user-group/">http://blog.freebase.com/2008/06/12/speakers-at-next-tuesdays-freebase-user-group/</a></p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F06%2Ffreebase-meetup-tomorrow%2F&amp;title=FreeBase%20Meetup%20Tomorrow&amp;bodytext=I%27ll%20be%20heading%20to%20the%20FreeBase%20%28I%20keep%20on%20writing%20FreeBSD...%29%20meetup%20tomorrow%20at%20their%20SF%20HQ...%20Check%20out%20a%20description%20here%3A%20http%3A%2F%2Fblog.freebase.com%2F2008%2F06%2F12%2Fspeakers-at-next-tuesdays-freebase-user-group%2F" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F06%2Ffreebase-meetup-tomorrow%2F&amp;title=FreeBase%20Meetup%20Tomorrow&amp;notes=I%27ll%20be%20heading%20to%20the%20FreeBase%20%28I%20keep%20on%20writing%20FreeBSD...%29%20meetup%20tomorrow%20at%20their%20SF%20HQ...%20Check%20out%20a%20description%20here%3A%20http%3A%2F%2Fblog.freebase.com%2F2008%2F06%2F12%2Fspeakers-at-next-tuesdays-freebase-user-group%2F" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F06%2Ffreebase-meetup-tomorrow%2F&amp;title=FreeBase%20Meetup%20Tomorrow" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F06%2Ffreebase-meetup-tomorrow%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F06%2Ffreebase-meetup-tomorrow%2F&amp;title=FreeBase%20Meetup%20Tomorrow" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/06/freebase-meetup-tomorrow/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>What is next for OpenSocial</title>
		<link>http://yousefourabi.com/blog/2008/05/what-is-next-for-opensocial/</link>
		<comments>http://yousefourabi.com/blog/2008/05/what-is-next-for-opensocial/#comments</comments>
		<pubDate>Thu, 29 May 2008 18:01:13 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/?p=145</guid>
		<description><![CDATA[I&#8217;m at the Google IO conference, currently in the &#8220;What&#8217;s next for OpenSocial&#8221; presentation. I&#8217;m going to try and live blog this:
Speaker: How to balance dictatorship and anarchy&#8230;
OpenSocial 0.8 has been released, see http://opensocial.org
Enhancements to JS and XML definitions, RESTful APIs, Templating language 
JS/XML:
Main thing: Cleanup and convenience  (Gadgets XML, gadgets.* opensocial.*)
InlinedMessage bundles (languages, [...]]]></description>
			<content:encoded><![CDATA[<p>I&#8217;m at the Google IO conference, currently in the &#8220;What&#8217;s next for OpenSocial&#8221; presentation. I&#8217;m going to try and live blog this:</p>
<p>Speaker: How to balance dictatorship and anarchy&#8230;</p>
<p>OpenSocial 0.8 has been released, see <a href="http://opensocial.org">http://opensocial.org</a></p>
<p>Enhancements to JS and XML definitions, RESTful APIs, Templating language </p>
<p>JS/XML:<br />
Main thing: Cleanup and convenience  (Gadgets XML, gadgets.* opensocial.*)<br />
InlinedMessage bundles (languages, i18n)</p>
<p>Server rendering Gadgets can preload data (ServerSide), improves perceived performance since Gadget doesn&#8217;t need to make async XHR to query data</p>
<p>OAuth supported</p>
<p>John Panzer on stage for talk on RESTful APIs</p>
<p>Access to new environments (Mobile, etc)<br />
Discovery via XRDS-Simple (Same as OpenID)<br />
Authentication + Authorization: OAuth (Authorizing third party code to act on behalf of user)</p>
<p>Data: People, Activities, AppData, Groups<br />
Formats: JSON, Atom XML</p>
<p>RESTful API provides facilities for: Messaging, Concurrency, Batching</p>
<p>Templating Language and tags:<br />
Goals: Easy to develop, Secure (XSS Security preventions), Fast (Pre-Process server side), OpenSocial-Style, Can build an entire application</p>
<p>Template mime type is text/os-template &#8212; defines inline template section, template is valid xml snippet , uses JSP expression language. </p>
<p>Containers can provide their own template tag libraries&#8230;(YouTube could provide its own tags for example).</p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fwhat-is-next-for-opensocial%2F&amp;title=What%20is%20next%20for%20OpenSocial&amp;bodytext=I%27m%20at%20the%20Google%20IO%20conference%2C%20currently%20in%20the%20%22What%27s%20next%20for%20OpenSocial%22%20presentation.%20I%27m%20going%20to%20try%20and%20live%20blog%20this%3A%0D%0A%0D%0ASpeaker%3A%20How%20to%20balance%20dictatorship%20and%20anarchy...%0D%0A%0D%0AOpenSocial%200.8%20has%20been%20release%2C%20see%20http%3A%2F%2Fopensocial.org%0D%0A%0D%0A" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fwhat-is-next-for-opensocial%2F&amp;title=What%20is%20next%20for%20OpenSocial&amp;notes=I%27m%20at%20the%20Google%20IO%20conference%2C%20currently%20in%20the%20%22What%27s%20next%20for%20OpenSocial%22%20presentation.%20I%27m%20going%20to%20try%20and%20live%20blog%20this%3A%0D%0A%0D%0ASpeaker%3A%20How%20to%20balance%20dictatorship%20and%20anarchy...%0D%0A%0D%0AOpenSocial%200.8%20has%20been%20release%2C%20see%20http%3A%2F%2Fopensocial.org%0D%0A%0D%0A" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fwhat-is-next-for-opensocial%2F&amp;title=What%20is%20next%20for%20OpenSocial" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fwhat-is-next-for-opensocial%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fwhat-is-next-for-opensocial%2F&amp;title=What%20is%20next%20for%20OpenSocial" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/05/what-is-next-for-opensocial/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Heritrix Conference Call</title>
		<link>http://yousefourabi.com/blog/2008/05/heritrix-conference-call/</link>
		<comments>http://yousefourabi.com/blog/2008/05/heritrix-conference-call/#comments</comments>
		<pubDate>Thu, 29 May 2008 05:02:42 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/?p=144</guid>
		<description><![CDATA[The internet archive is trying to reach out and connect with the crawling / harvesting community that uses its open source crawler Heritrix
The first call will occur via a &#8216;Skypecast&#8217;, at the following time:
1500GMT Wednesday May 28th
(8a San Francisco / 11a WashingtonDC / 4p London)
The call will open with a brief overview of what&#8217;s new [...]]]></description>
			<content:encoded><![CDATA[<p>The internet archive is trying to reach out and connect with the crawling / harvesting community that uses its open source crawler <a href="http://crawler.archive.org/">Heritrix</a></p>
<p>The first call will occur via a &#8216;Skypecast&#8217;, at the following time:</p>
<blockquote><p>1500GMT Wednesday May 28th<br />
(8a San Francisco / 11a WashingtonDC / 4p London)</p>
<p>The call will open with a brief overview of what&#8217;s new in the latest<br />
Heritrix releases (1.14.0 and 2.0.0), and some of the priorities for<br />
future releases. Then, it will be an open discussion of whatever<br />
participants want to talk about.</p></blockquote>
<p><a href="http://webteam.archive.org/confluence/display/Heritrix/Community+Calls">http://webteam.archive.org/confluence/display/Heritrix/Community+Calls</a></p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fheritrix-conference-call%2F&amp;title=Heritrix%20Conference%20Call&amp;bodytext=The%20internet%20archive%20is%20trying%20to%20reach%20out%20and%20connect%20with%20the%20crawling%20%2F%20harvesting%20community%20that%20uses%20its%20open%20source%20crawler%20Heritrix%0D%0A%0D%0AThe%20first%20call%20will%20occur%20via%20a%20%27Skypecast%27%2C%20at%20the%20following%20time%3A%0D%0A%0D%0A1500GMT%20Wednesday%20May%2028th%0D%0A%288a%20San%20" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fheritrix-conference-call%2F&amp;title=Heritrix%20Conference%20Call&amp;notes=The%20internet%20archive%20is%20trying%20to%20reach%20out%20and%20connect%20with%20the%20crawling%20%2F%20harvesting%20community%20that%20uses%20its%20open%20source%20crawler%20Heritrix%0D%0A%0D%0AThe%20first%20call%20will%20occur%20via%20a%20%27Skypecast%27%2C%20at%20the%20following%20time%3A%0D%0A%0D%0A1500GMT%20Wednesday%20May%2028th%0D%0A%288a%20San%20" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fheritrix-conference-call%2F&amp;title=Heritrix%20Conference%20Call" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fheritrix-conference-call%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fheritrix-conference-call%2F&amp;title=Heritrix%20Conference%20Call" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/05/heritrix-conference-call/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Blekko launches&#8230;something</title>
		<link>http://yousefourabi.com/blog/2008/05/blekko-launchessomething/</link>
		<comments>http://yousefourabi.com/blog/2008/05/blekko-launchessomething/#comments</comments>
		<pubDate>Wed, 14 May 2008 22:25:17 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>
		<category><![CDATA[Startups]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/?p=128</guid>
		<description><![CDATA[Blekko &#8212; allegedly a search engine has just scored another round of funding according to Mike Arrington at Techcrunch &#8212; Blekko raised 3 million at a 23 million post money valuation.
Mike is (implicitly) comparing Blekko to Cuill. Cuill (pronounced &#8220;cool&#8221;) also in stealth mode claims to have a much cheaper, and more efficient way of [...]]]></description>
			<content:encoded><![CDATA[<p><a title="Blekko search engine" href="http://www.blekko.com/">Blekko</a> &#8212; allegedly a search engine has just scored another round of funding according to <a title="Blekko scores funding from Marc Andreessen" href="http://www.techcrunch.com/2008/05/14/stealth-search-engine-blekko-gets-money-from-marc-andreessen-softtech/">Mike Arrington at Techcrunch</a> &#8212; Blekko raised 3 million at a 23 million post money valuation.</p>
<p>Mike is (implicitly) comparing Blekko to <a title="Cuill" href="http://cuill.com/">Cuill</a>. <a title="Cuill" href="http://cuill.com/">Cuill </a>(pronounced &#8220;cool&#8221;) also in stealth mode claims to have a much cheaper, and more efficient way of indexing the web compared to Google.  I think this is a bit premature since no one really knows what <a href="http://cuill.com/">Cuill </a>or <a href="http://www.blekko.com/">Blekko</a> are doing.</p>
<p>Links</p>
<p><a href="http://www.techcrunch.com/2008/05/14/stealth-search-engine-blekko-gets-money-from-marc-andreessen-softtech/">Blekko raises round from </a><a onclick="javascript:urchinTracker ('/outbound/www.crunchbase.com');" href="http://www.crunchbase.com/person/marc-andreessen">Marc Andreessen</a></p>
<p><a href="http://www.techcrunch.com/2008/01/02/the-next-google-search-challenger-blekko/">Google Challenger: Blekko</a></p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fblekko-launchessomething%2F&amp;title=Blekko%20launches...something&amp;bodytext=Blekko%20--%20allegedly%20a%20search%20engine%20has%20just%20scored%20another%20round%20of%20funding%20according%20to%20Mike%20Arrington%20at%20Techcrunch%20--%20Blekko%20raised%203%20million%20at%20a%2023%20million%20post%20money%20valuation.%0D%0A%0D%0AMike%20is%20%28implicitly%29%20comparing%20Blekko%20to%20Cuill.%20Cuill%20%28pronounc" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fblekko-launchessomething%2F&amp;title=Blekko%20launches...something&amp;notes=Blekko%20--%20allegedly%20a%20search%20engine%20has%20just%20scored%20another%20round%20of%20funding%20according%20to%20Mike%20Arrington%20at%20Techcrunch%20--%20Blekko%20raised%203%20million%20at%20a%2023%20million%20post%20money%20valuation.%0D%0A%0D%0AMike%20is%20%28implicitly%29%20comparing%20Blekko%20to%20Cuill.%20Cuill%20%28pronounc" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fblekko-launchessomething%2F&amp;title=Blekko%20launches...something" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fblekko-launchessomething%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fblekko-launchessomething%2F&amp;title=Blekko%20launches...something" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/05/blekko-launchessomething/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Powerset launches</title>
		<link>http://yousefourabi.com/blog/2008/05/powerset-launches/</link>
		<comments>http://yousefourabi.com/blog/2008/05/powerset-launches/#comments</comments>
		<pubDate>Tue, 13 May 2008 17:43:01 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>
		<category><![CDATA[Startups]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/?p=125</guid>
		<description><![CDATA[Powerset finally came out of private beta this Sunday with a search product around Wikipedia, and Freebase.
It will be interesting to see how competitor Hakia responds.
I&#8217;m a little underwhelmed with powerset, especially since it is searching semi-structured data where some of the relationships are more explicit than out in the wild.
http://20bits.com/2008/05/12/powerset-launches-verdict-meh/
http://venturebeat.com/2008/05/12/powerset-opens-to-everyone-now-whats-next/
http://www.techcrunch.com/2008/05/11/powerset-launches-showcase-for-user-search-experience/
http://gigaom.com/2008/05/11/powerset-is-live/




	
	
	
	
	


]]></description>
			<content:encoded><![CDATA[<p>Powerset finally came out of private beta this Sunday with a search product around Wikipedia, and Freebase.</p>
<p>It will be interesting to see how competitor Hakia responds.</p>
<p>I&#8217;m a little underwhelmed with powerset, especially since it is searching semi-structured data where some of the relationships are more explicit than out in the wild.</p>
<p>http://20bits.com/2008/05/12/powerset-launches-verdict-meh/</p>
<p>http://venturebeat.com/2008/05/12/powerset-opens-to-everyone-now-whats-next/</p>
<p>http://www.techcrunch.com/2008/05/11/powerset-launches-showcase-for-user-search-experience/</p>
<p>http://gigaom.com/2008/05/11/powerset-is-live/</p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fpowerset-launches%2F&amp;title=Powerset%20launches&amp;bodytext=Powerset%20finally%20came%20out%20of%20private%20beta%20this%20Sunday%20with%20a%20search%20product%20around%20Wikipedia%2C%20and%20Freebase.%0D%0A%0D%0AIt%20will%20be%20interesting%20to%20see%20how%20competitor%20Hakia%20responds.%0D%0A%0D%0AI%27m%20a%20little%20underwhelmed%20with%20powerset%2C%20especially%20since%20it%20is%20searching%20s" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fpowerset-launches%2F&amp;title=Powerset%20launches&amp;notes=Powerset%20finally%20came%20out%20of%20private%20beta%20this%20Sunday%20with%20a%20search%20product%20around%20Wikipedia%2C%20and%20Freebase.%0D%0A%0D%0AIt%20will%20be%20interesting%20to%20see%20how%20competitor%20Hakia%20responds.%0D%0A%0D%0AI%27m%20a%20little%20underwhelmed%20with%20powerset%2C%20especially%20since%20it%20is%20searching%20s" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fpowerset-launches%2F&amp;title=Powerset%20launches" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fpowerset-launches%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F05%2Fpowerset-launches%2F&amp;title=Powerset%20launches" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/05/powerset-launches/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Setting http headers with Apache and mod_headers</title>
		<link>http://yousefourabi.com/blog/2008/04/setting-http-headers-with-apache-and-mod_headers/</link>
		<comments>http://yousefourabi.com/blog/2008/04/setting-http-headers-with-apache-and-mod_headers/#comments</comments>
		<pubDate>Sat, 05 Apr 2008 19:05:45 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/setting-http-headers-with-apache-and-mod_headers</guid>
		<description><![CDATA[Continuing my HTTP bender, I&#8217;d like to discuss some fun / necessary things you can do to manipulate HTTP headers using the Apache mod_headers module.
If you were to make a request to Slashdot and examine the Http headers using either HttpLiveHeaders or FireBug you would notice one of two unusual headers: X-Bender, and X-Fry with [...]]]></description>
			<content:encoded><![CDATA[<p>Continuing my HTTP bender, I&#8217;d like to discuss some fun / necessary things you can do to manipulate HTTP headers using the Apache mod_headers module.</p>
<p>If you were to make a request to Slashdot and examine the Http headers using either HttpLiveHeaders or FireBug you would notice one of two unusual headers: X-Bender, and X-Fry with quirky messages. These are pop-culture references to the show &#8220;<a href="http://en.wikipedia.org/wiki/Futurama" title="Futurama">Futurama</a>&#8221;.</p>
<p>There are a few ways you could accomplish this &#8212; if you were already working with mod_perl, mod_python, or PHP it would be trivial to manipulate the headers programmatically. However, it&#8217;s also possible to do so with a simple one liner in your Apache conf.</p>
<p>First you&#8217;ll need to enable mod_headers &#8212; On my Debian server this was done with the following command: a2enmod headers, after which I had to restart Apache.</p>
<p>Then simply add something like the following line in your conf, of course replacing &#8220;HeaderNameHere&#8221; and &#8220;Header Value&#8221; with whatever you want.</p>
<p><code>           Header add HeaderNameHere "Header Value here"</code></p>
<p>The &#8220;Header&#8221; directive has a number of possible actions including: set, unset, append, add and echo. The documentation is fairly well written and can be found here: <a href="http://httpd.apache.org/docs/2.0/mod/mod_headers.html" title="mod_headers documentation">mod_headers documentation</a></p>
<p>One useful application would be in a load-balanced  apache setup, you could  set the actual hostname as a header for debugging purposes. You can echo environment variables like so: <code>%{FOOBAR}e</code>  which would display the &#8220;FOOBAR&#8221; variable.</p>
<p>Have fun tweaking Apache headers, hope this helps.</p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fsetting-http-headers-with-apache-and-mod_headers%2F&amp;title=Setting%20http%20headers%20with%20Apache%20and%20mod_headers&amp;bodytext=Continuing%20my%20HTTP%20bender%2C%20I%27d%20like%20to%20discuss%20some%20fun%20%2F%20necessary%20things%20you%20can%20do%20to%20manipulate%20HTTP%20headers%20using%20the%20Apache%20mod_headers%20module.%0D%0A%0D%0AIf%20you%20were%20to%20make%20a%20request%20to%20Slashdot%20and%20examine%20the%20Http%20headers%20using%20either%20HttpLiveHeade" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fsetting-http-headers-with-apache-and-mod_headers%2F&amp;title=Setting%20http%20headers%20with%20Apache%20and%20mod_headers&amp;notes=Continuing%20my%20HTTP%20bender%2C%20I%27d%20like%20to%20discuss%20some%20fun%20%2F%20necessary%20things%20you%20can%20do%20to%20manipulate%20HTTP%20headers%20using%20the%20Apache%20mod_headers%20module.%0D%0A%0D%0AIf%20you%20were%20to%20make%20a%20request%20to%20Slashdot%20and%20examine%20the%20Http%20headers%20using%20either%20HttpLiveHeade" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fsetting-http-headers-with-apache-and-mod_headers%2F&amp;title=Setting%20http%20headers%20with%20Apache%20and%20mod_headers" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fsetting-http-headers-with-apache-and-mod_headers%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fsetting-http-headers-with-apache-and-mod_headers%2F&amp;title=Setting%20http%20headers%20with%20Apache%20and%20mod_headers" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/04/setting-http-headers-with-apache-and-mod_headers/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Search.com redesign</title>
		<link>http://yousefourabi.com/blog/2008/04/searchcom-redesign/</link>
		<comments>http://yousefourabi.com/blog/2008/04/searchcom-redesign/#comments</comments>
		<pubDate>Fri, 04 Apr 2008 05:22:32 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/searchcom-redesign</guid>
		<description><![CDATA[Search.com has just launched a redesign &#8212; very cool. Check it out: http://www.search.com




	
	
	
	
	


]]></description>
			<content:encoded><![CDATA[<p>Search.com has just launched a redesign &#8212; very cool. Check it out: http://www.search.com</p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fsearchcom-redesign%2F&amp;title=Search.com%20redesign&amp;bodytext=Search.com%20has%20just%20launched%20a%20redesign%20--%20very%20cool.%20Check%20it%20out%3A%20http%3A%2F%2Fwww.search.com" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fsearchcom-redesign%2F&amp;title=Search.com%20redesign&amp;notes=Search.com%20has%20just%20launched%20a%20redesign%20--%20very%20cool.%20Check%20it%20out%3A%20http%3A%2F%2Fwww.search.com" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fsearchcom-redesign%2F&amp;title=Search.com%20redesign" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fsearchcom-redesign%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fsearchcom-redesign%2F&amp;title=Search.com%20redesign" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/04/searchcom-redesign/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Handy wordpress plugins</title>
		<link>http://yousefourabi.com/blog/2008/04/handy-wordpress-plugins/</link>
		<comments>http://yousefourabi.com/blog/2008/04/handy-wordpress-plugins/#comments</comments>
		<pubDate>Fri, 04 Apr 2008 03:52:34 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/handy-wordpress-plugins</guid>
		<description><![CDATA[I try not to re-hash content from other sites, but here&#8217;s a link that is too tasty to pass up on
15 handy wordpress plugins (for power users) 




	
	
	
	
	


]]></description>
			<content:encoded><![CDATA[<p>I try not to re-hash content from other sites, but here&#8217;s a link that is too tasty to pass up on</p>
<p><a href="http://sixrevisions.com/tools-of-the-trade/helpful_wordpress_plugins_advanced_users/" title="Handy wordpress plugins">15 handy wordpress plugins (for power users) </a></p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fhandy-wordpress-plugins%2F&amp;title=Handy%20wordpress%20plugins&amp;bodytext=I%20try%20not%20to%20re-hash%20content%20from%20other%20sites%2C%20but%20here%27s%20a%20link%20that%20is%20too%20tasty%20to%20pass%20up%20on%0D%0A%0D%0A15%20handy%20wordpress%20plugins%20%28for%20power%20users%29%C2%A0" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fhandy-wordpress-plugins%2F&amp;title=Handy%20wordpress%20plugins&amp;notes=I%20try%20not%20to%20re-hash%20content%20from%20other%20sites%2C%20but%20here%27s%20a%20link%20that%20is%20too%20tasty%20to%20pass%20up%20on%0D%0A%0D%0A15%20handy%20wordpress%20plugins%20%28for%20power%20users%29%C2%A0" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fhandy-wordpress-plugins%2F&amp;title=Handy%20wordpress%20plugins" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fhandy-wordpress-plugins%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fhandy-wordpress-plugins%2F&amp;title=Handy%20wordpress%20plugins" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/04/handy-wordpress-plugins/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Digg please return semantically accurate http status code</title>
		<link>http://yousefourabi.com/blog/2008/04/digg-please-return-semantically-accurate-http-status-code/</link>
		<comments>http://yousefourabi.com/blog/2008/04/digg-please-return-semantically-accurate-http-status-code/#comments</comments>
		<pubDate>Thu, 03 Apr 2008 05:38:25 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/digg-please-return-semantically-accurate-http-status-code</guid>
		<description><![CDATA[Dear Digg,
I&#8217;m a huge fan.  I check your site a few times a day. However, there is one thing that has really been bugging me. The maintenance page. Whenever you throw up that oh-so-useful page full of links to the favorite sites of your employees&#8230; you return a 200 status code.
Think of me not [...]]]></description>
			<content:encoded><![CDATA[<p>Dear Digg,</p>
<p>I&#8217;m a huge fan.  I check your site a few times a day. However, there is one thing that has really been bugging me. The maintenance page. Whenever you throw up that oh-so-useful page full of links to the favorite sites of your employees&#8230; you return a 200 status code.</p>
<p>Think of me not as a big fan, but rather an eager web-crawler from Google or some new startup. When I find my way to your page, and your error page is returning a 200 Status (OK) &#8212; I take it to mean that everything is hunky dory, and this content is really what you are all about.</p>
<p>Let me school you on this radical bit of information: You can return other status codes &#8212; such as 500 &#8212; and return a pretty html document containing all the links to your favorite sites.</p>
<p>Speaking of documents&#8230;</p>
<p>There is one document in particular I would like to refer (get it&#8230;) you to: RFC 2616. Section 10, titled &#8220;Status Code Definitions,&#8221; is particularly relevant.  The most relevant among this list are the following status codes: 200, 304, and 503. I mention these because there seems to be a certain level of confusion among your engineers.</p>
<p>200: OK &#8212; This is what we meant to serve out, and feel free to associate this context with our URI</p>
<p>304: Not Modified: A cache setting, meaning &#8220;No Update&#8221; use cached version</p>
<p>503: Service Unavailable: Bingo! Doesn&#8217;t that sound like something we would want to return as our status when we have our maintenance page up? For extra brownie points, consider sending a Retry-After header with some amount of time&#8230;</p>
<p>Here is the output from Live HTTP headers on one of these requests.</p>
<p>Thanks again!</p>
<p>Yousef</p>
<p><font size="2"> GET / HTTP/1.1<br />
Host: digg.com<br />
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13)<br />
Gecko/20080311 Firefox/2.0.0.13<br />
Accept:<br />
text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plai<br />
n;q=0.8,image/png,*/*;q=0.5<br />
Accept-Language: en<br />
Accept-Encoding: gzip,deflate<br />
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7<br />
Keep-Alive: 300<br />
Connection: keep-alive</font></p>
<p><font size="2">HTTP/1.x 200 OK<br />
Date: Mon, 31 Mar 2008 23:38:53 GMT<br />
Server: Apache<br />
Last-Modified: Mon, 31 Mar 2008 20:52:50 GMT<br />
Etag: "714c87-15e9-449c1d5b88c80"<br />
Accept-Ranges: bytes<br />
Keep-Alive: timeout=5, max=10000<br />
Connection: Keep-Alive<br />
Content-Type: text/html; charset=UTF-8<br />
Cache-Control: private<br />
Content-Encoding: gzip<br />
Content-Length: 2069<br />
&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;-<br />
<a href="https://owa.cnet.com/exchweb/bin/redir.asp?URL=http://digg.com/img/down.png" target="_blank">http://digg.com/img/down.png</a></font></p>
<p><font size="2">GET /img/down.png HTTP/1.1<br />
Host: digg.com<br />
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13)<br />
Gecko/20080311 Firefox/2.0.0.13<br />
Accept: image/png,*/*;q=0.5<br />
Accept-Language: en<br />
Accept-Encoding: gzip,deflate<br />
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7<br />
Keep-Alive: 300<br />
Connection: keep-alive<br />
Referer: <a href="https://owa.cnet.com/exchweb/bin/redir.asp?URL=http://digg.com/" target="_blank">http://digg.com/</a></font></p>
<p><font size="2">HTTP/1.x 200 OK<br />
Age: 2027<br />
Date: Mon, 31 Mar 2008 23:04:19 GMT<br />
Connection: Keep-Alive<br />
Via: NS-CACHE: 100<br />
Etag: "720139-89b-41414d62cbc00"<br />
Server: Apache<br />
Last-Modified: Thu, 18 May 2006 19:13:52 GMT<br />
Accept-Ranges: bytes<br />
Content-Length: 2203<br />
Keep-Alive: timeout=5, max=9998<br />
Content-Type: image/png<br />
&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;-<br />
<a href="https://owa.cnet.com/exchweb/bin/redir.asp?URL=http://digg.com/favicon.ico" target="_blank">http://digg.com/favicon.ico</a></font></p>
<p><font size="2">GET /favicon.ico HTTP/1.1<br />
Host: digg.com<br />
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.13)<br />
Gecko/20080311 Firefox/2.0.0.13<br />
Accept: image/png,*/*;q=0.5<br />
Accept-Language: en<br />
Accept-Encoding: gzip,deflate<br />
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7<br />
Keep-Alive: 300<br />
Connection: keep-alive</font></p>
<p><font size="2">HTTP/1.x 200 OK<br />
Age: 2932<br />
Date: Mon, 31 Mar 2008 22:49:13 GMT<br />
Connection: Keep-Alive<br />
Via: NS-CACHE: 100<br />
Etag: "2884541-47e-418dfb6884480"<br />
Server: Apache<br />
Last-Modified: Tue, 18 Jul 2006 18:57:06 GMT<br />
Accept-Ranges: bytes<br />
Content-Length: 1150<br />
Keep-Alive: timeout=5, max=9997<br />
Content-Type: image/x-ico</font></p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fdigg-please-return-semantically-accurate-http-status-code%2F&amp;title=Digg%20please%20return%20semantically%20accurate%20http%20status%20code&amp;bodytext=Dear%20Digg%2C%0D%0A%0D%0AI%27m%20a%20huge%20fan.%20%20I%20check%20our%20site%20a%20few%20times%20a%20day.%20However%2C%20there%20is%20one%20thing%20that%20has%20really%20been%20bugging%20me.%20The%20maintenance%20page.%20Whenever%20you%20throw%20up%20that%20oh-so-useful%20page%20full%20of%20links%20to%20the%20favorite%20sites%20of%20your%20employees.." title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fdigg-please-return-semantically-accurate-http-status-code%2F&amp;title=Digg%20please%20return%20semantically%20accurate%20http%20status%20code&amp;notes=Dear%20Digg%2C%0D%0A%0D%0AI%27m%20a%20huge%20fan.%20%20I%20check%20our%20site%20a%20few%20times%20a%20day.%20However%2C%20there%20is%20one%20thing%20that%20has%20really%20been%20bugging%20me.%20The%20maintenance%20page.%20Whenever%20you%20throw%20up%20that%20oh-so-useful%20page%20full%20of%20links%20to%20the%20favorite%20sites%20of%20your%20employees.." title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fdigg-please-return-semantically-accurate-http-status-code%2F&amp;title=Digg%20please%20return%20semantically%20accurate%20http%20status%20code" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fdigg-please-return-semantically-accurate-http-status-code%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F04%2Fdigg-please-return-semantically-accurate-http-status-code%2F&amp;title=Digg%20please%20return%20semantically%20accurate%20http%20status%20code" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/04/digg-please-return-semantically-accurate-http-status-code/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>evernote has me excited</title>
		<link>http://yousefourabi.com/blog/2008/02/evernote-has-me-excited/</link>
		<comments>http://yousefourabi.com/blog/2008/02/evernote-has-me-excited/#comments</comments>
		<pubDate>Fri, 22 Feb 2008 06:03:34 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/evernote-has-me-excited</guid>
		<description><![CDATA[I rarely get pumped out about tech startups &#8212; they are a dime a dozen &#8212; and the ideas behind them are usually bad.
Evernote is an exciting startup &#8212; when I day-dream about the future of the web &#8212; this is the sort of thing I imagine.
It&#8217;s a service that lets you take pictures of [...]]]></description>
			<content:encoded><![CDATA[<p>I rarely get pumped out about tech startups &#8212; they are a dime a dozen &#8212; and the ideas behind them are usually bad.</p>
<p>Evernote is an exciting startup &#8212; when I day-dream about the future of the web &#8212; this is the sort of thing I imagine.</p>
<p>It&#8217;s a service that lets you take pictures of anything, and upload them. Simple enough. The twist is that it has out-of-this-world optical character recognition and will find snippets of text wherever they appear on the image &#8212; and then evernote creates a full-text search engine (think google) of that text &#8211;<br />
This is a totally amazing startup, and I won&#8217;t be surprised if they get snapped up by one of the big boys.</p>
<p>Keep your eye on their site:  http://www.evernote.com/</p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F02%2Fevernote-has-me-excited%2F&amp;title=evernote%20has%20me%20excited&amp;bodytext=I%20rarely%20get%20pumped%20out%20about%20tech%20startups%20--%20they%20are%20a%20dime%20a%20dozen%20--%20and%20the%20ideas%20behind%20them%20are%20usually%20bad.%0D%0A%0D%0AEvernote%20is%20an%20exciting%20startup%20--%20when%20I%20day-dream%20about%20the%20future%20of%20the%20web%20--%20this%20is%20the%20sort%20of%20thing%20I%20imagine.%0D%0A%0D%0AIt%27s%20a%20" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F02%2Fevernote-has-me-excited%2F&amp;title=evernote%20has%20me%20excited&amp;notes=I%20rarely%20get%20pumped%20out%20about%20tech%20startups%20--%20they%20are%20a%20dime%20a%20dozen%20--%20and%20the%20ideas%20behind%20them%20are%20usually%20bad.%0D%0A%0D%0AEvernote%20is%20an%20exciting%20startup%20--%20when%20I%20day-dream%20about%20the%20future%20of%20the%20web%20--%20this%20is%20the%20sort%20of%20thing%20I%20imagine.%0D%0A%0D%0AIt%27s%20a%20" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F02%2Fevernote-has-me-excited%2F&amp;title=evernote%20has%20me%20excited" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F02%2Fevernote-has-me-excited%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F02%2Fevernote-has-me-excited%2F&amp;title=evernote%20has%20me%20excited" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/02/evernote-has-me-excited/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		</item>
		<item>
		<title>my favorite firefox extensions</title>
		<link>http://yousefourabi.com/blog/2008/02/my-favorite-firefox-extensions/</link>
		<comments>http://yousefourabi.com/blog/2008/02/my-favorite-firefox-extensions/#comments</comments>
		<pubDate>Tue, 19 Feb 2008 04:36:21 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/my-favorite-firefox-extensions</guid>
		<description><![CDATA[ChatZilla
Download Statusbar
Firebug
Forecastfox
Google Browser Sync
Live HTTP Headers
Web Developer
YSlow




	
	
	
	
	


]]></description>
			<content:encoded><![CDATA[<p>ChatZilla</p>
<p>Download Statusbar</p>
<p>Firebug</p>
<p>Forecastfox</p>
<p>Google Browser Sync</p>
<p>Live HTTP Headers</p>
<p>Web Developer</p>
<p>YSlow</p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F02%2Fmy-favorite-firefox-extensions%2F&amp;title=my%20favorite%20firefox%20extensions&amp;bodytext=ChatZilla%0D%0A%0D%0ADownload%20Statusbar%0D%0A%0D%0AFirebug%0D%0A%0D%0AForcastfox%0D%0A%0D%0AGoogle%20Browser%20Sync%0D%0A%0D%0ALive%20HTTP%20Headers%0D%0A%0D%0AWeb%20Developer%0D%0A%0D%0AYSlow" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F02%2Fmy-favorite-firefox-extensions%2F&amp;title=my%20favorite%20firefox%20extensions&amp;notes=ChatZilla%0D%0A%0D%0ADownload%20Statusbar%0D%0A%0D%0AFirebug%0D%0A%0D%0AForcastfox%0D%0A%0D%0AGoogle%20Browser%20Sync%0D%0A%0D%0ALive%20HTTP%20Headers%0D%0A%0D%0AWeb%20Developer%0D%0A%0D%0AYSlow" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F02%2Fmy-favorite-firefox-extensions%2F&amp;title=my%20favorite%20firefox%20extensions" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F02%2Fmy-favorite-firefox-extensions%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F02%2Fmy-favorite-firefox-extensions%2F&amp;title=my%20favorite%20firefox%20extensions" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/02/my-favorite-firefox-extensions/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Mahout Machine Learning Lucene Subproject</title>
		<link>http://yousefourabi.com/blog/2008/01/mahout-machine-learning-lucene-subproject/</link>
		<comments>http://yousefourabi.com/blog/2008/01/mahout-machine-learning-lucene-subproject/#comments</comments>
		<pubDate>Wed, 30 Jan 2008 05:31:56 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/mahout-machine-learning-lucene-subproject</guid>
		<description><![CDATA[Grant Ingersoll of the Apache Lucene project. Lucene developed open source search libraries, and Mahout&#8217;s goals are to develop Machine Learning libraries around hadoop, and hbase.
This is definitely a project worth watching.  It&#8217;s just starting (was accepted to apache on the 28th) so if you are looking to jump in, now is the time.




	
	
	
	
	


]]></description>
			<content:encoded><![CDATA[<p>Grant Ingersoll of the Apache Lucene project. Lucene developed open source search libraries, and Mahout&#8217;s goals are to develop Machine Learning libraries around Hadoop and HBase.</p>
<p>This is definitely a project worth watching.  It&#8217;s just starting (was accepted to apache on the 28th) so if you are looking to jump in, now is the time.</p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fmahout-machine-learning-lucene-subproject%2F&amp;title=Mahout%20Machine%20Learning%20Lucene%20Subproject&amp;bodytext=Grant%20Ingersoll%20of%20the%20Apache%20Lucene%20project.%20Lucene%20developed%20open%20source%20search%20libraries%2C%20and%20Mahuts%20goals%20are%20to%20develop%20Machine%20Learning%20libraries%20around%20hadoop%2C%20and%20hbase.%0D%0A%0D%0AThis%20is%20definitely%20a%20project%20worth%20watching.%C2%A0%20It%27s%20just%20starting%20%28wa" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fmahout-machine-learning-lucene-subproject%2F&amp;title=Mahout%20Machine%20Learning%20Lucene%20Subproject&amp;notes=Grant%20Ingersoll%20of%20the%20Apache%20Lucene%20project.%20Lucene%20developed%20open%20source%20search%20libraries%2C%20and%20Mahuts%20goals%20are%20to%20develop%20Machine%20Learning%20libraries%20around%20hadoop%2C%20and%20hbase.%0D%0A%0D%0AThis%20is%20definitely%20a%20project%20worth%20watching.%C2%A0%20It%27s%20just%20starting%20%28wa" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fmahout-machine-learning-lucene-subproject%2F&amp;title=Mahout%20Machine%20Learning%20Lucene%20Subproject" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fmahout-machine-learning-lucene-subproject%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fmahout-machine-learning-lucene-subproject%2F&amp;title=Mahout%20Machine%20Learning%20Lucene%20Subproject" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/01/mahout-machine-learning-lucene-subproject/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Wikia Wikia Wikia</title>
		<link>http://yousefourabi.com/blog/2008/01/wikia-wikia-wikia/</link>
		<comments>http://yousefourabi.com/blog/2008/01/wikia-wikia-wikia/#comments</comments>
		<pubDate>Fri, 11 Jan 2008 08:05:09 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/wikia-wikia-wikia</guid>
		<description><![CDATA[Lots of movement at Wikia today.
1) Foowi, the &#8220;Social&#8221; aspects have gone open source. See: http://svn.swlabs.org/foowi
2) I&#8217;ve released a few patches for the re-designed grub crawler, that aim to make things easier to use. The patch only slightly breaks things. See here: http://lists.wikia.com/pipermail/grub-dev/2008-January/thread.html
3) The Nutch stuff is supposed to be open sourced in the coming [...]]]></description>
			<content:encoded><![CDATA[<p>Lots of movement at Wikia today.</p>
<p>1) Foowi, the &#8220;Social&#8221; aspects have gone open source. See: <a href="https://owa.cnet.com/exchweb/bin/redir.asp?URL=http://svn.swlabs.org/foowi" target="_blank">http://svn.swlabs.org/foowi</a></p>
<p>2) I&#8217;ve released a few patches for the re-designed grub crawler, that aim to make things easier to use. The patch only slightly breaks things. See here: http://lists.wikia.com/pipermail/grub-dev/2008-January/thread.html</p>
<p>3) The Nutch stuff is supposed to be open sourced in the coming days.</p>
<p>4) I really want someone to explain the &#8220;interaction&#8221; between Visvo, which I&#8217;ve previously blogged about here:  http://lists.wikia.com/pipermail/grub-dev/2008-January/thread.html and Wikia. Denis Kubes, the CTO of Visvo, is doing a lot of Wikia work. Hats off. But still, are they one and the same? What is the deal here?</p>
<p>I&#8217;m late on my next FSM article, which is going to be the coolest yet I believe.</p>
<p>I&#8217;m sleepy. More tomorrow.</p>
<p>-Yousef</p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fwikia-wikia-wikia%2F&amp;title=Wikia%20Wikia%20Wikia&amp;bodytext=Lot%27s%20of%20movement%20at%20Wikia%20today.%0D%0A%0D%0A1%29%20Foowi%2C%20the%20%22Social%22%20aspects%20have%20gone%20open%20source.%20See%3A%20http%3A%2F%2Fsvn.swlabs.org%2Ffoowi%0D%0A%0D%0A2%29%20I%27ve%20released%20a%20few%20patches%20for%20the%20re-designed%20grub%20crawler%2C%20that%20aim%20to%20make%20things%20easier%20to%20use.%20The%20patch%20only%20slig" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fwikia-wikia-wikia%2F&amp;title=Wikia%20Wikia%20Wikia&amp;notes=Lot%27s%20of%20movement%20at%20Wikia%20today.%0D%0A%0D%0A1%29%20Foowi%2C%20the%20%22Social%22%20aspects%20have%20gone%20open%20source.%20See%3A%20http%3A%2F%2Fsvn.swlabs.org%2Ffoowi%0D%0A%0D%0A2%29%20I%27ve%20released%20a%20few%20patches%20for%20the%20re-designed%20grub%20crawler%2C%20that%20aim%20to%20make%20things%20easier%20to%20use.%20The%20patch%20only%20slig" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fwikia-wikia-wikia%2F&amp;title=Wikia%20Wikia%20Wikia" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fwikia-wikia-wikia%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fwikia-wikia-wikia%2F&amp;title=Wikia%20Wikia%20Wikia" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/01/wikia-wikia-wikia/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		</item>
		<item>
		<title>Grub Crawler Goes RESTful</title>
		<link>http://yousefourabi.com/blog/2008/01/grub-crawler-goes-restful/</link>
		<comments>http://yousefourabi.com/blog/2008/01/grub-crawler-goes-restful/#comments</comments>
		<pubDate>Mon, 07 Jan 2008 07:09:46 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/grub-crawler-goes-restful</guid>
		<description><![CDATA[This is a short blurb, but Jeremie aka Jer outlines his plan for taking the crawler &#8220;grub&#8221;, recently acquired from LookSmart, and replacing its heavy SOAP communication protocol with an essentially RESTful interface.
http://lists.wikia.com/pipermail/grub-dev/2007-November/000010.html




	
	
	
	
	


]]></description>
			<content:encoded><![CDATA[<p>This is a short blurb, but Jeremie aka Jer outlines his plan for taking the crawler &#8220;grub&#8221;, recently acquired from LookSmart, and replacing its heavy SOAP communication protocol with an essentially RESTful interface.</p>
<p>http://lists.wikia.com/pipermail/grub-dev/2007-November/000010.html</p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fgrub-crawler-goes-restful%2F&amp;title=Grub%20Crawler%20Goes%20RESTful&amp;bodytext=This%20is%20a%20short%20blerb%2C%20but%20Jeremie%20aka%20Jer%20outlines%20his%20plan%20on%20taking%20the%20recently%20aquired%20from%20looksmart%20crawler%20%22grub%22%20and%20replacing%20its%20heavy%20SOAP%20communication%20protocol%20with%20essentially%20RESTful%20interface.%0D%0A%0D%0Ahttp%3A%2F%2Flists.wikia.com%2Fpipermail%2Fgrub" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fgrub-crawler-goes-restful%2F&amp;title=Grub%20Crawler%20Goes%20RESTful&amp;notes=This%20is%20a%20short%20blerb%2C%20but%20Jeremie%20aka%20Jer%20outlines%20his%20plan%20on%20taking%20the%20recently%20aquired%20from%20looksmart%20crawler%20%22grub%22%20and%20replacing%20its%20heavy%20SOAP%20communication%20protocol%20with%20essentially%20RESTful%20interface.%0D%0A%0D%0Ahttp%3A%2F%2Flists.wikia.com%2Fpipermail%2Fgrub" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fgrub-crawler-goes-restful%2F&amp;title=Grub%20Crawler%20Goes%20RESTful" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fgrub-crawler-goes-restful%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2008%2F01%2Fgrub-crawler-goes-restful%2F&amp;title=Grub%20Crawler%20Goes%20RESTful" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2008/01/grub-crawler-goes-restful/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>The problem with Zimbra</title>
		<link>http://yousefourabi.com/blog/2007/12/the-problem-with-zimbra/</link>
		<comments>http://yousefourabi.com/blog/2007/12/the-problem-with-zimbra/#comments</comments>
		<pubDate>Sat, 01 Dec 2007 07:39:52 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/the-problem-with-zimbra</guid>
		<description><![CDATA[Zimbra is the full open source &#8220;collaboration&#8221; suite (email&#8230;calendar&#8230;) though it really should be classified as a semantic application due to its &#8220;Documents&#8221; feature which allows you to create notebooks with &#8220;pages&#8221; (aka light weight web-pages) that you can easily drag and drop snippets of text into.
It is relatively easy to setup and run (I [...]]]></description>
			<content:encoded><![CDATA[<p>Zimbra is the full open source &#8220;collaboration&#8221; suite (email&#8230;calendar&#8230;) though it really should be classified as a semantic application due to its &#8220;Documents&#8221; feature which allows you to create notebooks with &#8220;pages&#8221; (aka light weight web-pages) that you can easily drag and drop snippets of text into.</p>
<p>It is relatively easy to setup and run (I manage my own Zimbra server), and it is by far the best cross-browser web-mail experience &#8212; it is fast, and works for me not against me (I&#8217;m looking at you Yahoo Mail 2.0)</p>
<p>This all sounds well and good; what could be the problem, you ask? Two things: 1) Java, and 2) its standalone nature.</p>
<p>The two together make it a real pain in general.  Java imposes a higher memory requirement (don&#8217;t even think about running it on a machine with 512M of RAM), and the fact that it is pre-integrated with postfix/spamassassin&#8230;and meant to be run as a standalone solution makes it very hard to run with other software. It assumes it is the only thing running and binds by default to ports 80 and 443&#8230;etc.</p>
<p>What would be better would be a &#8220;componentized&#8221; version: I already have postfix/mysql/spamassassin set up, and I want Zimbra for web-mail and shared calendaring &#8212; drop a WAR into a Tomcat webapps directory and presto, you have a great web-mail package.</p>
<p>My Current solution is just to run Xen and give it its own instance&#8230;but when thinking about moving to shared hosting or a dedicated server it is&#8230;restricting to say the least.</p>
<p>I love Zimbra, I still use it&#8230; but keep this in mind if you are considering it.</p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F12%2Fthe-problem-with-zimbra%2F&amp;title=The%20problem%20with%20Zimbra&amp;bodytext=Zimba%20is%20the%20full%20open%20source%20%22collaboration%22%20suite%20%28email...calendar...%29%20though%20it%20really%20should%20be%20classified%20as%20a%20semantic%20application%20due%20to%20its%20%22Documents%22%20feature%20which%20allows%20you%20to%20create%20notebooks%20with%20%22pages%22%20%28aka%20light%20weight%20web-pages%29%20th" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F12%2Fthe-problem-with-zimbra%2F&amp;title=The%20problem%20with%20Zimbra&amp;notes=Zimba%20is%20the%20full%20open%20source%20%22collaboration%22%20suite%20%28email...calendar...%29%20though%20it%20really%20should%20be%20classified%20as%20a%20semantic%20application%20due%20to%20its%20%22Documents%22%20feature%20which%20allows%20you%20to%20create%20notebooks%20with%20%22pages%22%20%28aka%20light%20weight%20web-pages%29%20th" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F12%2Fthe-problem-with-zimbra%2F&amp;title=The%20problem%20with%20Zimbra" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F12%2Fthe-problem-with-zimbra%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F12%2Fthe-problem-with-zimbra%2F&amp;title=The%20problem%20with%20Zimbra" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2007/12/the-problem-with-zimbra/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Will Microsoft buy Ask?</title>
		<link>http://yousefourabi.com/blog/2007/11/will-microsoft-buy-ask/</link>
		<comments>http://yousefourabi.com/blog/2007/11/will-microsoft-buy-ask/#comments</comments>
		<pubDate>Sun, 18 Nov 2007 17:44:35 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Business]]></category>
		<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/will-microsoft-buy-ask</guid>
		<description><![CDATA[This month Barry Diller, CEO of IAC split up his media empire into five distinct companies, aligned more or less with their respective sectors. The Home shopping network &#8220;HSN&#8221;, Lending tree, Ticketmaster, Interval International, and IAC. IAC contains all the remaining Internet properties including Ask.com, Evite, Match.com and CollegeHumor.com
The timing of this is a little [...]]]></description>
			<content:encoded><![CDATA[<p>This month Barry Diller, CEO of IAC split up his media empire into five distinct companies, aligned more or less with their respective sectors. The Home shopping network &#8220;HSN&#8221;, Lending tree, Ticketmaster, Interval International, and IAC. IAC contains all the remaining Internet properties including Ask.com, Evite, Match.com and CollegeHumor.com</p>
<p>The timing of this is a little suspicious. Ask has been running a large, and mostly failed advertising campaign &#8220;Who killed the Algorithm&#8221; &#8212; perhaps a Sopranos reference?</p>
<p>The interesting thing in the back of my mind is: what will Microsoft do now that IAC is ripe for the picking?</p>
<p>Microsoft has been on a bender snapping up smaller companies (the cash must be burning holes in Ballmer&#8217;s pockets).  In early 2007 there was a stream of rumors that Microsoft was in serious talks to acquire Yahoo. Whether those fell through, or were never legitimate to begin with, will be a matter for the history books &#8212; but now, the number four search destination is open.</p>
<p>The ability to build a web search index while challenging is not impossible, something even smaller startups can do see:  <a href="http://yousefourabi.com/semantic-web/visvo-search-startup" title="Visvo Search Startup">Visvo Search</a></p>
<p>And what would Microsoft be buying? The experience to deliver on internet products, something Microsoft sorely lacks. Heck, it can barely deliver on its core competencies, desktop operating systems.<br />
<a href="http://yousefourabi.com/semantic-web/visvo-search-startup" title="Visvo Search Startup"></a></p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fwill-microsoft-buy-ask%2F&amp;title=Will%20Microsoft%20buy%20Ask%3F&amp;bodytext=This%20month%20Barry%20Diller%2C%20CEO%20of%20IAC%20split%20up%20his%20media%20empire%20into%20five%20distinct%20companies%2C%20aligned%20more%20or%20less%20with%20their%20respective%20sectors.%20The%20Home%20shopping%20network%20%22HSN%22%2C%20Lending%20tree%2C%20Ticketmaster%2C%20Interval%20International%2C%20and%20IAC.%20IAC%20contains" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fwill-microsoft-buy-ask%2F&amp;title=Will%20Microsoft%20buy%20Ask%3F&amp;notes=This%20month%20Barry%20Diller%2C%20CEO%20of%20IAC%20split%20up%20his%20media%20empire%20into%20five%20distinct%20companies%2C%20aligned%20more%20or%20less%20with%20their%20respective%20sectors.%20The%20Home%20shopping%20network%20%22HSN%22%2C%20Lending%20tree%2C%20Ticketmaster%2C%20Interval%20International%2C%20and%20IAC.%20IAC%20contains" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fwill-microsoft-buy-ask%2F&amp;title=Will%20Microsoft%20buy%20Ask%3F" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fwill-microsoft-buy-ask%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fwill-microsoft-buy-ask%2F&amp;title=Will%20Microsoft%20buy%20Ask%3F" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2007/11/will-microsoft-buy-ask/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
		</item>
		<item>
		<title>shelob the evil bot (spider from juniper networks)</title>
		<link>http://yousefourabi.com/blog/2007/11/shelob-the-evil-bot-spider-from-juniper-networks/</link>
		<comments>http://yousefourabi.com/blog/2007/11/shelob-the-evil-bot-spider-from-juniper-networks/#comments</comments>
		<pubDate>Sat, 10 Nov 2007 15:46:46 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/shelob-the-evil-bot-spider-from-juniper-networks</guid>
		<description><![CDATA[I just noticed a new spider in my servers logs: &#8220;shelob v1.0&#8243; coming from host 208.223.208.181 which resolved to security-lab1.juniper.net &#8212; Per this site: http://ella.slis.indiana.edu/~pwelsch/shelob/ &#8212; shelob stands for &#8220; Shelob Helps Examine Links on Blogs&#8221;
For those of you who are keen Tolkien fans, you&#8217;ll remember Shelob is the &#8220;evil spider&#8221;.  This story gets [...]]]></description>
			<content:encoded><![CDATA[<p>I just noticed a new spider in my server&#8217;s logs: &#8220;shelob v1.0&#8221; coming from host 208.223.208.181 which resolved to security-lab1.juniper.net &#8212; per this site: <a href="http://ella.slis.indiana.edu/~pwelsch/shelob/">http://ella.slis.indiana.edu/~pwelsch/shelob/</a> &#8212; shelob stands for &#8220;<span class="sub"> Shelob Helps Examine Links on Blogs</span>&#8221;<br />
For those of you who are keen <a href="http://en.wikipedia.org/wiki/J._R._R._Tolkien" title="J R R Tolkien">Tolkien </a>fans, you&#8217;ll remember Shelob is the &#8220;evil spider&#8221;.  This story gets weird when I browsed to &#8220;http://security-lab1.juniper.net/&#8221; and found an open apache directory containing some images and an executable (note: I didn&#8217;t click or download the executable, I&#8217;m not that brave, and I suggest you don&#8217;t either).</p>
<p>Juniper&#8217;s security team shouldn&#8217;t really be doing stuff like this, and if they were they should be much more open about it, using a proper User Agent, and including the URL to a project page with legitimate information.</p>
<p>The shelob v1.0 bot didn&#8217;t even check robots.txt, so this one is definitely getting blocked with mod_security.</p>
<p>If anyone from Juniper is reading this, feel free to post an explanation, but I won&#8217;t hold my breath.</p>
<p><em><strong>Resources: </strong></em></p>
<p><a href="http://en.wikipedia.org/wiki/Shelob">http://en.wikipedia.org/wiki/Shelob </a></p>
<p><a href="http://security-lab1.juniper.net/">http://security-lab1.juniper.net/ </a></p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fshelob-the-evil-bot-spider-from-juniper-networks%2F&amp;title=shelob%20the%20evil%20bot%20%28spider%20from%20juniper%20networks%29&amp;bodytext=I%20just%20noticed%20a%20new%20spider%20in%20my%20servers%20logs%3A%20%22shelob%20v1.0%22%20coming%20from%20host%20208.223.208.181%20which%20resolved%20to%20security-lab1.juniper.net%20--%20Per%20this%20site%3A%20http%3A%2F%2Fella.slis.indiana.edu%2F%7Epwelsch%2Fshelob%2F%20--%20shelob%20stands%20for%20%22%20Shelob%20Helps%20Examine%20Lin" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fshelob-the-evil-bot-spider-from-juniper-networks%2F&amp;title=shelob%20the%20evil%20bot%20%28spider%20from%20juniper%20networks%29&amp;notes=I%20just%20noticed%20a%20new%20spider%20in%20my%20servers%20logs%3A%20%22shelob%20v1.0%22%20coming%20from%20host%20208.223.208.181%20which%20resolved%20to%20security-lab1.juniper.net%20--%20Per%20this%20site%3A%20http%3A%2F%2Fella.slis.indiana.edu%2F%7Epwelsch%2Fshelob%2F%20--%20shelob%20stands%20for%20%22%20Shelob%20Helps%20Examine%20Lin" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fshelob-the-evil-bot-spider-from-juniper-networks%2F&amp;title=shelob%20the%20evil%20bot%20%28spider%20from%20juniper%20networks%29" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fshelob-the-evil-bot-spider-from-juniper-networks%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fshelob-the-evil-bot-spider-from-juniper-networks%2F&amp;title=shelob%20the%20evil%20bot%20%28spider%20from%20juniper%20networks%29" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2007/11/shelob-the-evil-bot-spider-from-juniper-networks/feed/</wfw:commentRss>
		<slash:comments>6</slash:comments>
		</item>
		<item>
		<title>Visvo search startup</title>
		<link>http://yousefourabi.com/blog/2007/11/visvo-search-startup/</link>
		<comments>http://yousefourabi.com/blog/2007/11/visvo-search-startup/#comments</comments>
		<pubDate>Mon, 05 Nov 2007 08:06:42 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>
		<category><![CDATA[Startups]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/visvo-search-startup</guid>
		<description><![CDATA[There is a new crawler on the block &#8212; the VisBot has been making its rounds. The seemingly legitimate crawler led me to its companies site: Visvo &#8212; and I like what I&#8217;ve found. Why does Visvo matter among this new wave of search start ups? Three reasons: 1) Each search result has an explain [...]]]></description>
			<content:encoded><![CDATA[<p><a href="http://yousefourabi.com/wp-content/uploads/2007/11/visvo_logo.png" title="Visvo Logo"><img src="http://yousefourabi.com/wp-content/uploads/2007/11/visvo_logo.png" alt="Visvo Logo" align="left" border="10" /></a>There is a new crawler on the block &#8212; the VisBot has been making its rounds. The seemingly legitimate crawler led me to its company&#8217;s site: <a href="http://www.visvo.com/" title="Visvo Website">Visvo</a> &#8212; and I like what I&#8217;ve found. Why does Visvo matter among this new wave of search start ups? Three reasons: 1) Each search result has an explain link that gives the technical details of how that result was scored; 2) They are using open source search technology: <a href="http://lucene.apache.org/nutch/" title="Nutch">Nutch</a> and <a href="http://lucene.apache.org/hadoop/" title="Hadoop">Hadoop</a>, both of the <a href="http://lucene.apache.org/" title="Lucene">Lucene</a> project; 3) They are building their own index instead of using a feed like, for example, <a href="http://www.quintura.com/" title="Quintura">Quintura</a>.</p>
<p>The Dallas-based startup explains its name as the <a href="http://en.wikipedia.org/wiki/Sanskrit" title="Sanskrit">Sanskrit </a>word for &#8220;universe&#8221;, and their tag line is: &#8220;its our universe&#8221;.  After the customary grandiose and unqualified statements about how advanced they are, Visvo claims to have developed the &#8220;automatic categorization search engine for web content&#8221;. While the implementation of this wasn&#8217;t immediately obvious to me after trying their search engine, I decided I like them as a company for other reasons.</p>
<p><a href="http://yousefourabi.com/wp-content/uploads/2007/11/visvo-serp.png" title="Visvo Search Result Page"><img src="http://yousefourabi.com/wp-content/uploads/2007/11/visvo-serp.png" alt="Visvo Search Result Page" align="left" border="10" width="300" /></a>I immediately did my favorite search &#8212; a vanity search on my full name (should be fairly unique). The Visvo engine returned two (and once three) results that were highly relevant: The &#8220;<a href="http://www.freesoftwaremagazine.com/article/debian_as_a_desktop_system" title="Debian as  Desktop System">Debian as a Desktop System</a>&#8221; article I wrote for the Free Software Magazine, and a ping back for a blog entry I wrote about the upcoming <a href="http://yousefourabi.com/bsd/freebsd-7-and-the-freebsd-installer" title="FreeBSD 7 Installer ">FreeBSD installer &#8220;finstall&#8221;</a> . Here is a screen shot of the results page for my vanity search above (Click on image for full resolution):</p>
<p>Each search result has an &#8220;explain&#8221; link (see screen shots ) where so-inclined users can view the technical details of how the search result was ranked by the Visvo engine. It shows factors such as the <a href="http://en.wikipedia.org/wiki/Tfidf" title="tf idf">tfidf</a> score of the document, which is probably the most used statistical model for measuring the frequency of a term in a document relative to documents in the rest of the corpus. (Click on screen shot for full resolution) <a href="http://yousefourabi.com/wp-content/uploads/2007/11/visvo-explain.png" title="Visvo Explain Feature"><img src="http://yousefourabi.com/wp-content/uploads/2007/11/visvo-explain.png" alt="Visvo Explain Feature" align="right" border="10" width="250" /></a></p>
<p>The fact that they are building their own index is relevant: today&#8217;s search giants have amassed too much power &#8212; in essence a monopoly on who finds what &#8212; and my hope for the future is that startups like Visvo can challenge the entrenched incumbents. The fact that they also open up their ranking is important, and this level of transparency is something Google could learn from.</p>
<p>The technical presentation by Dennis Kubes, founder and CTO of Visvo (linked at the bottom of the page), gives us some juicy details about their architecture.  Visvo is indeed using Hadoop, the open source Map-Reduce implementation, and occasionally supplements its dedicated hardware with Amazon&#8217;s EC2 hardware service.</p>
<p>I&#8217;m impressed so far. I wish them success, as their success is also the success of the open source community, more so than with Google.</p>
<p>I encourage everyone to give it a go, although the product is still in Alpha, so probably won&#8217;t be ready for your day to day search needs for a while.</p>
<p><em><strong>Resources:</strong></em></p>
<p><a href="http://www.visvo.com" title="http://www.visvo.com"> http://www.visvo.com</a></p>
<p><a href="http://www.visvo.com/bot.html" title="Visvo Bot">http://www.visvo.com/bot.html</a></p>
<p><a href="http://yousefourabi.com/wp-content/uploads/2007/11/visvotopost.pdf" title="Visvo Technical Document">Visvo Technical Presentation</a></p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fvisvo-search-startup%2F&amp;title=Visvo%20search%20startup&amp;bodytext=There%20is%20a%20new%20crawler%20on%20the%20block%20--%20the%20VisBot%20has%20been%20making%20its%20rounds.%20The%20seemingly%20legitimate%20crawler%20led%20me%20to%20its%20companies%20site%3A%20Visvo%20--%20and%20I%20like%20what%20I%27ve%20found.%20Why%20does%20Visvo%20matter%20among%20this%20new%20wave%20of%20search%20start%20ups%3F%20Three%20rea" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fvisvo-search-startup%2F&amp;title=Visvo%20search%20startup&amp;notes=There%20is%20a%20new%20crawler%20on%20the%20block%20--%20the%20VisBot%20has%20been%20making%20its%20rounds.%20The%20seemingly%20legitimate%20crawler%20led%20me%20to%20its%20companies%20site%3A%20Visvo%20--%20and%20I%20like%20what%20I%27ve%20found.%20Why%20does%20Visvo%20matter%20among%20this%20new%20wave%20of%20search%20start%20ups%3F%20Three%20rea" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fvisvo-search-startup%2F&amp;title=Visvo%20search%20startup" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fvisvo-search-startup%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F11%2Fvisvo-search-startup%2F&amp;title=Visvo%20search%20startup" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2007/11/visvo-search-startup/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
		</item>
		<item>
		<title>Download all wikipedia images with WikiX</title>
		<link>http://yousefourabi.com/blog/2007/10/download-all-wikipedia-images-with-wikix/</link>
		<comments>http://yousefourabi.com/blog/2007/10/download-all-wikipedia-images-with-wikix/#comments</comments>
		<pubDate>Fri, 26 Oct 2007 05:58:45 +0000</pubDate>
		<dc:creator>Yousef Ourabi</dc:creator>
				<category><![CDATA[Semantic Web]]></category>

		<guid isPermaLink="false">http://yousefourabi.com/semantic-web/download-all-wikipedia-images-with-wikix</guid>
		<description><![CDATA[There are scores of interesting projects to do with the data made available on Wikipedia
I recently had the need to download all the images on Wikipedia, and an excellent project&#8211; wikix &#8212; was brought to my attention, which is the &#8220;best-practice&#8221; way of downloading Wikipedia image data.
It is an application written in C that parses [...]]]></description>
			<content:encoded><![CDATA[<p>There are scores of interesting projects to do with the data made available on <a href="http://wikipedia.org" title="Wikipedia">Wikipedia</a></p>
<p>I recently had the need to download all the images on Wikipedia, and an excellent project&#8211; <a href="http://meta.wikimedia.org/wiki/Wikix" title="wikix">wikix</a> &#8212; was brought to my attention, which is the &#8220;best-practice&#8221; way of downloading Wikipedia image data.</p>
<p>It is an application written in C that parses the Wikipedia XML, extracts all the image links, and then creates a set of bash shell scripts that use common Unix utilities such as curl to actually fetch the images, while still respecting Wikipedia&#8217;s guidelines on bots.</p>
<p><strong><u>Compiling WikiX</u></strong></p>
<p>Compiling wikix is pretty straight forward. It is important to note that the package is bzipped, then gzipped, so you&#8217;ll run something similar to the following commands:</p>
<p>ftp://www.wikigadugi.org/wiki/MediaWiki/wikix.tar.gz.bz2</p>
<p>bzip2 -d wikix.tar.gz.bz2</p>
<p>tar xzvf wikix.tar.gz</p>
<p>cd wikix</p>
<p>chmod 775 *  (Note: the downloaded files have very restrictive permissions by default, this opens them up a bit)</p>
<p>make all &amp;&amp; make install</p>
<p>If you get the following error:  cc1: error: unrecognized command line option &#8220;-Wno-pointer-sign&#8221;  &#8212; it probably means you are trying to compile using gcc3 and not the newer gcc4. There are two options:</p>
<p>1) If you have both installed you can update the CC environment variable</p>
<p>2) You can comment out the CFLAGS and CFLAGS_LIB lines, and uncomment the already commented-out versions at the top of the file, so the Makefile goes from looking like this:</p>
<p>#CFLAGS = -g<br />
#CFLAGS_LIB = -g -c<br />
CFLAGS = -Wno-pointer-sign -g<br />
CFLAGS_LIB = -Wno-pointer-sign -g -c</p>
<p>To looking like this:</p>
<p>CFLAGS = -g<br />
CFLAGS_LIB = -g -c<br />
#CFLAGS = -Wno-pointer-sign -g<br />
#CFLAGS_LIB = -Wno-pointer-sign -g -c</p>
<p><strong><u>Downloading Wikipedia Images</u></strong></p>
<p>To get started, first download the xml database dump. At the time of writing, I issued the following command:</p>
<p>wget <span class="Object" id="OBJ_PREFIX_DWT708"><a href="http://download.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2" target="_blank">http://download.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2</a></span></p>
<p>Then unpack the bzipped xml with:</p>
<p>bzip2 -d enwiki-latest-pages-articles.xml.bz2</p>
<p>Then run wikix, specifying the -p flag if you want the scripts to be parallelized (I did) or omit it if you don&#8217;t:</p>
<p>wikix -p &lt; enwiki-<span class="Object" id="OBJ_PREFIX_DWT709">20070802</span>-pages-articles.xml &amp;</p>
<p>This took about 12 minutes on the machine I am working on. However, the machine has dual dual-core Opterons (model 254 @ 2.8 GHz), with 16 GB of RAM and 4+ terabytes of storage &#8212; so your mileage may vary.</p>
<p>If you need to put the images in a different directory than your current working directory, simply edit image_sh and change the &#8220;OUTPUT&#8221; variable to the path where you want your images.</p>
<p>Then to actually start sucking down images over the internet run:</p>
<p>./image_sh  &#8212; this should be in the directory you were in when you ran wikix.</p>
<p>An added bonus is that the wikix script creates a file called &#8220;image.log&#8221; which contains each image found, one per line, which is an ideal format for writing a quick script to insert all those image names in a database, such as mysql.</p>
<p>The total size of all the images as of October 2007 is approximately 406 gigabytes. So make sure you have lots of disk space!</p>
<p>A good place to start when looking for the dumps would be the official <a href="http://en.wikipedia.org/wiki/Wikipedia:Database_download" title="Wikipedia Database Dump">Wikipedia Database Dump page</a>.</p>
<p><strong><u>Resources</u></strong></p>
<p>http://en.wikipedia.org/wiki/Wikipedia:Database_download</p>
<p>http://meta.wikimedia.org/wiki/Wikix</p>

<div class="sociable">

<ul>
	<li class="sociablefirst"><a rel="nofollow"  href="http://digg.com/submit?phase=2&amp;url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F10%2Fdownload-all-wikipedia-images-with-wikix%2F&amp;title=Download%20all%20wikipedia%20images%20with%20WikiX&amp;bodytext=There%20are%20scores%20of%20interesting%20projets%20to%20do%20with%20the%20data%20made%20available%20on%20Wikiepdia%0D%0A%0D%0AI%20recently%20had%20the%20need%20to%20download%20all%20the%20images%20on%20Wikipedia%2C%20and%20an%20excellent%20project--%20wikix%20--%20was%20brought%20to%20my%20attention%2C%20which%20is%20the%20%22best-practice%22%20" title="Digg"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/digg.png" title="Digg" alt="Digg" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://delicious.com/post?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F10%2Fdownload-all-wikipedia-images-with-wikix%2F&amp;title=Download%20all%20wikipedia%20images%20with%20WikiX&amp;notes=There%20are%20scores%20of%20interesting%20projets%20to%20do%20with%20the%20data%20made%20available%20on%20Wikiepdia%0D%0A%0D%0AI%20recently%20had%20the%20need%20to%20download%20all%20the%20images%20on%20Wikipedia%2C%20and%20an%20excellent%20project--%20wikix%20--%20was%20brought%20to%20my%20attention%2C%20which%20is%20the%20%22best-practice%22%20" title="del.icio.us"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/delicious.png" title="del.icio.us" alt="del.icio.us" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://www.stumbleupon.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F10%2Fdownload-all-wikipedia-images-with-wikix%2F&amp;title=Download%20all%20wikipedia%20images%20with%20WikiX" title="StumbleUpon"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/stumbleupon.png" title="StumbleUpon" alt="StumbleUpon" class="sociable-hovers" /></a></li>
	<li><a rel="nofollow"  href="http://technorati.com/faves?add=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F10%2Fdownload-all-wikipedia-images-with-wikix%2F" title="Technorati"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/technorati.png" title="Technorati" alt="Technorati" class="sociable-hovers" /></a></li>
	<li class="sociablelast"><a rel="nofollow"  href="http://reddit.com/submit?url=http%3A%2F%2Fyousefourabi.com%2Fblog%2F2007%2F10%2Fdownload-all-wikipedia-images-with-wikix%2F&amp;title=Download%20all%20wikipedia%20images%20with%20WikiX" title="Reddit"><img src="http://yousefourabi.com/blog/wp-content/plugins/sociable/images/reddit.png" title="Reddit" alt="Reddit" class="sociable-hovers" /></a></li>
</ul>
</div>
]]></content:encoded>
			<wfw:commentRss>http://yousefourabi.com/blog/2007/10/download-all-wikipedia-images-with-wikix/feed/</wfw:commentRss>
		<slash:comments>29</slash:comments>
		</item>
	</channel>
</rss>
