#!/usr/bin/perl
#
# rss-to-site.pl -- fetch a My Netscape-style RSS file over HTTP and print a
# skeleton Sitescooper .site file for it on standard output.  The generated
# file still needs hand-editing (see the TODO text it emits).

use strict;
use warnings;

use LWP::UserAgent;
use URI::URL;
use HTTP::Request::Common;

# Identification string embedded in the generated .site file.
our $VERSION = "rss-to-site.pl 1.1";

# Print the usage message and exit with an error.  The message ends in a
# newline so that die() does not append a file/line suffix.
sub usage {
    die "
rss-to-site.pl -- a handy perl script to convert My Netscape-style
RSS (Rich Site Summary) files to a .site file, or at least nearly.

usage: rss-to-site.pl http://url.rss > whatever.site

";
}

# The RSS URL is the single required command-line argument.
my $url = $ARGV[0];
usage() unless defined $url;

# Fetch the feed, honouring any *_proxy environment variables.
my $useragent = LWP::UserAgent->new;
$useragent->env_proxy;

my $req = HTTP::Request->new('GET', $url);
$req->header(
    "Accept-Language" => "en",
    "Accept-Charset"  => "iso-8859-1,*,utf-8",
);

my $resp = $useragent->request($req);
if (!$resp->is_success) {
    die "HTTP GET failed: " . $resp->status_line . " ($url)\n";
}

# Normalise whitespace: collapse every run to one space, then drop the
# spaces adjacent to tags.  After this the document contains no newlines,
# so "." in the patterns below can span what used to be several lines.
my $rss = $resp->content;
$rss =~ s/\s+/ /g;
$rss =~ s/>\s+/>/g;
$rss =~ s/\s+</</g;

# NOTE(review): the element names in these three patterns (channel, title,
# description, item, link) were restored from context; confirm them against
# the upstream sitescooper copy of this script.
my ($name)
    = $rss =~ m{<channel>.*?<title>(.*?)</title>.*?</channel>}i;
my ($desc)
    = $rss =~ m{<channel>.*?<description>(.*?)</description>.*?</channel>}i;
my @links
    = $rss =~ m{<item[^>]*>.*?<link>(.*?)</link>.*?</item>}ig;

# A feed without a channel title/description still yields a usable file.
$name = '' unless defined $name;
$desc = '' unless defined $desc;

# TODO -- use heuristics to work out a good StoryURL for this site

# The backslash is doubled because this here-doc interpolates: a single
# "\." would be printed as a bare ".", losing the literal-dot escape in the
# emitted StoryURL pattern.
print <<__ENDOFMAIN;
URL: $url
Name: $name
Description: $desc
ContentsFormat: rss
StoryURL: /.*\\.s?html?

# TODO -- edit the StoryURL line above and make a good story URL for
# this site. Here's some sample URLs taken from the RSS file, for
# guidance:
#
__ENDOFMAIN

# List the item links as commented-out samples.
for my $link (@links) {
    # If the captured text wraps the address in a nested <url> element,
    # keep only the element's content.
    $link =~ s{^.*<url>(.*)</url>.*$}{$1};

    # Only absolute web links are useful as samples; accept https as well
    # as http.
    next unless $link =~ /^https?:/i;

    print "# $link\n";
}

print <<__ENDOFTAIL;

# You may also want to add a StoryStart and StoryEnd line to
# clean up the stories. Here's sample lines (you need to edit them):
#
# StoryStart: [some distinctive text before the start of the story text]
# StoryEnd: [some distinctive text after the end of the story text]

# (This is a sitescooper site file. see http://sitescooper.org/
# It was generated from the site's RSS by $VERSION.)

__ENDOFTAIL

=head1 NAME

rss-to-site - convert a "My Netscape" RSS file to a Sitescooper .site file.

=head1 SYNOPSIS

  rss-to-site http://url.rss > whatever.site

=head1 DESCRIPTION

This script will try to convert a My Netscape-style RSS (Rich Site Summary)
file to a .site file suitable for use with B<sitescooper>.

Provide the URL of the RSS file as the command-line argument, and it'll try
to work out a decent site file for that site.  Currently the site file will
still require a little bit of hand-editing afterwards.

=head1 SEE ALSO

C<sitescooper>(1)

=for comment
NOTE(review): this list originally held three C<...>(1) entries whose names
were lost from this copy of the file; only the sitescooper entry has been
restored (by assumption).  Recover the other two from the upstream script.

=head1 AUTHOR

Justin Mason E<lt>jm /at/ jmason.orgE<gt>

=head1 COPYRIGHT

Copyright (C) 1999-2000 Justin Mason

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., 59
Temple Place - Suite 330, Boston, MA 02111-1307, USA, or read it on the web
at http://www.gnu.org/copyleft/gpl.html .

=cut