如何提取原子/RSS
-
21-09-2019 - |
题
给出一个网址,如果有任何RSS节点,那么我增加数据库。
例如:
对于 这个网址, rssDoc.SelectNodes("rss/channel/item").Count
大于零。
但是对于 原子url, rssDoc.SelectNodes("rss/channel/item").count
等于零。
我怎么可以检查,如果原子/RSS网址有任何节点没有?我已经尝试 rssDoc.SelectNodes("feed/entry").Count
, 但是给我零计数。
Public Shared Function HasRssItems(ByVal url as string) As Boolean
Dim myRequest As WebRequest
Dim myResponse As WebResponse
Try
myRequest = System.Net.WebRequest.Create(url)
myRequest.Timeout = 5000
myResponse = myRequest.GetResponse()
Dim rssStream As Stream = myResponse.GetResponseStream()
Dim rssDoc As New XmlDocument()
rssDoc.Load(rssStream)
Return rssDoc.SelectNodes("rss/channel/item").Count > 0
Catch ex As Exception
Return False
Finally
myResponse.Close()
End Try
结束功能
解决方案
你的主要问题是,XML"节点的路径"上,这一行:
Return rssDoc.SelectNodes("rss/channel/item").Count > 0
一种方法我在这过去是使用一个简单的函数转换成一个原子的饲料成。当然,你可以走另一条路,或不在所有,然而,转换到一个单一的格式让你写一个"通用"的代码块,这将拉出的各种要素的一个饲料的项目,您可能有兴趣(即日期、职等)
还有一个 原子RSS转换器的文章 在代码项目提供了这样一种转换,但是,这是在。我以前手工转换这个VB.NET 我自己,因此,这里的VB.NET 版本:
Private Function AtomToRssConverter(ByVal atomDoc As XmlDocument) As XmlDocument
Dim xmlDoc As XmlDocument = atomDoc
Dim xmlNode As XmlNode = Nothing
Dim mgr As New XmlNamespaceManager(xmlDoc.NameTable)
mgr.AddNamespace("atom", "http://purl.org/atom/ns#")
Const rssVersion As String = "2.0"
Const rssLanguage As String = "en-US"
Dim rssGenerator As String = "RDFFeedConverter"
Dim memoryStream As New MemoryStream()
Dim xmlWriter As New XmlTextWriter(memoryStream, Nothing)
xmlWriter.Formatting = Formatting.Indented
Dim feedTitle As String = ""
Dim feedLink As String = ""
Dim rssDescription As String = ""
xmlNode = xmlDoc.SelectSingleNode("//atom:title", mgr)
If xmlNode Is Nothing Then
This looks like an ATOM v1.0 format, rather than ATOM v0.3.
mgr.RemoveNamespace("atom", "http://purl.org/atom/ns#")
mgr.AddNamespace("atom", "http://www.w3.org/2005/Atom")
End If
xmlNode = xmlDoc.SelectSingleNode("//atom:title", mgr)
If Not xmlNode Is Nothing Then
feedTitle = xmlNode.InnerText
End If
xmlNode = xmlDoc.SelectNodes("//atom:link/@href", mgr)(2)
If Not xmlNode Is Nothing Then
feedLink = xmlNode.InnerText
End If
xmlNode = xmlDoc.SelectSingleNode("//atom:tagline", mgr)
If Not xmlNode Is Nothing Then
rssDescription = xmlNode.InnerText
End If
xmlNode = xmlDoc.SelectSingleNode("//atom:subtitle", mgr)
If Not xmlNode Is Nothing Then
rssDescription = xmlNode.InnerText
End If
xmlWriter.WriteStartElement("rss")
xmlWriter.WriteAttributeString("version", rssVersion)
xmlWriter.WriteStartElement("channel")
xmlWriter.WriteElementString("title", feedTitle)
xmlWriter.WriteElementString("link", feedLink)
xmlWriter.WriteElementString("description", rssDescription)
xmlWriter.WriteElementString("language", rssLanguage)
xmlWriter.WriteElementString("generator", rssGenerator)
Dim items As XmlNodeList = xmlDoc.SelectNodes("//atom:entry", mgr)
If items Is Nothing Then
Throw New FormatException("Atom feed is not in expected format. ")
Else
Dim title As String = [String].Empty
Dim link As String = [String].Empty
Dim description As String = [String].Empty
Dim author As String = [String].Empty
Dim pubDate As String = [String].Empty
For i As Integer = 0 To items.Count - 1
Dim nodTitle As XmlNode = items(i)
xmlNode = nodTitle.SelectSingleNode("atom:title", mgr)
If Not xmlNode Is Nothing Then
title = xmlNode.InnerText
End If
Try
link = items(i).SelectSingleNode("atom:link[@rel= alternate ]", mgr).Attributes("href").InnerText
Catch ex As Exception
link = items(i).SelectSingleNode("atom:link", mgr).Attributes("href").InnerText
End Try
xmlNode = items(i).SelectSingleNode("atom:content", mgr)
If Not xmlNode Is Nothing Then
description = xmlNode.InnerText
End If
xmlNode = items(i).SelectSingleNode("//atom:name", mgr)
If Not xmlNode Is Nothing Then
author = xmlNode.InnerText
End If
xmlNode = items(i).SelectSingleNode("atom:issued", mgr)
If Not xmlNode Is Nothing Then
pubDate = xmlNode.InnerText
End If
xmlNode = items(i).SelectSingleNode("atom:updated", mgr)
If Not xmlNode Is Nothing Then
pubDate = xmlNode.InnerText
End If
xmlWriter.WriteStartElement("item")
xmlWriter.WriteElementString("title", title)
xmlWriter.WriteElementString("link", link)
If pubDate.Length < 1 Then
pubDate = Date.MinValue.ToString()
End If
xmlWriter.WriteElementString("pubDate", Convert.ToDateTime(pubDate).ToUniversalTime().ToString("ddd, dd MMM yyyy HH:mm:ss G\MT"))
xmlWriter.WriteElementString("author", author)
xmlWriter.WriteElementString("description", description)
xmlWriter.WriteEndElement()
Next
xmlWriter.WriteEndElement()
xmlWriter.Flush()
xmlWriter.Close()
End If
Dim retDoc As New XmlDocument()
Dim outStr As String = Encoding.UTF8.GetString(memoryStream.ToArray())
retDoc.LoadXml(outStr)
Return retDoc
End Function
使用是相当直截了当。简单地装载在你的原子进入一个 XmlDocument
目,并通过它向这种功能,你将得到一个 XmlDocument
目后,在RSS格式!
如果你有兴趣,我已经把整个 RSSReader类了pastebin.com
不隶属于 StackOverflow