A possible generic specification of an extraction function using jsoup that can retrieve generic URLs and items (both, either, or neither) could be defined as follows. It is functional and, as far as I know, idiomatic. Please comment if it can be improved.
import org.jsoup.nodes.Document
/** Traits, default implementations and result types for functions that extract
  * items and/or URL pairs from a jsoup `Document`, given an optional context.
  *
  * Every extractor exposes its behaviour as a plain function value through `apply`,
  * so extractors can be passed around and composed like ordinary functions.
  */
object ExtractorTraits {

  /** A string paired with an optional payload.
    *
    * @param data    the string component (presumably the URL itself — confirm against callers)
    * @param payload optional extra value carried alongside `data`
    * @tparam T type of the payload
    */
  case class UrlPair[T](data: String, payload: Option[T])

  /** Combined outcome of running an item extractor and a URL extractor.
    *
    * @param items extracted items, or `None` when the item extractor produced nothing
    * @param urls  extracted URL pairs, or `None` (the default) when there are none
    * @tparam T type of the extracted items
    * @tparam U type of the payload attached to each [[UrlPair]]
    */
  case class ResultPair[T, U](items: Option[Seq[T]], urls: Option[Seq[UrlPair[U]]] = None)

  /** Contract for extracting items from a document.
    *
    * @tparam I type of the extracted items
    * @tparam C type of the optional context handed to the extractor
    */
  trait ItemExtractorTrait[I, C] {
    type ExtractedType = I
    type RetType = Option[Seq[ExtractedType]]
    type ExtractorFunction = (Document, Option[C]) => RetType

    /** The extraction function: `(document, optional context) => optional items`. */
    def apply: ExtractorFunction
  }

  /** Item extractor backed by a caller-supplied function.
    *
    * @param extract_item function used for extraction; the default ignores its
    *                     arguments and always returns `None`
    */
  class ItemExtractor[I, C](
      extract_item: ItemExtractorTrait[I, C]#ExtractorFunction =
        (_: Document, _: Option[C]) => None
  ) extends ItemExtractorTrait[I, C] {
    // Expose the supplied function directly; a forwarding lambda would add nothing.
    val apply: ExtractorFunction = extract_item
  }

  /** Contract for extracting URL pairs from a document.
    *
    * @tparam U type of the payload attached to each [[UrlPair]]
    * @tparam C type of the optional context handed to the extractor
    */
  trait UrlExtractorTrait[U, C] {
    type UrlPayload = U
    type RetType = Option[Seq[UrlPair[U]]]
    type ExtractorFunction = (Document, Option[C]) => RetType

    /** The extraction function: `(document, optional context) => optional URL pairs`. */
    def apply: ExtractorFunction
  }

  /** URL extractor backed by a caller-supplied function.
    *
    * @param extract_url function used for extraction; the default ignores its
    *                    arguments and always returns `None`
    */
  class UrlExtractor[U, C](
      // Typed via the trait (not this class) to mirror ItemExtractor; the alias is identical.
      extract_url: UrlExtractorTrait[U, C]#ExtractorFunction =
        (_: Document, _: Option[C]) => None
  ) extends UrlExtractorTrait[U, C] {
    // Expose the supplied function directly; a forwarding lambda would add nothing.
    val apply: ExtractorFunction = extract_url
  }

  /** Contract for extracting items and URL pairs together.
    *
    * @tparam I type of the extracted items
    * @tparam U type of the payload attached to each [[UrlPair]]
    * @tparam C type of the optional context handed to the extractor
    */
  trait ExtractorTrait[I, U, C] {
    type RetType = ResultPair[I, U]
    type ExtractorFunction = (Document, Option[C]) => RetType

    /** The extraction function: `(document, optional context) => ResultPair`. */
    def apply: ExtractorFunction
  }

  /** Combines an item-extraction function and a URL-extraction function.
    *
    * @param item_extractor function producing the items; defaults to the function of a
    *                       default-constructed [[ItemExtractor]] (always `None`)
    * @param url_extractor  function producing the URL pairs; defaults to the function of a
    *                       default-constructed [[UrlExtractor]] (always `None`)
    */
  class Extractor[I, U, C](
      item_extractor: ItemExtractorTrait[I, C]#ExtractorFunction = new ItemExtractor[I, C]().apply,
      url_extractor: UrlExtractorTrait[U, C]#ExtractorFunction = new UrlExtractor[U, C]().apply
  ) extends ExtractorTrait[I, U, C] {
    // Both functions receive the same document and context; the item extractor runs first.
    val apply: ExtractorFunction = (doc: Document, ctxt: Option[C]) =>
      ResultPair[I, U](item_extractor(doc, ctxt), url_extractor(doc, ctxt))
  }
}
edit
Split the function traits up so they can be worked with separately.
edit2
Extractors now take an optional context argument.