Pregunta

Heredé un almacén de datos que usaba simples archivos de texto para guardar documentos.

Los documentos tenían algunos atributos (fecha, título y texto), y estos se codificaban en el nombre del archivo: &lt;fecha&gt; - &lt;título&gt;.txt, siendo el cuerpo del archivo el texto. </p> <P> Sin embargo, en realidad los documentos del sistema tienen muchos más atributos, y se propone añadir aún más. </P> <P> Parecía lógico cambiar a un formato XML, y así lo hice: cada documento está ahora codificado en su propio archivo XML. </P> <P> Sin embargo, ¡la lectura de los archivos XML es ahora ridículamente lenta! (Donde 2000 artículos en formato .txt tardaban segundos, ahora 2000 artículos en formato .xml tardan más de 10 minutos). </P> <P> Yo estaba usando un analizador DOM y, después de descubrir lo lenta que era la lectura, me cambié a un analizador SAX; sin embargo, sigue siendo muy lenta (bueno, más rápida, pero todavía 10 minutos). </P> <P> ¿Es XML simplemente así de lento, o estoy haciendo algo extraño? Se agradecería cualquier idea. </p> <P> El sistema está escrito en JavaSE 1.6. 
El analizador se crea de esta manera: </p> <pre><code> /* import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; */ SAXParserFactory factory = SAXParserFactory.newInstance(); SAXParser saxParser; try { saxParser = factory.newSAXParser(); ArticleSaxHandler handler = new ArticleSaxHandler(); saxParser.parse(is, handler); return handler.getArticle(); } catch (ParserConfigurationException e) { throw new IOException(e); } catch (SAXException e) { throw new IOException(e); } finally { if (is != null) { try { is.close(); } catch (IOException e) { logger.error(e); } } } } /** SAX handler that buffers element text and collects the values used to build an Article. */ private class ArticleSaxHandler extends DefaultHandler { private URI uri = null; private String source = null; private String author = null; private DateTime articleDatetime = null; private DateTime processedDatetime = null; private String title = null; private String text = null; /* Element whose text is currently being collected; null when no known element is open. */ private ArticleElement currentElement; /* Character data buffered since the buffer was last emptied in endElement. */ private final StringBuilder builder = new StringBuilder(); /** Builds an Article from the values collected so far. */ public Article getArticle() { return new Article(uri, source, author, articleDatetime, processedDatetime, title, text); } /** Receive notification of the start of an element. 
Fails with a RuntimeException (wrapping a SAXParseException) if any character data is still buffered; otherwise records the ArticleElement matching qName as the current one (null when nothing matches). NOTE(review): characters() buffers all text unconditionally, so whitespace between tags would presumably trip this check - confirm the input contains none. */ public void startElement(String uri, String localName, String qName, Attributes attributes) { if (builder.length() != 0) { throw new RuntimeException(new SAXParseException(currentElement + " was not finished before " + qName + " was started", null)); } currentElement = ArticleElement.getElement(qName); } /** Receive notification of the end of an element. Takes the buffered text, empties the buffer, and stores the text in the field selected by currentElement (through new URI(...) or getDateTimeFormatter().parseDateTime(...) for the URI and date-time fields); returns early when currentElement is null and resets currentElement to null afterwards. */ public void endElement(String uri, String localName, String qName) { final String elementText = builder.toString(); builder.delete(0, builder.length()); if (currentElement == null) { return; } switch (currentElement) { case ARTICLE: break; case URI: try { this.uri = new URI(elementText); } catch (URISyntaxException e) { throw new RuntimeException(e); } break; case SOURCE: source = elementText; break; case AUTHOR: author = elementText; break; case ARTICLE_DATE_TIME: articleDatetime = getDateTimeFormatter().parseDateTime(elementText); break; case PROCESSED_DATE_TIME: processedDatetime = getDateTimeFormatter().parseDateTime(elementText); break; case TITLE: title = elementText; break; case TEXT: this.text = elementText; break; default: throw new IllegalStateException("Unexpected ArticleElement: " + currentElement); } currentElement = null; } /** Receive notification of character data inside an element. 
Appends the reported characters to the buffer. */ public void characters(char[] ch, int start, int length) { builder.append(ch, start, length); } /** Delegates to fatalError. */ public void error(SAXParseException e) { fatalError(e); } /** Logs the current element, the buffered text and the exception message; the exception is not rethrown here. */ public void fatalError(SAXParseException e) { logger.error("currentElement: " + currentElement + " ||builder: " + builder.toString() + "\n\n" + e.getMessage(), e); } } /** Known article elements, each paired with its element-name constant. */ private enum ArticleElement { ARTICLE(ARTICLE_ELEMENT_NAME), URI(URI_ELEMENT_NAME), SOURCE(SOURCE_ELEMENT_NAME), AUTHOR(AUTHOR_ELEMENT_NAME), ARTICLE_DATE_TIME( ARTICLE_DATETIME_ELEMENT_NAME), PROCESSED_DATE_TIME(PROCESSED_DATETIME_ELEMENT_NAME), TITLE(TITLE_ELEMENT_NAME), TEXT(TEXT_ELEMENT_NAME); private String name; private ArticleElement(String name) { this.name = name; } /** Returns the constant whose name equals qName, or null if none does. */ public static ArticleElement getElement(String qName) { for (ArticleElement element : ArticleElement.values()) { if (element.name.equals(qName)) { return element; } } return null; } } </code> </pre> </div> </div> </div> <div id="boxRight" class="tab-content col-xl-6"> <div id="boxSoluzioneDescrizione" class="boxArticolo"> <div class="row"> <div class="col-md-6"> <div class="row justify-content-start"> <div class="col-md-12"> <form id="feedback" action="/es/articolo/feedback" method="post"> <input type="hidden" name="_csrf" value="fHZZ13cuWvUyhVzT9j8H3El6nUnILf_pYawFwvo-nR46NDHhQn89j1vxL-uicmnvKCqvMJ1piN4p_zKHkVHOaA=="> <div class="hidden" style="display:none;"> <div class="form-group field-feedbackform-pagina required"> <input type="hidden" id="feedbackform-pagina" class="pagina" name="FeedbackForm[pagina]" value="/articolo/details"> <p class="help-block help-block-error"></p> </div> <div class="form-group field-feedbackform-idargomento"> <input type="hidden" id="feedbackform-idargomento" class="idArgomento" name="FeedbackForm[idArgomento]" value="786783"> <p class="help-block help-block-error"></p> </div> </div> <div> ¿Fue útil? 
<div class="example-block text-center"> <label class="radio-inline" for="happy" style="font-size:1.5em;cursor:pointer;color:green;"> <i class="far fa-thumbs-up" title="This answer is useful"></i> <!-- fas per effetto mano piena --> <!--<img class="votoImg" src="https://img.icons8.com/color/100/000000/bored.png" width="84" height="84" loading="lazy" fetchpriority="high"> --> </label> <input type="radio" id="happy" class="voto input-hidden" name="FeedbackForm[voto]" value="10"> </div> <div class="example-block text-center"> <label class="radio-inline" for="sad" style="font-size:1.5em;cursor:pointer;color:red;"> <i class="far fa-thumbs-down" title="This answer is not useful"></i> <!-- fas per effetto mano piena --> <!--<img class="votoImg" src="https://img.icons8.com/color/100/000000/boring.png" width="84" height="84" loading="lazy" fetchpriority="high">--> </label> <input type="radio" id="sad" class="voto input-hidden" name="FeedbackForm[voto]" value="0"> </div> <!--<div class="col-auto example-block text-center"> <label class="radio-inline"> <input type="radio" name="voto" id="exicetd" class="input-hidden" /> <img class="votoImg" src="https://img.icons8.com/color/100/000000/smiling.png " width="84" height="84" loading="lazy" fetchpriority="high"> </label> </div>--> </div> <div class="row footer justify-content-between"> <div class="col"> <button type="button" class="btn btn-primary" data-dismiss="modal">Enviar</button> </div> </div> </form> </div> </div> </div> <div class="col-md-6"> </div> </div> <div class="row "> <div class="col-md-12"> <p class="title" style="background-color:green;"> <i class="far fa-thumbs-up"></i> Solución </p> <div class="testo"> <P> Leer los datos de un flujo sin búfer podría explicar estos problemas de rendimiento. Esto no está directamente relacionado con el cambio de texto a XML, pero quizá, por casualidad, su nueva implementación ya no utiliza un <code>BufferedInputStream</code>. 
</P> <Hr> <P> Siguiendo esa línea, en concreto, compruebe si este <code>is</code> tiene búfer: </p> <pre><code>saxParser.parse(is, handler); </code></pre> </div> </div> </div> </div> </div> </div> <div class="row mt-4 adv"> <div class="col-12 text-center"> <ins class="adsbygoogle" style="display:block; text-align:center;" data-ad-layout="in-article" data-ad-format="fluid" data-ad-client="ca-pub-5108424997424987" data-ad-slot="1879801491"></ins> <script defer async crossorigin="anonymous"> (adsbygoogle = window.adsbygoogle || []).push({}); </script> </div> </div> <div class="row mt-4 adv"> <div class="col-12 text-center"> </div> </div> <div class="row mt-4"> <div class="col-12"> </div> </div> <div class="row mt-4"> <div class="col-12"> <div class="attribution"> <div>Licenciado bajo: <a href="https://creativecommons.org/licenses/by-sa/3.0/" target="_blank">CC-BY-SA</a> con <a href="https://stackoverflow.blog/2009/06/25/attribution-required/" target="_blank">atribución</a></div> <div>No afiliado a <a href="https://stackoverflow.com/" target="_blank">StackOverflow</a></div> </div> </div> </div> <div id="share"></div> </div> <div class="row mb-4 adv"> <div class="col-md-12 text-center"> <!-- GeneraCodice - Footer pagina --> <ins class="adsbygoogle" style="display:block" data-ad-client="ca-pub-5108424997424987" data-ad-slot="5412049179" data-ad-format="auto" data-full-width-responsive="true"></ins> <script defer async crossorigin="anonymous"> (adsbygoogle = window.adsbygoogle || []).push({}); </script> </div> </div> </div> </div> <aside id="bannerRight" class="col-xs-12 col-md-4 col-lg-3 text-center" > <div class="container mt-4"> <div class="row mb-4 adv"> <div class="col-md-12"> <!-- GeneraCodice - Barra laterale --> <ins class="adsbygoogle" style="display:block" data-ad-client="ca-pub-5108424997424987" data-ad-slot="1592207755" data-ad-format="auto" data-full-width-responsive="true"></ins> <script defer async crossorigin="anonymous"> (adsbygoogle = window.adsbygoogle 
|| []).push({}); </script> </div> </div> <div class="row adv"> <div class="col-md-12"> <!-- GeneraCodice - Barra laterale 2 --> <ins class="adsbygoogle" style="display:block" data-ad-client="ca-pub-5108424997424987" data-ad-slot="8889943968" data-ad-format="auto" data-full-width-responsive="true"></ins> <script defer async crossorigin="anonymous"> (adsbygoogle = window.adsbygoogle || []).push({}); </script> </div> </div> <div class="row topArticoli justify-content-center"> <div class="col-md-12 col-lg-10 pt-4"> </div> </div> </div> </aside> </div> </section> <!-- DA INSERIRE COLLEGAMENTO A GENERANEWS E GRATISFORGRATIS.COM --> <footer class="site-footer"> <div class="section-free d-block d-md-flex"> <div class="section-newsletter col"> </div> <div class="col content-free-projects mb-2"> <div> <p class="my-3">Enlaces útiles</p> </div> <div class="d-flex justify-content-around"> <div></div> <div> <a class="nav-link" href="https://www.generacodice.com/es/tag">Etiquetas</a> <a class="nav-link" href="https://www.generacodice.com/es/site/aboutus">Sobre nosotros</a> <a class="nav-link" href="https://www.generacodice.com/es/site/contacts">Contactos</a> <a class="nav-link" href="https://www.generacodice.com/es/site/privacy">Privacidad</a> </div> <div> <a class="nav-link social fb" href="https://www.facebook.com/generacodice" target="_blank"><i class="fab fa-facebook"></i> Facebook</a> <a class="nav-link social instagram" href="https://www.instagram.com/genera_codice" target="_blank"><i class="fab fa-instagram"></i> Instagram</a> </div> <div></div> </div> <div class="small-footer-link d-flex align-items-center justify-content-center"> <form action="https://www.paypal.com/cgi-bin/webscr" method="post" target="_top"> <input type="hidden" name="cmd" value="_s-xclick" /> <input type="hidden" name="hosted_button_id" value="42ZKUPRLM66J2" /> <input type="image" src="https://www.paypalobjects.com/en_US/i/btn/btn_donate_SM.gif" border="0" name="submit" title="PayPal - The safer, 
easier way to pay online!" alt="Donate with PayPal button" /> </form> </div> </div> </div> <div class="row m-0 justify-content-center text-center p-2"> <div class="col-md-5"> <p>El contenido tiene licencia bajo Creative Commons.</p> <p class="mb-0">Si encuentra violaciones de derechos de autor, puede contactarnos en <a href="mailto:info@generacodice.com"> info@generacodice.com </a> Para solicitar la eliminación del contenido.</p> </div> </div> </footer> <div id="scroll-to-top" style="display: block;background:none;"> <img src="https://www.generacodice.com/img/icone/scroll-top.svg" alt="scroll top" style="width:48px;height:48px;background-color:#fff;" /> </div> <!-- Google Analytics --> <!-- Google tag (gtag.js) --> <script src="https://www.googletagmanager.com/gtag/js?id=G-PNYLV6VWJG" async crossorigin="anonymous"></script> <script crossorigin="anonymous" defer> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'G-PNYLV6VWJG'); </script> <!-- Visualizzare barra ricerca su google --> <script type="application/ld+json" crossorigin="anonymous"> { "@context": "https://schema.org", "@type": "WebSite", "url": "https://www.generacodice.com/", "potentialAction": { "@type": "SearchAction", "target": "https://www.generacodice.com/articolo?ricerca={search_term_string}", "query-input": "required name=search_term_string" } } </script> <!-- Yandex.Metrika counter <script type="text/javascript" defer crossorigin="anonymous"> (function(m,e,t,r,i,k,a){m[i]=m[i]||function(){(m[i].a=m[i].a||[]).push(arguments)}; m[i].l=1*new Date();k=e.createElement(t),a=e.getElementsByTagName(t)[0],k.async=1,k.src=r,a.parentNode.insertBefore(k,a)}) (window, document, "script", "https://mc.yandex.ru/metrika/tag.js", "ym"); ym(79291009, "init", { clickmap:true, trackLinks:true, accurateTrackBounce:true, webvisor:true }); </script> <noscript><div><img src="https://mc.yandex.ru/watch/79291009" style="position:absolute; 
left:-9999px;" alt="" /></div></noscript> <!-- /Yandex.Metrika counter --> <script>var lingua = "https://www.generacodice.com/es";</script> <script src="/lib/wow.min.js" preload></script> <script src="/lib/js.cookie.min.js" preload></script> <script src="https://cdn.jsdelivr.net/npm/cookie-bar/cookiebar-latest.min.js?customize=1&tracking=1&thirdparty=1&always=1&noGeoIp=1&showNoConsent=1&showPolicyLink=1&privacyPage=https%3A%2F%2Fwww.generacodice.com%2Fsite%2Fprivacy" preload></script> <script src="/js/form_ricerca.js" preload></script> <script src="https://kit.fontawesome.com/99a60a9345.js" preload></script> <script src="/js/ads.js" defer="defer" preload></script> <script src="/js/main.js?timestamp=20221207" defer="defer" preload></script> <script src="/assets/44258436/yii.js"></script> <script src="/assets/44258436/yii.validation.js"></script> <script src="/assets/44258436/yii.activeForm.js"></script> <script src="/js/feedback.js" defer></script> <script src="/js/articulate.min.js"></script> <script src="/js/playerTesto.js?202310021220"></script> <script src="/lib/jssocials/jssocials.min.js"></script> <script src="/js/sceditor/minified/sceditor.min.js"></script> <script src="/js/sceditor/minified/formats/xhtml.js"></script> <script src="/js/articolo/details.js?202309292139"></script> <script>jQuery(function ($) { jQuery('#feedback').yiiActiveForm([{"id":"feedbackform-pagina","name":"pagina","container":".field-feedbackform-pagina","input":"#feedbackform-pagina","error":".help-block.help-block-error","validate":function (attribute, value, messages, deferred, $form) {yii.validation.required(value, messages, {"message":"Pagina no puede estar vacío."});}},{"id":"feedbackform-idargomento","name":"idArgomento","container":".field-feedbackform-idargomento","input":"#feedbackform-idargomento","error":".help-block.help-block-error","validate":function (attribute, value, messages, deferred, $form) {yii.validation.number(value, messages, {"pattern":/^[+-]?\d+$/,"message":"Id 
Argomento debe ser un número entero.","skipOnEmpty":1});}}], []); });</script> <script> window.addEventListener('load', function() { var is_adsense_load = 0 window.addEventListener('scroll', function() { if (is_adsense_load == 0) { is_adsense_load = 1; var ele = document.createElement('script'); ele.async = true; ele.src = 'https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js' var sc = document.getElementsByTagName('script')[0] sc.parentNode.insertBefore(ele, sc); (adsbygoogle = window.adsbygoogle || []).push({ google_ad_client: "ca-pub-5108424997424987", enable_page_level_ads: true }); } }) }) </script> </body> </html>