Issue #42: get rid of old XML parser.

This commit is contained in:
Sergey Vartanov 2021-05-22 03:56:16 +03:00
parent a946972caf
commit 062b76fc08
3 changed files with 19 additions and 186 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 166 KiB

After

Width:  |  Height:  |  Size: 166 KiB

Before After
Before After

View file

@ -23,7 +23,7 @@ from roentgen.mapper import (
check_level_number, check_level_overground check_level_number, check_level_overground
) )
from roentgen.osm_getter import get_osm from roentgen.osm_getter import get_osm
from roentgen.osm_reader import Map, OSMReader, OSMReaderET, OverpassReader from roentgen.osm_reader import Map, OSMReader, OverpassReader
from roentgen.point import Point from roentgen.point import Point
from roentgen.scheme import LineStyle, Scheme from roentgen.scheme import LineStyle, Scheme
from roentgen.util import MinMax from roentgen.util import MinMax
@ -69,7 +69,7 @@ def main(argv) -> None:
if options.mode in [AUTHOR_MODE, CREATION_TIME_MODE]: if options.mode in [AUTHOR_MODE, CREATION_TIME_MODE]:
full = True full = True
osm_reader = OSMReaderET() osm_reader = OSMReader()
for file_name in input_file_names: for file_name in input_file_names:
if not file_name.is_file(): if not file_name.is_file():

View file

@ -11,7 +11,6 @@ import numpy as np
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from roentgen.ui import progress_bar
from roentgen.util import MinMax from roentgen.util import MinMax
__author__ = "Sergey Vartanov" __author__ = "Sergey Vartanov"
@ -59,29 +58,6 @@ class OSMNode(Tagged):
self.user: Optional[str] = None self.user: Optional[str] = None
self.uid: Optional[str] = None self.uid: Optional[str] = None
def parse_from_xml(self, text: str, is_full: bool = False) -> "OSMNode":
    """
    Fill this node from the text of its XML tag.

    :param text: XML node representation
    :param is_full: if false, only ID, latitude and longitude are parsed;
        otherwise visibility, changeset, timestamp, user and user ID are
        parsed as well
    :return: this node
    """
    self.id_ = int(get_value("id", text))

    latitude = float(get_value("lat", text))
    longitude = float(get_value("lon", text))
    self.coordinates = np.array((latitude, longitude))

    # Metadata is only needed in the full mode.
    if not is_full:
        return self

    self.visible = get_value("visible", text)
    self.changeset = get_value("changeset", text)
    raw_timestamp = get_value("timestamp", text)
    self.timestamp = datetime.strptime(raw_timestamp, OSM_TIME_PATTERN)
    self.user = get_value("user", text)
    self.uid = get_value("uid", text)

    return self
@classmethod @classmethod
def from_xml_structure(cls, element, is_full: bool = False) -> "OSMNode": def from_xml_structure(cls, element, is_full: bool = False) -> "OSMNode":
node = cls() node = cls()
@ -137,26 +113,6 @@ class OSMWay(Tagged):
self.timestamp: Optional[datetime] = None self.timestamp: Optional[datetime] = None
self.uid: Optional[str] = None self.uid: Optional[str] = None
def parse_from_xml(self, text: str, is_full: bool = False) -> "OSMWay":
    """
    Fill this way from the text of its opening XML tag.

    :param text: XML way representation
    :param is_full: if false, only ID is parsed; otherwise visibility,
        changeset, timestamp, user and user ID are parsed as well
    :return: this way
    """
    self.id_ = int(get_value("id", text))

    # Metadata is only needed in the full mode.
    if not is_full:
        return self

    self.visible = get_value("visible", text)
    self.changeset = get_value("changeset", text)
    raw_timestamp = get_value("timestamp", text)
    self.timestamp = datetime.strptime(raw_timestamp, OSM_TIME_PATTERN)
    self.user = get_value("user", text)
    self.uid = get_value("uid", text)

    return self
@classmethod @classmethod
def from_xml_structure(cls, element, nodes, is_full: bool) -> "OSMWay": def from_xml_structure(cls, element, nodes, is_full: bool) -> "OSMWay":
way = cls(int(element.attrib["id"])) way = cls(int(element.attrib["id"]))
@ -231,20 +187,6 @@ class OSMRelation(Tagged):
self.user: Optional[str] = None self.user: Optional[str] = None
self.timestamp: Optional[datetime] = None self.timestamp: Optional[datetime] = None
def parse_from_xml(self, text: str) -> "OSMRelation":
    """
    Parse from XML relation representation.

    The ``user`` and ``timestamp`` attributes are treated as optional: if
    the tag does not carry them, the corresponding fields are set to None
    instead of failing on ``datetime.strptime(None, ...)``.

    :param text: XML relation representation
    :return: this relation
    """
    self.id_ = int(get_value("id", text))
    self.user = get_value("user", text)

    # get_value returns None for a missing attribute, and strptime raises
    # TypeError on None, so the timestamp is parsed only when present.
    raw_timestamp = get_value("timestamp", text)
    if raw_timestamp is None:
        self.timestamp = None
    else:
        self.timestamp = datetime.strptime(raw_timestamp, OSM_TIME_PATTERN)

    return self
@classmethod @classmethod
def from_xml_structure(cls, element, is_full: bool) -> "OSMRelation": def from_xml_structure(cls, element, is_full: bool) -> "OSMRelation":
attributes = element.attrib attributes = element.attrib
@ -297,31 +239,6 @@ class OSMMember:
ref: int = 0 ref: int = 0
role: str = "" role: str = ""
def parse_from_xml(self, text: str) -> "OSMMember":
    """
    Fill this relation member from the text of its XML tag.

    :param text: XML relation member representation
    :return: this member
    """
    member_type = get_value("type", text)
    reference = int(get_value("ref", text))
    member_role = get_value("role", text)

    self.type_ = member_type
    self.ref = reference
    self.role = member_role

    return self
def get_value(key: str, text: str):
    """
    Parse XML attribute value from the tag text in the format of key="value".

    A match of ``key="`` is accepted only if it is not preceded by an XML name
    character, so that looking up ``id`` does not pick the value of ``uid``.

    :param key: attribute name
    :param text: text of a single XML tag
    :return: raw attribute value (XML entities are not unescaped), or None if
        the attribute is absent or its value has no closing quote
    """
    marker: str = key + '="'
    position: int = text.find(marker)

    while position != -1:
        previous: str = text[position - 1] if position > 0 else ""

        # Characters that may be a part of an XML attribute name; if one of
        # them precedes the match, we are inside a longer attribute name.
        inside_longer_name: bool = previous.isalnum() or (
            previous != "" and previous in "_-.:"
        )
        if not inside_longer_name:
            value_start: int = position + len(marker)
            value_end: int = text.find('"', value_start)
            if value_end == -1:
                # Unterminated value: do not return a silently truncated text.
                return None
            return text[value_start:value_end]

        position = text.find(marker, position + 1)

    return None
class Map: class Map:
""" """
@ -402,7 +319,11 @@ class OverpassReader:
return self.map_ return self.map_
class OSMReaderET: class OSMReader:
"""
OpenStreetMap XML file parser.
"""
def __init__(self): def __init__(self):
self.map_ = Map() self.map_ = Map()
@ -414,8 +335,18 @@ class OSMReaderET:
parse_relations: bool = True, parse_relations: bool = True,
is_full: bool = False, is_full: bool = False,
) -> Map: ) -> Map:
tree = ET.parse(file_name) """
root = tree.getroot() Parse OSM XML file.
:param file_name: input XML file
:param parse_nodes: whether nodes should be parsed
:param parse_ways: whether ways should be parsed
:param parse_relations: whether relations should be parsed
:param is_full: whether metadata should be parsed
:return: parsed map
"""
root = ET.parse(file_name).getroot()
for element in root: for element in root:
if element.tag == "node" and parse_nodes: if element.tag == "node" and parse_nodes:
node = OSMNode.from_xml_structure(element, is_full) node = OSMNode.from_xml_structure(element, is_full)
@ -431,101 +362,3 @@ class OSMReaderET:
OSMRelation.from_xml_structure(element, is_full) OSMRelation.from_xml_structure(element, is_full)
) )
return self.map_ return self.map_
class OSMReader:
    """
    OSM XML representation reader.

    Line-based reader: every XML tag is expected to occupy its own line.
    Parsed elements are accumulated in ``self.map_``.
    """

    def __init__(self):
        self.map_ = Map()

    def parse_osm_file(
        self,
        file_name: Path,
        parse_nodes: bool = True,
        parse_ways: bool = True,
        parse_relations: bool = True,
        full: bool = False,
    ) -> Map:
        """
        Parse OSM XML representation.

        :param file_name: input OSM XML file name
        :param parse_nodes: whether nodes should be parsed
        :param parse_ways: whether ways should be parsed
        :param parse_relations: whether relations should be parsed
        :param full: whether node and way metadata should be parsed
        :return: map with parsed elements
        """
        # First pass is only needed to know the total for the progress bar.
        with file_name.open() as input_file:
            lines_number: int = sum(1 for _ in input_file)

        print(f"Parsing OSM file {file_name}...")
        line_number: int = 0

        # Currently open (not self-closing) element that receives child tags;
        # None if there is no such element or the element is being skipped.
        element: Optional[Union[OSMNode, OSMWay, OSMRelation]] = None

        with file_name.open() as input_file:
            # Iterate lazily instead of loading all lines into memory.
            for line in input_file:
                line = line.strip()

                line_number += 1
                progress_bar(line_number, lines_number, text="Parsing")

                # Node parsing.

                if line.startswith("<node"):
                    if not parse_nodes:
                        if parse_ways or parse_relations:
                            # Forget previous element, so that children of
                            # the skipped one are not attached to it.
                            element = None
                            continue
                        break
                    if line[-2] == "/":
                        node: OSMNode = OSMNode().parse_from_xml(line, full)
                        self.map_.add_node(node)
                    else:
                        element = OSMNode().parse_from_xml(line, full)
                elif line == "</node>":
                    if element is not None:
                        self.map_.add_node(element)
                        element = None

                # Way parsing.

                elif line.startswith("<way"):
                    if not parse_ways:
                        if parse_relations:
                            element = None
                            continue
                        break
                    if line[-2] == "/":
                        way = OSMWay().parse_from_xml(line, full)
                        self.map_.add_way(way)
                    else:
                        element = OSMWay().parse_from_xml(line, full)
                elif line == "</way>":
                    if element is not None:
                        self.map_.add_way(element)
                        element = None

                # Relation parsing.

                elif line.startswith("<relation"):
                    if not parse_relations:
                        break
                    if line[-2] == "/":
                        relation = OSMRelation().parse_from_xml(line)
                        self.map_.add_relation(relation)
                    else:
                        element = OSMRelation().parse_from_xml(line)
                elif line == "</relation>":
                    if element is not None:
                        self.map_.add_relation(element)
                        element = None

                # Elements parsing. Children of skipped elements are ignored.

                elif line.startswith("<tag"):
                    if element is not None:
                        key: str = get_value("k", line)
                        value = get_value("v", line)
                        element.tags[key] = value
                elif line.startswith("<nd"):
                    if element is not None:
                        # NOTE(review): presumably fails if the referenced
                        # node was not parsed before; verify node_map type.
                        element.nodes.append(
                            self.map_.node_map[int(get_value("ref", line))]
                        )
                elif line.startswith("<member"):
                    if element is not None:
                        element.members.append(
                            OSMMember().parse_from_xml(line)
                        )

        progress_bar(-1, lines_number, text="Parsing")

        return self.map_