在MacOS 10.12.1上使用R Studio 1.0.44
这是我第一次使用xml文件而且我遇到了很多麻烦..
我想将我的xml文件转换为r数据框,如下所示:
Date househouldid memberid session begin end
11/1/15 4FDYT14A1Q 1 235 49448 49453
11/1/15 4FDYT14A1Q 1 1010001 49453 49463
11/1/15 4FDYT14A1Q 1 1010475 49463 49468
我只想在数据框中使用Date,householdid,memberid,session,begin和end.
我试过了…
xmldataframe<-xmlToDataFrame("DQA_audxml_20150105_EN.xml")
print(xmldataframe)
得到了……
destinations members regular_guests occasional_guests tvsets household_process
这是xml文件的一部分(我受字符数限制)..
<?xml version="1.0" encoding="UTF-8"?>
<households date="2015-01-05" creation_date="2015-01-11T04:22:35" xmlns:s="http://www.mediametrie.fr/nge/schemas" xmlns:xalan="http://xml.apache.org/xalan">
<household id="95D7N14AFA">
<destinations />
<members>
<member id="1">
<member_process result="OK" vacation="undefined">
<individual_audience />
<alarms />
</member_process>
</member>
<member id="2">
<member_process result="OK" vacation="undefined">
<individual_audience />
<alarms />
</member_process>
</member>
<member id="3">
<member_process result="OK" vacation="undefined">
<individual_audience />
<alarms />
</member_process>
</member>
</members>
<regular_guests />
<occasional_guests />
<tvsets>
<tvset id="1">
<tvset_process result="OK">
<tvset_audience />
<alarms>
<alarm id="AL_T_P_VALID_LAST_HOUR_REBOOT" rule_id="T_P_METER_STOPPING_TIMESTAMPING">
<parameters>
<parameter name="unique_id" value="149" />
<parameter name="reboot_date" value="2015-01-05T03:34:23" />
<parameter name="length" value="0.35116898148148146" />
</parameters>
</alarm>
<alarm id="AL_T_P_VALID_LAST_HOUR_REBOOT" rule_id="T_P_METER_STOPPING_TIMESTAMPING">
<parameters>
<parameter name="unique_id" value="150" />
<parameter name="reboot_date" value="2015-01-05T05:28:34" />
<parameter name="length" value="0.02806712962962963" />
</parameters>
</alarm>
<alarm id="AL_T_P_VALID_LAST_HOUR_REBOOT" rule_id="T_P_METER_STOPPING_TIMESTAMPING">
<parameters>
<parameter name="unique_id" value="151" />
<parameter name="reboot_date" value="2015-01-05T08:22:46" />
<parameter name="length" value="0.07322916666666666" />
</parameters>
</alarm>
<alarm id="AL_T_P_VALID_LAST_HOUR_REBOOT" rule_id="T_P_METER_STOPPING_TIMESTAMPING">
<parameters>
<parameter name="unique_id" value="152" />
<parameter name="reboot_date" value="2015-01-05T14:32:43" />
<parameter name="length" value="0.004803240740740742" />
</parameters>
</alarm>
<alarm id="AL_T_P_VALID_LAST_HOUR_REBOOT" rule_id="T_P_METER_STOPPING_TIMESTAMPING">
<parameters>
<parameter name="unique_id" value="153" />
<parameter name="reboot_date" value="2015-01-05T18:30:47" />
<parameter name="length" value="0.0012384259259259258" />
</parameters>
</alarm>
<alarm id="AL_T_P_VALID_LAST_HOUR_REBOOT" rule_id="T_P_METER_STOPPING_TIMESTAMPING">
<parameters>
<parameter name="unique_id" value="154" />
<parameter name="reboot_date" value="2015-01-06T02:52:01" />
<parameter name="length" value="0.33817129629629633" />
</parameters>
</alarm>
</alarms>
</tvset_process>
</tvset>
</tvsets>
<household_process result="OK" vacation="no">
<alarms />
</household_process>
</household>
<household id="ISDPN14ASG">
<destinations />
<members>
<member id="1">
<member_process result="OK" vacation="undefined">
<individual_audience />
<alarms />
</member_process>
</member>
<member id="2">
<member_process result="OK" vacation="undefined">
<individual_audience />
<alarms />
</member_process>
</member>
<member id="3">
<member_process result="OK" vacation="undefined">
<individual_audience />
<alarms />
</member_process>
</member>
<member id="4">
<member_process result="OK" vacation="undefined">
<individual_audience />
<alarms />
</member_process>
</member>
<member id="5">
<member_process result="OK" vacation="undefined">
<individual_audience />
<alarms />
</member_process>
</member>
</members>
<regular_guests />
<occasional_guests />
<tvsets>
<tvset id="1">
<tvset_process result="KO">
<tvset_audience />
<alarms>
<alarm id="AL_R_P_EVENT_CHRONOLOGY" rule_id="R_P_EVENT_CHRONOLOGY">
<parameters>
<parameter name="unique_id" value="16195" />
</parameters>
</alarm>
<alarm id="AL_PROCESS_STOP" rule_id="R_P_EVENT_CHRONOLOGY">
<parameters />
</alarm>
</alarms>
</tvset_process>
</tvset>
</tvsets>
<household_process result="KO" vacation="no">
<alarms>
<alarm id="AL_T_FP_HOUSEHOLD_ABS_DECLARATION" rule_id="T_FP_HOUSEHOLD_ABS_DECLARATION">
<parameters />
</alarm>
<alarm id="AL_R_FP_NO_TVSET_KO" rule_id="R_FP_NO_TVSET_KO">
<parameters>
<parameter name="tvset_id" value="1" />
</parameters>
</alarm>
<alarm id="AL_R_FP_AT_LEAST_ONE_TVSET_OK" rule_id="R_FP_AT_LEAST_ONE_TVSET_OK">
<parameters />
</alarm>
</alarms>
</household_process>
</household>
<household id="4FDYT14A1Q">
<destinations />
<members>
<member id="1">
<member_process result="OK" vacation="undefined">
<individual_audience />
<alarms />
</member_process>
</member>
<member id="2">
<member_process result="OK" vacation="undefined">
<individual_audience>
<individual_audience_tvset id="1">
<channel session="1010497" begin="46722" end="46738" />
<channel session="48" begin="46738" end="46743" />
<channel session="235" begin="46743" end="46748" />
<channel session="47" begin="46748" end="46753" />
<channel session="1010383" begin="46753" end="46973" />
<channel session="1010497" begin="46973" end="46983" />
<channel session="1010319" begin="46983" end="46993" />
<channel session="1010072" begin="46993" end="47578" />
<channel session="1010499" begin="47578" end="47618" />
<channel session="47" begin="47618" end="47633" />
<channel session="1010383" begin="47633" end="47693" />
<channel session="48" begin="47693" end="47703" />
<channel session="235" begin="47703" end="47728" />
<channel session="1010469" begin="47728" end="47778" />
<channel session="235" begin="47778" end="47783" />
<channel session="48" begin="47783" end="47828" />
<channel session="1010367" begin="47828" end="47853" />
<channel session="48" begin="47853" end="48373" />
<channel session="1010383" begin="48373" end="48573" />
<channel session="48" begin="48573" end="49433" />
<channel session="1010452" begin="49433" end="49448" />
<channel session="235" begin="49448" end="49453" />
<channel session="1010001" begin="49453" end="49463" />
<channel session="1010475" begin="49463" end="49468" />
<channel session="1010383" begin="49468" end="49483" />
<channel session="48" begin="49483" end="49508" />
<channel session="1010403" begin="49508" end="49543" />
<channel session="48" begin="49543" end="53839" />
<channel session="1010354" begin="60868" end="60878" />
<channel session="235" begin="60878" end="60893" />
<channel session="48" begin="60893" end="60908" />
<channel session="235" begin="60908" end="60913" />
<channel session="1010072" begin="60913" end="60928" />
<channel session="48" begin="60928" end="60933" />
<channel session="235" begin="60933" end="60938" />
<channel session="48" begin="60938" end="60943" />
<channel session="235" begin="60943" end="60953" />
<channel session="48" begin="60953" end="60968" />
<channel session="235" begin="60968" end="60993" />
<channel session="1010072" begin="60993" end="61483" />
<channel session="1010499" begin="61483" end="61538" />
<channel session="48" begin="61538" end="61543" />
<channel session="47" begin="61543" end="61578" />
<channel session="1010383" begin="61578" end="61603" />
<channel session="1010072" begin="61603" end="63428" />
<channel session="1010497" begin="63428" end="63438" />
<channel session="1010319" begin="63438" end="63888" />
<channel session="1010072" begin="63888" end="63898" />
<channel session="235" begin="63898" end="63903" />
<channel session="47" begin="63903" end="64368" />
<channel session="48" begin="64368" end="64373" />
<channel session="47" begin="64373" end="64378" />
<channel session="1010072" begin="64378" end="64393" />
<channel session="1010383" begin="64393" end="64408" />
<channel session="1010001" begin="64408" end="64423" />
<channel session="235" begin="64423" end="64428" />
<channel session="1010469" begin="64428" end="64468" />
<channel session="1010452" begin="64468" end="64473" />
<channel session="48" begin="64473" end="64528" />
<channel session="1010367" begin="64528" end="65053" />
<channel session="48" begin="65053" end="65328" />
<channel session="1010367" begin="65328" end="66613" />
<channel session="1010383" begin="66613" end="66628" />
<channel session="1010497" begin="66628" end="66823" />
<channel session="47" begin="66823" end="66833" />
<channel session="1010367" begin="66833" end="67538" />
<channel session="1010383" begin="67538" end="67553" />
<channel session="1010354" begin="67553" end="67563" />
<channel session="1010497" begin="67563" end="67658" />
<channel session="1010499" begin="67658" end="67713" />
<channel session="1010072" begin="67713" end="67733" />
<channel session="1010367" begin="67733" end="67803" />
<channel session="48" begin="67803" end="68018" />
<channel session="1010367" begin="68018" end="70963" />
<channel session="1010383" begin="70963" end="71048" />
<channel session="1010367" begin="71048" end="71848" />
<channel session="1010383" begin="71848" end="72023" />
<channel session="1010497" begin="72023" end="72043" />
<channel session="1010072" begin="72043" end="72068" />
<channel session="1010499" begin="72068" end="72083" />
<channel session="47" begin="72083" end="72098" />
<channel session="1010499" begin="72098" end="73643" />
<channel session="1010072" begin="73643" end="73668" />
<channel session="1010354" begin="73668" end="73683" />
<channel session="1010383" begin="73683" end="73693" />
<channel session="1010499" begin="73693" end="73708" />
<channel session="1010072" begin="73708" end="73748" />
<channel session="235" begin="73748" end="73753" />
<channel session="1010497" begin="73753" end="73763" />
<channel session="1010499" begin="73763" end="74323" />
<channel session="1010497" begin="74323" end="74363" />
<channel session="1010354" begin="74363" end="74378" />
<channel session="1010499" begin="74378" end="74443" />
<channel session="1010072" begin="74443" end="74453" />
<channel session="1010499" begin="74453" end="74883" />
<channel session="1010497" begin="74883" end="74938" />
<channel session="1010072" begin="74938" end="74953" />
<channel session="47" begin="74953" end="75603" />
<channel session="1010497" begin="75603" end="77673" />
<channel session="1010383" begin="77673" end="79208" />
<channel session="1010354" begin="79208" end="79228" />
<channel session="235" begin="79228" end="79233" />
<channel session="1010497" begin="79233" end="79603" />
<channel session="1010072" begin="79603" end="80088" />
<channel session="1010497" begin="80088" end="80238" />
<channel session="1010499" begin="80238" end="80253" />
<channel session="235" begin="80253" end="80258" />
<channel session="47" begin="80258" end="80423" />
<channel session="1010072" begin="80423" end="80678" />
<channel session="1010497" begin="80678" end="80688" />
<channel session="47" begin="80688" end="80853" />
<channel session="235" begin="80853" end="80858" />
<channel session="1010354" begin="80858" end="80868" />
<channel session="1010497" begin="80868" end="81428" />
<channel session="1010072" begin="81428" end="81438" />
<channel session="235" begin="81438" end="81448" />
<channel session="47" begin="81448" end="81453" />
<channel session="1010497" begin="81453" end="82108" />
<channel session="1010354" begin="82108" end="82528" />
<channel session="1010497" begin="82528" end="82538" />
<channel session="1010072" begin="82538" end="82548" />
<channel session="1010499" begin="82548" end="82608" />
<channel session="47" begin="82608" end="82628" />
<channel session="1010072" begin="82628" end="82648" />
<channel session="1010354" begin="82648" end="82973" />
<channel session="1010072" begin="82973" end="82983" />
<channel session="1010499" begin="82983" end="83273" />
<channel session="47" begin="83273" end="83313" />
<channel session="1010383" begin="83313" end="83393" />
<channel session="1010319" begin="83393" end="83443" />
<channel session="1010499" begin="83443" end="83498" />
<channel session="48" begin="83498" end="83688" />
<channel session="47" begin="83688" end="83883" />
<channel session="48" begin="83883" end="83898" />
<channel session="1010258" begin="83898" end="84193" />
<channel session="47" begin="84193" end="84198" />
<channel session="1010260" begin="84198" end="84253" />
<channel session="235" begin="84253" end="84258" />
<channel session="47" begin="84258" end="84273" />
<channel session="48" begin="84273" end="84423" />
<channel session="235" begin="84423" end="84433" />
<channel session="48" begin="84433" end="84448" />
<channel session="1010260" begin="84448" end="84463" />
<channel session="1010144" begin="84463" end="84578" />
<channel session="235" begin="84578" end="84583" />
<channel session="1010260" begin="84583" end="84628" />
<channel session="235" begin="84628" end="84633" />
<channel session="1010258" begin="84633" end="84643" />
<channel session="48" begin="84643" end="85088" />
<channel session="235" begin="85088" end="85098" />
<channel session="1010258" begin="85098" end="85113" />
<channel session="235" begin="85113" end="85118" />
<channel session="1010144" begin="85118" end="85163" />
<channel session="1010260" begin="85163" end="85178" />
<channel session="48" begin="85178" end="85398" />
<channel session="1010493" begin="85398" end="85418" />
<channel session="48" begin="85418" end="85428" />
<channel session="235" begin="85428" end="85433" />
<channel session="47" begin="85433" end="85488" />
<channel session="1010260" begin="85488" end="85493" />
<channel session="1010258" begin="85493" end="85638" />
<channel session="1010474" begin="85638" end="85785" />
</individual_audience_tvset>
</individual_audience>
<alarms />
</member_process>
</member>
<member id="3">
<member_process result="OK" vacation="undefined">
<individual_audience>
<individual_audience_tvset id="1">
<channel session="1010493" begin="54476" end="54490" />
<channel session="47" begin="54490" end="55035" />
<channel session="1010379" begin="55035" end="55210" />
<channel session="48" begin="55210" end="55220" />
<channel session="1010397" begin="55220" end="55570" />
<channel session="235" begin="55570" end="55580" />
<channel session="1010425" begin="55580" end="55585" />
<channel session="1010397" begin="55585" end="55620" />
<channel session="235" begin="55620" end="55625" />
<channel session="48" begin="55625" end="56875" />
<channel session="1010379" begin="56875" end="57345" />
<channel session="48" begin="57345" end="57615" />
<channel session="1010379" begin="57615" end="57865" />
<channel session="48" begin="57865" end="58079" />
</individual_audience_tvset>
</individual_audience>
<alarms />
</member_process>
</member>
<member id="4">
<member_process result="OK" vacation="undefined">
<individual_audience />
<alarms />
</member_process>
</member>
</members>
<regular_guests />
<occasional_guests />
<tvsets>
<tvset id="1">
<tvset_process result="OK">
<tvset_audience>
<channel session="1010497" begin="46708" end="46738" />
<channel session="48" begin="46738" end="46743" />
<channel session="235" begin="46743" end="46748" />
<channel session="47" begin="46748" end="46753" />
<channel session="1010383" begin="46753" end="46973" />
<channel session="1010497" begin="46973" end="46983" />
<channel session="1010319" begin="46983" end="46993" />
<channel session="1010072" begin="46993" end="47578" />
<channel session="1010499" begin="47578" end="47618" />
<channel session="47" begin="47618" end="47633" />
<channel session="1010383" begin="47633" end="47693" />
<channel session="48" begin="47693" end="47703" />
<channel session="235" begin="47703" end="47728" />
<channel session="1010469" begin="47728" end="47778" />
<channel session="235" begin="47778" end="47783" />
<channel session="48" begin="47783" end="47828" />
<channel session="1010367" begin="47828" end="47853" />
<channel session="48" begin="47853" end="48373" />
<channel session="1010383" begin="48373" end="48573" />
<channel session="48" begin="48573" end="49433" />
<channel session="1010452" begin="49433" end="49448" />
<channel session="235" begin="49448" end="49453" />
<channel session="1010001" begin="49453" end="49463" />
<channel session="1010475" begin="49463" end="49468" />
<channel session="1010383" begin="49468" end="49483" />
<channel session="48" begin="49483" end="49508" />
<channel session="1010403" begin="49508" end="49543" />
<channel session="48" begin="49543" end="53839" />
<channel session="1010493" begin="54460" end="54490" />
<channel session="47" begin="54490" end="55035" />
<channel session="1010379" begin="55035" end="55210" />
<channel session="48" begin="55210" end="55220" />
<channel session="1010397" begin="55220" end="55570" />
<channel session="235" begin="55570" end="55580" />
<channel session="1010425" begin="55580" end="55585" />
<channel session="1010397" begin="55585" end="55620" />
<channel session="235" begin="55620" end="55625" />
<channel session="48" begin="55625" end="56875" />
<channel session="1010379" begin="56875" end="57345" />
<channel session="48" begin="57345" end="57615" />
<channel session="1010379" begin="57615" end="57865" />
<channel session="48" begin="57865" end="58079" />
<channel session="1010354" begin="60848" end="60878" />
<channel session="235" begin="60878" end="60893" />
<channel session="48" begin="60893" end="60908" />
<channel session="235" begin="60908" end="60913" />
<channel session="1010072" begin="60913" end="60928" />
<channel session="48" begin="60928" end="60933" />
<channel session="235" begin="60933" end="60938" />
<channel session="48" begin="60938" end="60943" />
<channel session="235" begin="60943" end="60953" />
<channel session="48" begin="60953" end="60968" />
<channel session="235" begin="60968" end="60993" />
<channel session="1010072" begin="60993" end="61483" />
<channel session="1010499" begin="61483" end="61538" />
<channel session="48" begin="61538" end="61543" />
<channel session="47" begin="61543" end="61578" />
<channel session="1010383" begin="61578" end="61603" />
<channel session="1010072" begin="61603" end="63428" />
<channel session="1010497" begin="63428" end="63438" />
<channel session="1010319" begin="63438" end="63888" />
<channel session="1010072" begin="63888" end="63898" />
<channel session="235" begin="63898" end="63903" />
<channel session="47" begin="63903" end="64368" />
<channel session="48" begin="64368" end="64373" />
<channel session="47" begin="64373" end="64378" />
<channel session="1010072" begin="64378" end="64393" />
<channel session="1010383" begin="64393" end="64408" />
<channel session="1010001" begin="64408" end="64423" />
<channel session="235" begin="64423" end="64428" />
<channel session="1010469" begin="64428" end="64468" />
<channel session="1010452" begin="64468" end="64473" />
<channel session="48" begin="64473" end="64528" />
<channel session="1010367" begin="64528" end="65053" />
<channel session="48" begin="65053" end="65328" />
<channel session="1010367" begin="65328" end="66613" />
<channel session="1010383" begin="66613" end="66628" />
<channel session="1010497" begin="66628" end="66823" />
<channel session="47" begin="66823" end="66833" />
<channel session="1010367" begin="66833" end="67538" />
<channel session="1010383" begin="67538" end="67553" />
<channel session="1010354" begin="67553" end="67563" />
<channel session="1010497" begin="67563" end="67658" />
<channel session="1010499" begin="67658" end="67713" />
<channel session="1010072" begin="67713" end="67733" />
<channel session="1010367" begin="67733" end="67803" />
<channel session="48" begin="67803" end="68018" />
<channel session="1010367" begin="68018" end="70963" />
<channel session="1010383" begin="70963" end="71048" />
<channel session="1010367" begin="71048" end="71848" />
<channel session="1010383" begin="71848" end="72023" />
<channel session="1010497" begin="72023" end="72043" />
<channel session="1010072" begin="72043" end="72068" />
<channel session="1010499" begin="72068" end="72083" />
<channel session="47" begin="72083" end="72098" />
<channel session="1010499" begin="72098" end="73643" />
<channel session="1010072" begin="73643" end="73668" />
<channel session="1010354" begin="73668" end="73683" />
<channel session="1010383" begin="73683" end="73693" />
<channel session="1010499" begin="73693" end="73708" />
<channel session="1010072" begin="73708" end="73748" />
<channel session="235" begin="73748" end="73753" />
<channel session="1010497" begin="73753" end="73763" />
<channel session="1010499" begin="73763" end="74323" />
<channel session="1010497" begin="74323" end="74363" />
<channel session="1010354" begin="74363" end="74378" />
<channel session="1010499" begin="74378" end="74443" />
<channel session="1010072" begin="74443" end="74453" />
<channel session="1010499" begin="74453" end="74883" />
<channel session="1010497" begin="74883" end="74938" />
<channel session="1010072" begin="74938" end="74953" />
<channel session="47" begin="74953" end="75603" />
<channel session="1010497" begin="75603" end="77673" />
<channel session="1010383" begin="77673" end="79208" />
<channel session="1010354" begin="79208" end="79228" />
<channel session="235" begin="79228" end="79233" />
<channel session="1010497" begin="79233" end="79603" />
<channel session="1010072" begin="79603" end="80088" />
<channel session="1010497" begin="80088" end="80238" />
<channel session="1010499" begin="80238" end="80253" />
<channel session="235" begin="80253" end="80258" />
最佳答案 这只是肘部油脂,熟悉XML的结构.相对较新的purrr包在这里非常有用.但是,我认为你的问题仍然有点模糊,缺乏有效的XML文件不仅仅是个问题:
library(xml2)
library(purrr)
library(dplyr)
hh <- read_xml("~/data/so.xml")
xml_find_all(hh, ".//household") %>%
map_df(function(x) {
id <- xml_attr(x, "id")
members <- xml_find_all(x, ".//member")
map_df(members, function(y) {
channel_sessions <- xml_find_all(y, ".//channel")
if (length(channel_sessions) == 0) {
NULL
} else {
xml_attrs(channel_sessions) %>%
map_df(as.list) %>%
mutate(memberid=xml_attr(y, "id"),
household=id)
}
})
}) %>%
mutate(date=xml_find_first(hh, "//households") %>%
xml_attr("creation_date") %>%
as.Date())
## # A tibble: 176 × 6
## session begin end memberid household date
## <chr> <chr> <chr> <chr> <chr> <date>
## 1 1010497 46722 46738 2 4FDYT14A1Q 2015-01-11
## 2 48 46738 46743 2 4FDYT14A1Q 2015-01-11
## 3 235 46743 46748 2 4FDYT14A1Q 2015-01-11
## 4 47 46748 46753 2 4FDYT14A1Q 2015-01-11
## 5 1010383 46753 46973 2 4FDYT14A1Q 2015-01-11
## 6 1010497 46973 46983 2 4FDYT14A1Q 2015-01-11
## 7 1010319 46983 46993 2 4FDYT14A1Q 2015-01-11
## 8 1010072 46993 47578 2 4FDYT14A1Q 2015-01-11
## 9 1010499 47578 47618 2 4FDYT14A1Q 2015-01-11
## 10 47 47618 47633 2 4FDYT14A1Q 2015-01-11
## # ... with 166 more rows