Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions lib/ingestors/event_ingestion.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,37 @@ def convert_location(input)
input
end

# Derives target-audience categories from a free-text event description.
#
# Each keyword below is matched as a case-insensitive substring of the
# description; every keyword that occurs contributes its audience category.
#
# @param description [String, nil] the event description; may be nil or
#   empty when the source supplies no description
# @return [Array<String>] the distinct audience categories found, in the
#   order they first appear in the mapping; empty when nothing matches
def parse_audience(description)
  audience_mapping = {
    'post-docs' => 'researchers',
    "PhD's candidate" => 'researchers',
    'PhD student' => 'researchers',
    'principal investigator' => 'researchers',
    'professor' => 'researchers',
    'scientist' => 'researchers',
    'library staff' => 'research support staff',
    'research librarian' => 'research support staff',
    'information specialist' => 'research support staff',
    'archivist' => 'research support staff',
    'repository manager' => 'research support staff',
    'data steward' => 'research support staff',
    'data manager' => 'research support staff',
    'data professional' => 'research support staff',
    'data engineer' => 'research support staff',
    'software engineer' => 'research support staff',
    'data librarian' => 'research support staff',
    'bachelor' => 'students',
    'master' => 'students',
    'teacher' => 'trainers',
    'coaches' => 'trainers',
    'educator' => 'trainers'
  }

  # A missing description must not raise: callers invoke this right before
  # adding the event, so an exception here would lose the whole event.
  text = description.to_s.downcase
  return [] if text.empty?

  # Lowercase the description once rather than once per keyword.
  audience_mapping
    .select { |keyword, _audience| text.include?(keyword.downcase) }
    .values
    .uniq
end

def parse_dates(input, timezone = nil)
Time.use_zone(timezone) do
# try to split on obvious interval markers
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/dans_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def process_dans(url)

event.source = 'DANS'
event.timezone = 'Amsterdam'
event.target_audience = parse_audience(event.description)

add_event(event)
rescue Exception => e
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/dtls_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def process_dtls(url)
event.set_default_times
event.source = 'DTL'
event.timezone = 'Amsterdam'
event.target_audience = parse_audience(event.description)
add_event(event)
rescue Exception => e
@messages << "Extract event fields failed with: #{e.message}"
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/han_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def process_han(_url)
event.venue = "#{venue_super_css.text} #{venue_sub_css.text}"
event.source = "HAN"
event.timezone = 'Amsterdam'
event.target_audience = parse_audience(event.description)

add_event(event)
rescue Exception => e
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/lcrdm_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def process_lcrdm(url)
event.source = 'LCRDM'
event.timezone = 'Amsterdam'
event.set_default_times
event.target_audience = parse_audience(event.description)

add_event(event)
rescue Exception => e
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/leiden_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def process_leiden(url)
# does TeSS support that?

event.source = 'Universiteit Leiden'
event.target_audience = parse_audience(event.description)

add_event(event)
rescue Exception => e
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/maastricht_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def process_maastricht(url)
event.timezone = 'Europe/Amsterdam' # how to get this from Icalendar Event object?

event.source = 'Maastricht University'
event.target_audience = parse_audience(event.description)

add_event(event)
rescue Exception => e
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/nwo_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def process_nwo(url)
event.url = "https://www.nwo.nl#{event_data.css('h3.card__title > a').attribute('href').value}"

event.source = 'NWO'
event.target_audience = parse_audience(event.description)

add_event(event)
rescue Exception => e
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/odissei_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def process_odissei(_url)
event.source = 'ODISSEI'
event.timezone = 'Amsterdam'
event.set_default_times
event.target_audience = parse_audience(event.description)
add_event(event)
rescue Exception => e
@messages << "Extract event fields failed with: #{e.message}"
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/oscd_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def process_oscd(url)
end
if div&.next_sibling&.next_sibling.nil? || (div&.next_sibling&.next_sibling&.name == 'h1')
event.set_default_times
event.target_audience = parse_audience(event.description)
add_event(event)
end
end
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/oscm_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def process_oscm(url)
# it's not really needed since dtstart and dtend contain timezone information
event.source = 'OSCM'
event.online = true
event.target_audience = parse_audience(event.description)

# add event to events array
add_event(event)
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/rdnl_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def process_rdnl(url)
event.source = 'RDNL'
event.timezone = 'Amsterdam'
event.set_default_times
event.target_audience = parse_audience(event.description)

add_event(event)
rescue Exception => e
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/rug_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def process_rug(url)
event.source = 'RUG'
event.timezone = 'Amsterdam'
event.set_default_times
event.target_audience = parse_audience(event.description)

add_event(event)
rescue Exception => e
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/surf_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def process_surf(url)
event.source = 'SURF'
event.online = true
event.timezone = 'Amsterdam'
event.target_audience = parse_audience(event.description)

# add event to events array
add_event(event)
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/tdcc_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def process_tdcc(url)
event.start += 1.year
event.end += 1.year
end
event.target_audience = parse_audience(event.description)

add_event(event)
rescue Exception => e
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/utwente_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def process_utwente(url)
event.timezone = 'Amsterdam'
event.organizer = 'University of Twente'
event.source = 'University of Twente'
event.target_audience = parse_audience(event.description)
add_event(event)
rescue Exception => e
@messages << "Extract event fields failed with: #{e.message}"
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/uu_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ def process_uu(url)
event.timezone = 'Amsterdam'
# UU wants opt-in instead of opt-out for this scraper
event.visible = false
event.target_audience = parse_audience(event.description)

# the below code allows fetching the long description, at the cost of a
# page load per event.
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/uva_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def process_uva(url)
event.keywords = attr.fetch('taxonomy', []).map(&:values).flatten

event.event_types = attr.fetch('eventType', []).map { |t| convert_event_types(t) }
event.target_audience = parse_audience(event.description)

# add event to events array
add_event(event)
Expand Down
1 change: 1 addition & 0 deletions lib/ingestors/taxila/wur_ingestor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def process_wur(url)
event.set_default_times
event.source = 'WUR'
event.timezone = 'Amsterdam'
event.target_audience = parse_audience(event.description)

add_event(event)
rescue Exception => e
Expand Down