Skip to content

Using Tournesol dataset with Solidago

An instance of TournesolDataset can be used as a pipeline input in Solidago.

from solidago.pipeline.inputs import TournesolDataset

# Download the latest dataset from Tournesol API.
pipeline_input = TournesolDataset.download()

# Read a local file
pipeline_input = TournesolDataset("path/to/dataset.zip")

TournesolDataset

TournesolDataset(dataset_zip: Union[str, BinaryIO])

Bases: PipelineInput

Initialize a pipeline input by loading a Tournesol dataset (.zip format) from a local file or a URL.

users

users = read_csv(users_file, keep_default_na=False)

vouchers

vouchers = read_csv(vouchers_file, keep_default_na=False)

username_to_user_id

username_to_user_id = Series(data=index, index=users['public_username'])

entity_id_to_video_id

entity_id_to_video_id = Series(sorted(set(video_a) | set(video_b) | set(video)), name='video_id')

video_id_to_entity_id

video_id_to_entity_id = {video_id: entity_id for (entity_id, video_id) in items()}

collective_scores

collective_scores = drop(columns=['video'])

comparisons

comparisons = drop(columns=['video_a', 'video_b'])

individual_scores

individual_scores = drop(columns=['public_username', 'video'])

ratings_properties

ratings_properties

download

download() -> TournesolDataset

get_comparisons

get_comparisons(criterion=None, user_id=None) -> DataFrame

get_individual_scores

get_individual_scores(user_id: Optional[int] = None, criterion: Optional[str] = None, with_n_comparisons=False) -> DataFrame

get_collective_scores

get_collective_scores(entity_id: Optional[str] = None, criterion: Optional[str] = None) -> DataFrame

get_vouches

get_vouches()

get_users

get_users()