Skip to content

Using Tournesol dataset with Solidago

An instance of TournesolDataset can be used as a pipeline input in Solidago.

from solidago.pipeline.inputs import TournesolDataset

# Download the latest dataset from Tournesol API.
pipeline_input = TournesolDataset.download()

# Read a local file
pipeline_input = TournesolDataset("path/to/dataset.zip")

TournesolDataset

TournesolDataset(dataset_zip: Union[str, BinaryIO])

Bases: PipelineInput

Initialize a pipeline input by loading a Tournesol dataset (.zip format) from a local file or a URL.

users

users = read_csv(users_file, keep_default_na=False)

vouchers

vouchers = read_csv(vouchers_file, keep_default_na=False)

username_to_user_id

username_to_user_id = Series(
    data=index, index=users["public_username"]
)

entity_id_to_video_id

entity_id_to_video_id = Series(
    sorted(set(video_a) | set(video_b) | set(video)),
    name="video_id",
)

video_id_to_entity_id

video_id_to_entity_id = {
    video_id: entity_id for (entity_id, video_id) in items()
}

collective_scores

collective_scores = drop(columns=['video'])

comparisons

comparisons = drop(columns=['video_a', 'video_b'])

individual_scores

individual_scores = drop(
    columns=["public_username", "video"]
)

ratings_properties

ratings_properties

download

download() -> TournesolDataset

get_comparisons

get_comparisons(criterion=None, user_id=None) -> DataFrame

get_individual_scores

get_individual_scores(
    user_id: Optional[int] = None,
    criterion: Optional[str] = None,
    with_n_comparisons=False,
) -> DataFrame

get_collective_scores

get_collective_scores(
    entity_id: Optional[str] = None,
    criterion: Optional[str] = None,
) -> DataFrame

get_vouches

get_vouches()

get_users

get_users()